Diffstat:
 Documentation/lockstat.txt               |    2
 Documentation/x86/early-microcode.txt    |   43
 arch/x86/Kconfig                         |   18
 arch/x86/include/asm/microcode.h         |   14
 arch/x86/include/asm/microcode_intel.h   |   85
 arch/x86/include/asm/processor.h         |    8
 arch/x86/include/asm/proto.h             |    2
 arch/x86/include/asm/tlbflush.h          |   18
 arch/x86/kernel/Makefile                 |    3
 arch/x86/kernel/cpu/common.c             |   17
 arch/x86/kernel/head64.c                 |   13
 arch/x86/kernel/head_32.S                |   11
 arch/x86/kernel/head_64.S                |    2
 arch/x86/kernel/microcode_core.c         |    7
 arch/x86/kernel/microcode_core_early.c   |   76
 arch/x86/kernel/microcode_intel.c        |  198
 arch/x86/kernel/microcode_intel_early.c  |  796
 arch/x86/kernel/microcode_intel_lib.c    |  174
 arch/x86/mm/init.c                       |   10
 arch/x86/xen/mmu.c                       |   44
 drivers/char/random.c                    |    6
 drivers/idle/i7300_idle.c                |    8
 drivers/usb/chipidea/debug.c             |    2
 fs/file.c                                |    2
 include/asm-generic/cmpxchg-local.h      |    8
 include/linux/idr.h                      |    2
 include/linux/lockdep.h                  |    2
 include/linux/seqlock.h                  |  193
 kernel/futex.c                           |    2
 kernel/futex_compat.c                    |    2
 kernel/lockdep.c                         |   15
 kernel/time/ntp.c                        |   26
 kernel/watchdog.c                        |   10
 lib/locking-selftest.c                   |   34
 lib/rwsem-spinlock.c                     |   69
 lib/rwsem.c                              |   75
 36 files changed, 1564 insertions(+), 433 deletions(-)
diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt
index cef00d42ed5b..dd2f7b26ca30 100644
--- a/Documentation/lockstat.txt
+++ b/Documentation/lockstat.txt
@@ -65,7 +65,7 @@ that had to wait on lock acquisition.
 
  - CONFIGURATION
 
-Lock statistics are enabled via CONFIG_LOCK_STATS.
+Lock statistics are enabled via CONFIG_LOCK_STAT.
 
  - USAGE
 
diff --git a/Documentation/x86/early-microcode.txt b/Documentation/x86/early-microcode.txt
new file mode 100644
index 000000000000..4aaf0dfb0cb8
--- /dev/null
+++ b/Documentation/x86/early-microcode.txt
@@ -0,0 +1,43 @@
+Early load microcode
+====================
+By Fenghua Yu <fenghua.yu@intel.com>
+
+The kernel can update microcode early during boot. Loading microcode early
+can fix CPU issues before they are observed during kernel boot time.
+
+Microcode is stored in an initrd file. During boot, it is read from the
+initrd file and loaded to the CPUs.
+
+The format of the combined initrd image is microcode in cpio format followed
+by the (possibly compressed) initrd image. The kernel parses the combined
+initrd image during boot. The microcode file in the cpio name space is:
+kernel/x86/microcode/GenuineIntel.bin
+
+During BSP boot (before SMP starts), if the kernel finds the microcode file in
+the initrd file, it parses the microcode and saves matching microcode in
+memory. If matching microcode is found, it will be uploaded on the BSP and
+later on all APs.
+
+The cached microcode patch is applied when CPUs resume from a sleep state.
+
+There are two legacy user space interfaces to load microcode: through
+/dev/cpu/microcode, or through the /sys/devices/system/cpu/microcode/reload
+file in sysfs.
+
+In addition to these two legacy methods, the early loading method described
+here is the third method with which microcode can be uploaded to a system's
+CPUs.
+
+The following example script shows how to generate a new combined initrd file
+in /boot/initrd-3.5.0.ucode.img from the original microcode file microcode.bin
+and the original initrd image /boot/initrd-3.5.0.img.
+
+mkdir initrd
+cd initrd
+mkdir kernel
+mkdir kernel/x86
+mkdir kernel/x86/microcode
+cp ../microcode.bin kernel/x86/microcode/GenuineIntel.bin
+find . | cpio -oc >../ucode.cpio
+cd ..
+cat ucode.cpio /boot/initrd-3.5.0.img >/boot/initrd-3.5.0.ucode.img
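
For reference, the path packed by the script above is how the kernel finds the
blob again at boot: the leading cpio archive is walked with the
find_cpio_data() helper from lib/earlycpio.c (used by scan_microcode() later
in this series). A minimal sketch, with the hypothetical wrapper name
find_ucode_in_initrd() introduced only for illustration:

	#include <linux/earlycpio.h>

	/*
	 * Illustrative sketch: locate the Intel microcode blob inside the
	 * combined initrd image. find_cpio_data() walks the leading cpio
	 * archive; the path must match what the build script packed.
	 */
	static struct cpio_data find_ucode_in_initrd(void *initrd, size_t size)
	{
		long offset = 0;

		return find_cpio_data("kernel/x86/microcode/GenuineIntel.bin",
				      initrd, size, &offset);
	}

Because the cpio archive simply precedes the compressed initrd, any tool that
does not know about the microcode prefix still sees a valid initrd after it.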
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ff0e5f3c844e..4ebc7a6e6724 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1054,6 +1054,24 @@ config MICROCODE_OLD_INTERFACE
 	def_bool y
 	depends on MICROCODE
 
+config MICROCODE_INTEL_LIB
+	def_bool y
+	depends on MICROCODE_INTEL
+
+config MICROCODE_INTEL_EARLY
+	bool "Early load microcode"
+	depends on MICROCODE_INTEL && BLK_DEV_INITRD
+	default y
+	help
+	  This option provides functionality to read additional microcode data
+	  at the beginning of the initrd image. The data tells the kernel to
+	  load microcode to the CPUs as early as possible. There is no
+	  functional change if no microcode data is glued to the initrd;
+	  therefore it's safe to say Y.
+
+config MICROCODE_EARLY
+	def_bool y
+	depends on MICROCODE_INTEL_EARLY
+
 config X86_MSR
 	tristate "/dev/cpu/*/msr - Model-specific register support"
 	---help---
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 43d921b4752c..6825e2efd1b4 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -57,4 +57,18 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
 static inline void __exit exit_amd_microcode(void) {}
 #endif
 
+#ifdef CONFIG_MICROCODE_EARLY
+#define MAX_UCODE_COUNT 128
+extern void __init load_ucode_bsp(void);
+extern __init void load_ucode_ap(void);
+extern int __init save_microcode_in_initrd(void);
+#else
+static inline void __init load_ucode_bsp(void) {}
+static inline __init void load_ucode_ap(void) {}
+static inline int __init save_microcode_in_initrd(void)
+{
+	return 0;
+}
+#endif
+
 #endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
new file mode 100644
index 000000000000..5356f927d411
--- /dev/null
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -0,0 +1,85 @@
+#ifndef _ASM_X86_MICROCODE_INTEL_H
+#define _ASM_X86_MICROCODE_INTEL_H
+
+#include <asm/microcode.h>
+
+struct microcode_header_intel {
+	unsigned int hdrver;
+	unsigned int rev;
+	unsigned int date;
+	unsigned int sig;
+	unsigned int cksum;
+	unsigned int ldrver;
+	unsigned int pf;
+	unsigned int datasize;
+	unsigned int totalsize;
+	unsigned int reserved[3];
+};
+
+struct microcode_intel {
+	struct microcode_header_intel hdr;
+	unsigned int bits[0];
+};
+
+/* microcode format is extended from prescott processors */
+struct extended_signature {
+	unsigned int sig;
+	unsigned int pf;
+	unsigned int cksum;
+};
+
+struct extended_sigtable {
+	unsigned int count;
+	unsigned int cksum;
+	unsigned int reserved[3];
+	struct extended_signature sigs[0];
+};
+
+#define DEFAULT_UCODE_DATASIZE	(2000)
+#define MC_HEADER_SIZE		(sizeof(struct microcode_header_intel))
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
+#define EXT_HEADER_SIZE		(sizeof(struct extended_sigtable))
+#define EXT_SIGNATURE_SIZE	(sizeof(struct extended_signature))
+#define DWSIZE			(sizeof(u32))
+
+#define get_totalsize(mc) \
+	(((struct microcode_intel *)mc)->hdr.totalsize ? \
+	 ((struct microcode_intel *)mc)->hdr.totalsize : \
+	 DEFAULT_UCODE_TOTALSIZE)
+
+#define get_datasize(mc) \
+	(((struct microcode_intel *)mc)->hdr.datasize ? \
+	 ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
+
+#define sigmatch(s1, s2, p1, p2) \
+	(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
+
+#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
+
+extern int
+get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev);
+extern int microcode_sanity_check(void *mc, int print_err);
+extern int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev);
+extern int
+update_match_revision(struct microcode_header_intel *mc_header, int rev);
+
+#ifdef CONFIG_MICROCODE_INTEL_EARLY
+extern void __init load_ucode_intel_bsp(void);
+extern void __cpuinit load_ucode_intel_ap(void);
+extern void show_ucode_info_early(void);
+#else
+static inline __init void load_ucode_intel_bsp(void) {}
+static inline __cpuinit void load_ucode_intel_ap(void) {}
+static inline void show_ucode_info_early(void) {}
+#endif
+
+#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
+extern int save_mc_for_early(u8 *mc);
+#else
+static inline int save_mc_for_early(u8 *mc)
+{
+	return 0;
+}
+#endif
+
+#endif /* _ASM_X86_MICROCODE_INTEL_H */
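
Two details of this header reward a worked example. First, per the SDM, a zero
totalsize/datasize in the header means the pre-Prescott default of 2000 data
bytes plus the 48-byte header, which is what the get_totalsize()/get_datasize()
macros encode. Second, sigmatch() treats two signatures as compatible when they
are equal and the platform-flag bitmasks intersect (or both are zero). A small
userspace sketch, illustrative only (MC_HEADER_SIZE hard-coded to the 48 bytes
that sizeof(struct microcode_header_intel) evaluates to):

	#include <stdio.h>

	#define DEFAULT_UCODE_DATASIZE	(2000)
	#define MC_HEADER_SIZE		(48)	/* 12 * sizeof(unsigned int) */

	#define sigmatch(s1, s2, p1, p2) \
		(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))

	int main(void)
	{
		/* datasize == 0 in the header implies the default sizes: */
		printf("default total size: %d\n",
		       DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE);	/* 2048 */

		/* same signature, overlapping platform-flag masks -> match */
		printf("%d\n", sigmatch(0x306a9, 0x306a9, 0x12, 0x10));	/* 1 */
		/* same signature, disjoint platform-flag masks -> no match */
		printf("%d\n", sigmatch(0x306a9, 0x306a9, 0x12, 0x01));	/* 0 */
		return 0;
	}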
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 8277941cbe99..3270116b1488 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -180,6 +180,14 @@ extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
 extern void detect_extended_topology(struct cpuinfo_x86 *c);
 extern void detect_ht(struct cpuinfo_x86 *c);
 
+#ifdef CONFIG_X86_32
+extern int have_cpuid_p(void);
+#else
+static inline int have_cpuid_p(void)
+{
+	return 1;
+}
+#endif
 static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 				unsigned int *ecx, unsigned int *edx)
 {
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 6f414ed88620..6fd3fd769796 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -5,8 +5,6 @@
 
 /* misc architecture specific prototypes */
 
-void early_idt_handler(void);
-
 void system_call(void);
 void syscall_init(void);
 
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0fee48e279cc..50a7fc0f824a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -20,10 +20,20 @@ static inline void __native_flush_tlb(void)
 	native_write_cr3(native_read_cr3());
 }
 
+static inline void __native_flush_tlb_global_irq_disabled(void)
+{
+	unsigned long cr4;
+
+	cr4 = native_read_cr4();
+	/* clear PGE */
+	native_write_cr4(cr4 & ~X86_CR4_PGE);
+	/* write old PGE again and flush TLBs */
+	native_write_cr4(cr4);
+}
+
 static inline void __native_flush_tlb_global(void)
 {
 	unsigned long flags;
-	unsigned long cr4;
 
 	/*
 	 * Read-modify-write to CR4 - protect it from preemption and
@@ -32,11 +42,7 @@ static inline void __native_flush_tlb_global(void)
 	 */
 	raw_local_irq_save(flags);
 
-	cr4 = native_read_cr4();
-	/* clear PGE */
-	native_write_cr4(cr4 & ~X86_CR4_PGE);
-	/* write old PGE again and flush TLBs */
-	native_write_cr4(cr4);
+	__native_flush_tlb_global_irq_disabled();
 
 	raw_local_irq_restore(flags);
 }
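
The point of this split is that toggling CR4.PGE flushes every TLB entry,
global pages included, and the new helper makes that primitive usable from
contexts where interrupts are already disabled (the early microcode path on
x86-64 calls it as flush_tlb_early() later in this series). A hedged usage
sketch, with the caller name invented for illustration:

	/*
	 * Sketch: a caller already running with IRQs off can use the new
	 * helper directly instead of paying for a redundant
	 * raw_local_irq_save()/raw_local_irq_restore() pair.
	 */
	static void flush_all_tlbs_irqs_off(void)
	{
		/* CR4.PGE off->on flushes all TLB entries, global included */
		__native_flush_tlb_global_irq_disabled();
	}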
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index ac3b3d002833..7bd3bd310106 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -87,6 +87,9 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 
+obj-$(CONFIG_MICROCODE_EARLY)		+= microcode_core_early.o
+obj-$(CONFIG_MICROCODE_INTEL_EARLY)	+= microcode_intel_early.o
+obj-$(CONFIG_MICROCODE_INTEL_LIB)	+= microcode_intel_lib.o
 microcode-y				:= microcode_core.o
 microcode-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
 microcode-$(CONFIG_MICROCODE_AMD)	+= microcode_amd.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9c3ab43a6954..d814772c5bed 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -37,6 +37,8 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/pat.h>
+#include <asm/microcode.h>
+#include <asm/microcode_intel.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/uv/uv.h>
@@ -213,7 +215,7 @@ static inline int flag_is_changeable_p(u32 flag)
 }
 
 /* Probe for the CPUID instruction */
-static int __cpuinit have_cpuid_p(void)
+int __cpuinit have_cpuid_p(void)
 {
 	return flag_is_changeable_p(X86_EFLAGS_ID);
 }
@@ -249,11 +251,6 @@ static inline int flag_is_changeable_p(u32 flag)
 {
 	return 1;
 }
-/* Probe for the CPUID instruction */
-static inline int have_cpuid_p(void)
-{
-	return 1;
-}
 static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 {
 }
@@ -1223,6 +1220,12 @@ void __cpuinit cpu_init(void)
 	int cpu;
 	int i;
 
+	/*
+	 * Load microcode on this cpu if a valid microcode is available.
+	 * This is early microcode loading procedure.
+	 */
+	load_ucode_ap();
+
 	cpu = stack_smp_processor_id();
 	t = &per_cpu(init_tss, cpu);
 	oist = &per_cpu(orig_ist, cpu);
@@ -1314,6 +1317,8 @@ void __cpuinit cpu_init(void)
 	struct tss_struct *t = &per_cpu(init_tss, cpu);
 	struct thread_struct *thread = &curr->thread;
 
+	show_ucode_info_early();
+
 	if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
 		for (;;)
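
Exporting have_cpuid_p() matters here because the early loader may run on
32-bit CPUs where CPUID is not guaranteed to exist; the probe behind it,
flag_is_changeable_p(), checks whether bit 21 of EFLAGS (the ID flag) can be
toggled. A sketch mirroring that technique (32-bit only, illustrative rather
than the kernel's exact implementation):

	#define X86_EFLAGS_ID	0x00200000	/* bit 21: CPUID detection flag */

	/*
	 * Sketch of the EFLAGS.ID probe behind have_cpuid_p() on 32-bit:
	 * if bit 21 of EFLAGS can be flipped, the CPUID instruction exists.
	 */
	static int cpuid_present(void)
	{
		unsigned long f1, f2;

		asm volatile("pushfl		\n\t"	/* save EFLAGS */
			     "pushfl		\n\t"
			     "popl %0		\n\t"	/* f1 = EFLAGS */
			     "movl %0, %1	\n\t"
			     "xorl %2, %1	\n\t"	/* flip the ID bit */
			     "pushl %1		\n\t"
			     "popfl		\n\t"	/* try to write it */
			     "pushfl		\n\t"
			     "popl %1		\n\t"	/* f2 = result */
			     "popfl		\n\t"	/* restore EFLAGS */
			     : "=&r" (f1), "=&r" (f2)
			     : "ir" (X86_EFLAGS_ID));

		return ((f1 ^ f2) & X86_EFLAGS_ID) != 0;
	}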
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 57334f4cd3af..c5e403f6d869 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,6 +26,7 @@
 #include <asm/e820.h>
 #include <asm/bios_ebda.h>
 #include <asm/bootparam_utils.h>
+#include <asm/microcode.h>
 
 /*
  * Manage page tables very early on.
@@ -159,17 +160,17 @@ void __init x86_64_start_kernel(char * real_mode_data)
 	/* clear bss before set_intr_gate with early_idt_handler */
 	clear_bss();
 
-	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
-#ifdef CONFIG_EARLY_PRINTK
+	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
 		set_intr_gate(i, &early_idt_handlers[i]);
-#else
-		set_intr_gate(i, early_idt_handler);
-#endif
-	}
 	load_idt((const struct desc_ptr *)&idt_descr);
 
 	copy_bootdata(__va(real_mode_data));
 
+	/*
+	 * Load microcode early on BSP.
+	 */
+	load_ucode_bsp();
+
 	if (console_loglevel == 10)
 		early_printk("Kernel alive\n");
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 3c3f58a0808f..73afd11799ca 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -144,6 +144,11 @@ ENTRY(startup_32)
 	movl %eax, pa(olpc_ofw_pgd)
 #endif
 
+#ifdef CONFIG_MICROCODE_EARLY
+	/* Early load ucode on BSP. */
+	call load_ucode_bsp
+#endif
+
 /*
  * Initialize page tables. This creates a PDE and a set of page
  * tables, which are located immediately beyond __brk_base. The variable
@@ -299,6 +304,12 @@ ENTRY(startup_32_smp)
 	movl %eax,%ss
 	leal -__PAGE_OFFSET(%ecx),%esp
 
+#ifdef CONFIG_MICROCODE_EARLY
+	/* Early load ucode on AP. */
+	call load_ucode_ap
+#endif
+
 default_entry:
 #define CR0_STATE	(X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
 			 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d94f6d68be2a..b7de3b25adb5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -336,6 +336,7 @@ early_idt_handlers:
 	i = i + 1
 	.endr
 
+/* This is global to keep gas from relaxing the jumps */
 ENTRY(early_idt_handler)
 	cld
 
@@ -404,6 +405,7 @@ ENTRY(early_idt_handler)
 	addq $16,%rsp		# drop vector number and error code
 	decl early_recursion_flag(%rip)
 	INTERRUPT_RETURN
+ENDPROC(early_idt_handler)
 
 	__INITDATA
 
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 3a04b224d0c0..22db92bbdf1a 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -364,10 +364,7 @@ static struct attribute_group mc_attr_group = {
 
 static void microcode_fini_cpu(int cpu)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
 	microcode_ops->microcode_fini_cpu(cpu);
-	uci->valid = 0;
 }
 
 static enum ucode_state microcode_resume_cpu(int cpu)
@@ -383,6 +380,10 @@ static enum ucode_state microcode_resume_cpu(int cpu)
 static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
 {
 	enum ucode_state ustate;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	if (uci && uci->valid)
+		return UCODE_OK;
 
 	if (collect_cpu_info(cpu))
 		return UCODE_ERROR;
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c
new file mode 100644
index 000000000000..577db8417d15
--- /dev/null
+++ b/arch/x86/kernel/microcode_core_early.c
@@ -0,0 +1,76 @@
+/*
+ * X86 CPU microcode early update for Linux
+ *
+ * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ *		      H Peter Anvin <hpa@zytor.com>
+ *
+ * This driver allows early upgrading of microcode on Intel processors
+ * belonging to the IA-32 family - PentiumPro, Pentium II,
+ * Pentium III, Xeon, Pentium 4, etc.
+ *
+ * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
+ * Software Developer's Manual.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <asm/microcode_intel.h>
+#include <asm/processor.h>
+
+#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
+#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
+#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
+#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
+#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
+#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
+#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
+
+#define CPUID_IS(a, b, c, ebx, ecx, edx)	\
+		(!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
+
+/*
+ * In the early loading microcode phase on the BSP, boot_cpu_data is not set
+ * up yet. x86_vendor() gets the vendor id for the BSP.
+ *
+ * In the 32-bit AP case, accessing boot_cpu_data needs a linear address. To
+ * simplify coding, we still use x86_vendor() to get the vendor id for APs.
+ *
+ * x86_vendor() gets vendor information directly through cpuid.
+ */
+static int __cpuinit x86_vendor(void)
+{
+	u32 eax = 0x00000000;
+	u32 ebx, ecx = 0, edx;
+
+	if (!have_cpuid_p())
+		return X86_VENDOR_UNKNOWN;
+
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
+		return X86_VENDOR_INTEL;
+
+	if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
+		return X86_VENDOR_AMD;
+
+	return X86_VENDOR_UNKNOWN;
+}
+
+void __init load_ucode_bsp(void)
+{
+	int vendor = x86_vendor();
+
+	if (vendor == X86_VENDOR_INTEL)
+		load_ucode_intel_bsp();
+}
+
+void __cpuinit load_ucode_ap(void)
+{
+	int vendor = x86_vendor();
+
+	if (vendor == X86_VENDOR_INTEL)
+		load_ucode_intel_ap();
+}
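
The QCHAR() trick works because CPUID leaf 0 returns the 12-byte vendor string
packed little-endian across EBX, EDX, ECX (in that order, which is why
CPUID_IS() compares ebx^a, edx^b, ecx^c). A small userspace sketch, for a
little-endian host only:

	#include <stdio.h>
	#include <stdint.h>

	/*
	 * Worked example: "GenuineIntel" comes back from CPUID leaf 0 as
	 * EBX="Genu", EDX="ineI", ECX="ntel", each four ASCII bytes packed
	 * little-endian - exactly what QCHAR() builds at compile time.
	 */
	#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))

	int main(void)
	{
		uint32_t ebx = QCHAR('G', 'e', 'n', 'u');	/* 0x756e6547 */
		uint32_t edx = QCHAR('i', 'n', 'e', 'I');	/* 0x49656e69 */
		uint32_t ecx = QCHAR('n', 't', 'e', 'l');	/* 0x6c65746e */

		/* prints "GenuineIntel" on a little-endian host */
		printf("%.4s%.4s%.4s\n",
		       (char *)&ebx, (char *)&edx, (char *)&ecx);
		return 0;
	}

One 32-bit compare per register is all the early loader can afford before the
normal CPU setup code has run.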
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 3544aed39338..5fb2cebf556b 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -79,7 +79,7 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 
-#include <asm/microcode.h>
+#include <asm/microcode_intel.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
 
@@ -87,59 +87,6 @@ MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
 MODULE_LICENSE("GPL");
 
-struct microcode_header_intel {
-	unsigned int hdrver;
-	unsigned int rev;
-	unsigned int date;
-	unsigned int sig;
-	unsigned int cksum;
-	unsigned int ldrver;
-	unsigned int pf;
-	unsigned int datasize;
-	unsigned int totalsize;
-	unsigned int reserved[3];
-};
-
-struct microcode_intel {
-	struct microcode_header_intel hdr;
-	unsigned int bits[0];
-};
-
-/* microcode format is extended from prescott processors */
-struct extended_signature {
-	unsigned int sig;
-	unsigned int pf;
-	unsigned int cksum;
-};
-
-struct extended_sigtable {
-	unsigned int count;
-	unsigned int cksum;
-	unsigned int reserved[3];
-	struct extended_signature sigs[0];
-};
-
-#define DEFAULT_UCODE_DATASIZE	(2000)
-#define MC_HEADER_SIZE		(sizeof(struct microcode_header_intel))
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
-#define EXT_HEADER_SIZE		(sizeof(struct extended_sigtable))
-#define EXT_SIGNATURE_SIZE	(sizeof(struct extended_signature))
-#define DWSIZE			(sizeof(u32))
-
-#define get_totalsize(mc) \
-	(((struct microcode_intel *)mc)->hdr.totalsize ? \
-	 ((struct microcode_intel *)mc)->hdr.totalsize : \
-	 DEFAULT_UCODE_TOTALSIZE)
-
-#define get_datasize(mc) \
-	(((struct microcode_intel *)mc)->hdr.datasize ? \
-	 ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
-
-#define sigmatch(s1, s2, p1, p2) \
-	(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
-
-#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
-
 static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
@@ -162,128 +109,25 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 	return 0;
 }
 
-static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
-{
-	return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
-}
-
-static inline int
-update_match_revision(struct microcode_header_intel *mc_header, int rev)
-{
-	return (mc_header->rev <= rev) ? 0 : 1;
-}
-
-static int microcode_sanity_check(void *mc)
-{
-	unsigned long total_size, data_size, ext_table_size;
-	struct microcode_header_intel *mc_header = mc;
-	struct extended_sigtable *ext_header = NULL;
-	int sum, orig_sum, ext_sigcount = 0, i;
-	struct extended_signature *ext_sig;
-
-	total_size = get_totalsize(mc_header);
-	data_size = get_datasize(mc_header);
-
-	if (data_size + MC_HEADER_SIZE > total_size) {
-		pr_err("error! Bad data size in microcode data file\n");
-		return -EINVAL;
-	}
-
-	if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
-		pr_err("error! Unknown microcode update format\n");
-		return -EINVAL;
-	}
-	ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
-	if (ext_table_size) {
-		if ((ext_table_size < EXT_HEADER_SIZE)
-		 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
-			pr_err("error! Small exttable size in microcode data file\n");
-			return -EINVAL;
-		}
-		ext_header = mc + MC_HEADER_SIZE + data_size;
-		if (ext_table_size != exttable_size(ext_header)) {
-			pr_err("error! Bad exttable size in microcode data file\n");
-			return -EFAULT;
-		}
-		ext_sigcount = ext_header->count;
-	}
-
-	/* check extended table checksum */
-	if (ext_table_size) {
-		int ext_table_sum = 0;
-		int *ext_tablep = (int *)ext_header;
-
-		i = ext_table_size / DWSIZE;
-		while (i--)
-			ext_table_sum += ext_tablep[i];
-		if (ext_table_sum) {
-			pr_warning("aborting, bad extended signature table checksum\n");
-			return -EINVAL;
-		}
-	}
-
-	/* calculate the checksum */
-	orig_sum = 0;
-	i = (MC_HEADER_SIZE + data_size) / DWSIZE;
-	while (i--)
-		orig_sum += ((int *)mc)[i];
-	if (orig_sum) {
-		pr_err("aborting, bad checksum\n");
-		return -EINVAL;
-	}
-	if (!ext_table_size)
-		return 0;
-	/* check extended signature checksum */
-	for (i = 0; i < ext_sigcount; i++) {
-		ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
-			  EXT_SIGNATURE_SIZE * i;
-		sum = orig_sum
-			- (mc_header->sig + mc_header->pf + mc_header->cksum)
-			+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
-		if (sum) {
-			pr_err("aborting, bad checksum\n");
-			return -EINVAL;
-		}
-	}
-	return 0;
-}
-
 /*
  * return 0 - no update found
  * return 1 - found update
  */
-static int
-get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
+static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
 {
-	struct microcode_header_intel *mc_header = mc;
-	struct extended_sigtable *ext_header;
-	unsigned long total_size = get_totalsize(mc_header);
-	int ext_sigcount, i;
-	struct extended_signature *ext_sig;
-
-	if (!update_match_revision(mc_header, rev))
-		return 0;
-
-	if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf))
-		return 1;
+	struct cpu_signature cpu_sig;
+	unsigned int csig, cpf, crev;
 
-	/* Look for ext. headers: */
-	if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
-		return 0;
+	collect_cpu_info(cpu, &cpu_sig);
 
-	ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
-	ext_sigcount = ext_header->count;
-	ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+	csig = cpu_sig.sig;
+	cpf = cpu_sig.pf;
+	crev = cpu_sig.rev;
 
-	for (i = 0; i < ext_sigcount; i++) {
-		if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf))
-			return 1;
-		ext_sig++;
-	}
-	return 0;
+	return get_matching_microcode(csig, cpf, mc_intel, crev);
 }
 
-static int apply_microcode(int cpu)
+int apply_microcode(int cpu)
 {
 	struct microcode_intel *mc_intel;
 	struct ucode_cpu_info *uci;
@@ -300,6 +144,14 @@ static int apply_microcode(int cpu)
 	if (mc_intel == NULL)
 		return 0;
 
+	/*
+	 * Microcode on this CPU could be updated earlier. Only apply the
+	 * microcode patch in mc_intel when it is newer than the one on this
+	 * CPU.
+	 */
+	if (get_matching_mc(mc_intel, cpu) == 0)
+		return 0;
+
 	/* write microcode via MSR 0x79 */
 	wrmsr(MSR_IA32_UCODE_WRITE,
 	      (unsigned long) mc_intel->bits,
@@ -338,6 +190,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 	unsigned int leftover = size;
 	enum ucode_state state = UCODE_OK;
 	unsigned int curr_mc_size = 0;
+	unsigned int csig, cpf;
 
 	while (leftover) {
 		struct microcode_header_intel mc_header;
@@ -362,11 +215,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 		}
 
 		if (get_ucode_data(mc, ucode_ptr, mc_size) ||
-		    microcode_sanity_check(mc) < 0) {
+		    microcode_sanity_check(mc, 1) < 0) {
 			break;
 		}
 
-		if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
+		csig = uci->cpu_sig.sig;
+		cpf = uci->cpu_sig.pf;
+		if (get_matching_microcode(csig, cpf, mc, new_rev)) {
 			vfree(new_mc);
 			new_rev = mc_header.rev;
 			new_mc  = mc;
@@ -393,6 +248,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 	vfree(uci->mc);
 	uci->mc = (struct microcode_intel *)new_mc;
 
+	/*
+	 * If early loading microcode is supported, save this mc into
+	 * permanent memory. So it will be loaded early when a CPU is hot added
+	 * or resumes.
+	 */
+	save_mc_for_early(new_mc);
+
 	pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
 		 cpu, new_rev, uci->cpu_sig.rev);
 out:
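
A side note on the wrmsr in apply_microcode() above: MSR_IA32_UCODE_WRITE
(0x79) takes the linear address of the microcode payload split into EDX:EAX,
and the kernel writes the high half as "(unsigned long) mc_intel->bits >> 16
>> 16" rather than ">> 32" because shifting a 32-bit long by 32 is undefined
behavior in C. A small sketch of the split, illustrative only:

	#include <stdio.h>
	#include <stdint.h>

	/*
	 * Splitting a pointer-sized value into the EDX:EAX halves a wrmsr
	 * takes. ">> 16 >> 16" stays well-defined when unsigned long is
	 * only 32 bits wide (where it simply yields 0).
	 */
	int main(void)
	{
		unsigned long addr = 0x12345678UL; /* stand-in for ->bits */
		uint32_t low  = (uint32_t)addr;
		uint32_t high = (uint32_t)(addr >> 16 >> 16);

		printf("low=0x%08x high=0x%08x\n", low, high);
		return 0;
	}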
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c
new file mode 100644
index 000000000000..7890bc838952
--- /dev/null
+++ b/arch/x86/kernel/microcode_intel_early.c
@@ -0,0 +1,796 @@
+/*
+ * Intel CPU microcode early update for Linux
+ *
+ * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ *		      H Peter Anvin <hpa@zytor.com>
+ *
+ * This allows early upgrading of microcode on Intel processors
+ * belonging to the IA-32 family - PentiumPro, Pentium II,
+ * Pentium III, Xeon, Pentium 4, etc.
+ *
+ * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
+ * Software Developer's Manual.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/earlycpio.h>
+#include <linux/initrd.h>
+#include <linux/cpu.h>
+#include <asm/msr.h>
+#include <asm/microcode_intel.h>
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/setup.h>
+
+unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
+struct mc_saved_data {
+	unsigned int mc_saved_count;
+	struct microcode_intel **mc_saved;
+} mc_saved_data;
+
+static enum ucode_state __cpuinit
+generic_load_microcode_early(struct microcode_intel **mc_saved_p,
+			     unsigned int mc_saved_count,
+			     struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *ucode_ptr, *new_mc = NULL;
+	int new_rev = uci->cpu_sig.rev;
+	enum ucode_state state = UCODE_OK;
+	unsigned int mc_size;
+	struct microcode_header_intel *mc_header;
+	unsigned int csig = uci->cpu_sig.sig;
+	unsigned int cpf = uci->cpu_sig.pf;
+	int i;
+
+	for (i = 0; i < mc_saved_count; i++) {
+		ucode_ptr = mc_saved_p[i];
+
+		mc_header = (struct microcode_header_intel *)ucode_ptr;
+		mc_size = get_totalsize(mc_header);
+		if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) {
+			new_rev = mc_header->rev;
+			new_mc  = ucode_ptr;
+		}
+	}
+
+	if (!new_mc) {
+		state = UCODE_NFOUND;
+		goto out;
+	}
+
+	uci->mc = (struct microcode_intel *)new_mc;
+out:
+	return state;
+}
+
+static void __cpuinit
+microcode_pointer(struct microcode_intel **mc_saved,
+		  unsigned long *mc_saved_in_initrd,
+		  unsigned long initrd_start, int mc_saved_count)
+{
+	int i;
+
+	for (i = 0; i < mc_saved_count; i++)
+		mc_saved[i] = (struct microcode_intel *)
+			      (mc_saved_in_initrd[i] + initrd_start);
+}
+
+#ifdef CONFIG_X86_32
+static void __cpuinit
+microcode_phys(struct microcode_intel **mc_saved_tmp,
+	       struct mc_saved_data *mc_saved_data)
+{
+	int i;
+	struct microcode_intel ***mc_saved;
+
+	mc_saved = (struct microcode_intel ***)
+		   __pa_symbol(&mc_saved_data->mc_saved);
+	for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
+		struct microcode_intel *p;
+
+		p = *(struct microcode_intel **)
+		    __pa(mc_saved_data->mc_saved + i);
+		mc_saved_tmp[i] = (struct microcode_intel *)__pa(p);
+	}
+}
+#endif
+
+static enum ucode_state __cpuinit
+load_microcode(struct mc_saved_data *mc_saved_data,
+	       unsigned long *mc_saved_in_initrd,
+	       unsigned long initrd_start,
+	       struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+	unsigned int count = mc_saved_data->mc_saved_count;
+
+	if (!mc_saved_data->mc_saved) {
+		microcode_pointer(mc_saved_tmp, mc_saved_in_initrd,
+				  initrd_start, count);
+
+		return generic_load_microcode_early(mc_saved_tmp, count, uci);
+	} else {
+#ifdef CONFIG_X86_32
+		microcode_phys(mc_saved_tmp, mc_saved_data);
+		return generic_load_microcode_early(mc_saved_tmp, count, uci);
+#else
+		return generic_load_microcode_early(mc_saved_data->mc_saved,
+						    count, uci);
+#endif
+	}
+}
+
+static u8 get_x86_family(unsigned long sig)
+{
+	u8 x86;
+
+	x86 = (sig >> 8) & 0xf;
+
+	if (x86 == 0xf)
+		x86 += (sig >> 20) & 0xff;
+
+	return x86;
+}
+
+static u8 get_x86_model(unsigned long sig)
+{
+	u8 x86, x86_model;
+
+	x86 = get_x86_family(sig);
+	x86_model = (sig >> 4) & 0xf;
+
+	if (x86 == 0x6 || x86 == 0xf)
+		x86_model += ((sig >> 16) & 0xf) << 4;
+
+	return x86_model;
+}
+
+/*
+ * Given CPU signature and a microcode patch, this function finds if the
+ * microcode patch has matching family and model with the CPU.
+ */
+static enum ucode_state
+matching_model_microcode(struct microcode_header_intel *mc_header,
+			 unsigned long sig)
+{
+	u8 x86, x86_model;
+	u8 x86_ucode, x86_model_ucode;
+	struct extended_sigtable *ext_header;
+	unsigned long total_size = get_totalsize(mc_header);
+	unsigned long data_size = get_datasize(mc_header);
+	int ext_sigcount, i;
+	struct extended_signature *ext_sig;
+
+	x86 = get_x86_family(sig);
+	x86_model = get_x86_model(sig);
+
+	x86_ucode = get_x86_family(mc_header->sig);
+	x86_model_ucode = get_x86_model(mc_header->sig);
+
+	if (x86 == x86_ucode && x86_model == x86_model_ucode)
+		return UCODE_OK;
+
+	/* Look for ext. headers: */
+	if (total_size <= data_size + MC_HEADER_SIZE)
+		return UCODE_NFOUND;
+
+	ext_header = (struct extended_sigtable *)
+		     mc_header + data_size + MC_HEADER_SIZE;
+	ext_sigcount = ext_header->count;
+	ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+
+	for (i = 0; i < ext_sigcount; i++) {
+		x86_ucode = get_x86_family(ext_sig->sig);
+		x86_model_ucode = get_x86_model(ext_sig->sig);
+
+		if (x86 == x86_ucode && x86_model == x86_model_ucode)
+			return UCODE_OK;
+
+		ext_sig++;
+	}
+
+	return UCODE_NFOUND;
+}
+
+static int
+save_microcode(struct mc_saved_data *mc_saved_data,
+	       struct microcode_intel **mc_saved_src,
+	       unsigned int mc_saved_count)
+{
+	int i, j;
+	struct microcode_intel **mc_saved_p;
+	int ret;
+
+	if (!mc_saved_count)
+		return -EINVAL;
+
+	/*
+	 * Copy new microcode data.
+	 */
+	mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *),
+			     GFP_KERNEL);
+	if (!mc_saved_p)
+		return -ENOMEM;
+
+	for (i = 0; i < mc_saved_count; i++) {
+		struct microcode_intel *mc = mc_saved_src[i];
+		struct microcode_header_intel *mc_header = &mc->hdr;
+		unsigned long mc_size = get_totalsize(mc_header);
+		mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL);
+		if (!mc_saved_p[i]) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		if (!mc_saved_src[i]) {
+			ret = -EINVAL;
+			goto err;
+		}
+		memcpy(mc_saved_p[i], mc, mc_size);
+	}
+
+	/*
+	 * Point to newly saved microcode.
+	 */
+	mc_saved_data->mc_saved = mc_saved_p;
+	mc_saved_data->mc_saved_count = mc_saved_count;
+
+	return 0;
+
+err:
+	for (j = 0; j <= i; j++)
+		kfree(mc_saved_p[j]);
+	kfree(mc_saved_p);
+
+	return ret;
+}
+
+/*
+ * A microcode patch in ucode_ptr is saved into mc_saved
+ * - if it has matching signature and newer revision compared to an existing
+ *   patch in mc_saved.
+ * - or if it is a newly discovered microcode patch.
+ *
+ * The microcode patch should have matching model with CPU.
+ */
+static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr,
+		     unsigned int *mc_saved_count_p)
+{
+	int i;
+	int found = 0;
+	unsigned int mc_saved_count = *mc_saved_count_p;
+	struct microcode_header_intel *mc_header;
+
+	mc_header = (struct microcode_header_intel *)ucode_ptr;
+	for (i = 0; i < mc_saved_count; i++) {
+		unsigned int sig, pf;
+		unsigned int new_rev;
+		struct microcode_header_intel *mc_saved_header =
+			     (struct microcode_header_intel *)mc_saved[i];
+		sig = mc_saved_header->sig;
+		pf = mc_saved_header->pf;
+		new_rev = mc_header->rev;
+
+		if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) {
+			found = 1;
+			if (update_match_revision(mc_header, new_rev)) {
+				/*
+				 * Found an older ucode saved before.
+				 * Replace the older one with this newer
+				 * one.
+				 */
+				mc_saved[i] =
+					(struct microcode_intel *)ucode_ptr;
+				break;
+			}
+		}
+	}
+	if (i >= mc_saved_count && !found)
+		/*
+		 * This ucode is first time discovered in ucode file.
+		 * Save it to memory.
+		 */
+		mc_saved[mc_saved_count++] =
+				(struct microcode_intel *)ucode_ptr;
+
+	*mc_saved_count_p = mc_saved_count;
+}
+
+/*
+ * Get microcode matching with BSP's model. Only CPUs with the same model as
+ * BSP can stay in the platform.
+ */
+static enum ucode_state __init
+get_matching_model_microcode(int cpu, unsigned long start,
+			     void *data, size_t size,
+			     struct mc_saved_data *mc_saved_data,
+			     unsigned long *mc_saved_in_initrd,
+			     struct ucode_cpu_info *uci)
+{
+	u8 *ucode_ptr = data;
+	unsigned int leftover = size;
+	enum ucode_state state = UCODE_OK;
+	unsigned int mc_size;
+	struct microcode_header_intel *mc_header;
+	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+	unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
+	int i;
+
+	while (leftover) {
+		mc_header = (struct microcode_header_intel *)ucode_ptr;
+
+		mc_size = get_totalsize(mc_header);
+		if (!mc_size || mc_size > leftover ||
+			microcode_sanity_check(ucode_ptr, 0) < 0)
+			break;
+
+		leftover -= mc_size;
+
+		/*
+		 * Since APs with same family and model as the BSP may boot in
+		 * the platform, we need to find and save microcode patches
+		 * with the same family and model as the BSP.
+		 */
+		if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
+			 UCODE_OK) {
+			ucode_ptr += mc_size;
+			continue;
+		}
+
+		_save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count);
+
+		ucode_ptr += mc_size;
+	}
+
+	if (leftover) {
+		state = UCODE_ERROR;
+		goto out;
+	}
+
+	if (mc_saved_count == 0) {
+		state = UCODE_NFOUND;
+		goto out;
+	}
+
+	for (i = 0; i < mc_saved_count; i++)
+		mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
+
+	mc_saved_data->mc_saved_count = mc_saved_count;
+out:
+	return state;
+}
+
+#define native_rdmsr(msr, val1, val2)		\
+do {						\
+	u64 __val = native_read_msr((msr));	\
+	(void)((val1) = (u32)__val);		\
+	(void)((val2) = (u32)(__val >> 32));	\
+} while (0)
+
+#define native_wrmsr(msr, low, high)		\
+	native_write_msr(msr, low, high);
+
+static int __cpuinit collect_cpu_info_early(struct ucode_cpu_info *uci)
+{
+	unsigned int val[2];
+	u8 x86, x86_model;
+	struct cpu_signature csig;
+	unsigned int eax, ebx, ecx, edx;
+
+	csig.sig = 0;
+	csig.pf = 0;
+	csig.rev = 0;
+
+	memset(uci, 0, sizeof(*uci));
+
+	eax = 0x00000001;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+	csig.sig = eax;
+
+	x86 = get_x86_family(csig.sig);
+	x86_model = get_x86_model(csig.sig);
+
+	if ((x86_model >= 5) || (x86 > 6)) {
+		/* get processor flags from MSR 0x17 */
+		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+		csig.pf = 1 << ((val[1] >> 18) & 7);
+	}
+	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+
+	/* As documented in the SDM: Do a CPUID 1 here */
+	sync_core();
+
+	/* get the current revision from MSR 0x8B */
+	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+
+	csig.rev = val[1];
+
+	uci->cpu_sig = csig;
+	uci->valid = 1;
+
+	return 0;
+}
+
+#ifdef DEBUG
+static void __ref show_saved_mc(void)
+{
+	int i, j;
+	unsigned int sig, pf, rev, total_size, data_size, date;
+	struct ucode_cpu_info uci;
+
+	if (mc_saved_data.mc_saved_count == 0) {
+		pr_debug("no microcode data saved.\n");
+		return;
+	}
+	pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
+
+	collect_cpu_info_early(&uci);
+
+	sig = uci.cpu_sig.sig;
+	pf = uci.cpu_sig.pf;
+	rev = uci.cpu_sig.rev;
+	pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n",
+		 smp_processor_id(), sig, pf, rev);
+
+	for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
+		struct microcode_header_intel *mc_saved_header;
+		struct extended_sigtable *ext_header;
+		int ext_sigcount;
+		struct extended_signature *ext_sig;
+
+		mc_saved_header = (struct microcode_header_intel *)
+				  mc_saved_data.mc_saved[i];
+		sig = mc_saved_header->sig;
+		pf = mc_saved_header->pf;
+		rev = mc_saved_header->rev;
+		total_size = get_totalsize(mc_saved_header);
+		data_size = get_datasize(mc_saved_header);
+		date = mc_saved_header->date;
+
+		pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, total size=0x%x, date = %04x-%02x-%02x\n",
+			 i, sig, pf, rev, total_size,
+			 date & 0xffff,
+			 date >> 24,
+			 (date >> 16) & 0xff);
+
+		/* Look for ext. headers: */
+		if (total_size <= data_size + MC_HEADER_SIZE)
+			continue;
+
+		ext_header = (struct extended_sigtable *)
+			     mc_saved_header + data_size + MC_HEADER_SIZE;
+		ext_sigcount = ext_header->count;
+		ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+
+		for (j = 0; j < ext_sigcount; j++) {
+			sig = ext_sig->sig;
+			pf = ext_sig->pf;
+
+			pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n",
+				 j, sig, pf);
+
+			ext_sig++;
+		}
+	}
+}
+#else
+static inline void show_saved_mc(void)
+{
+}
+#endif
+
+#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
+/*
+ * Save this mc into mc_saved_data. So it will be loaded early when a CPU is
+ * hot added or resumes.
+ *
+ * Please make sure this mc is a valid microcode patch before calling
+ * this function.
+ */
+int save_mc_for_early(u8 *mc)
+{
+	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+	unsigned int mc_saved_count_init;
+	unsigned int mc_saved_count;
+	struct microcode_intel **mc_saved;
+	int ret = 0;
+	int i;
+
+	/*
+	 * Hold hotplug lock so mc_saved_data is not accessed by a CPU in
+	 * hotplug.
+	 */
+	cpu_hotplug_driver_lock();
+
+	mc_saved_count_init = mc_saved_data.mc_saved_count;
+	mc_saved_count = mc_saved_data.mc_saved_count;
+	mc_saved = mc_saved_data.mc_saved;
+
+	if (mc_saved && mc_saved_count)
+		memcpy(mc_saved_tmp, mc_saved,
+		       mc_saved_count * sizeof(struct microcode_intel *));
+	/*
+	 * Save the microcode patch mc in mc_saved_tmp structure if it's a
+	 * newer version.
+	 */
+	_save_mc(mc_saved_tmp, mc, &mc_saved_count);
+
+	/*
+	 * Save the mc_saved_tmp in global mc_saved_data.
+	 */
+	ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
+	if (ret) {
+		pr_err("Can not save microcode patch.\n");
+		goto out;
+	}
+
+	show_saved_mc();
+
+	/*
+	 * Free old saved microcode data.
+	 */
+	if (mc_saved) {
+		for (i = 0; i < mc_saved_count_init; i++)
+			kfree(mc_saved[i]);
+		kfree(mc_saved);
+	}
+
+out:
+	cpu_hotplug_driver_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(save_mc_for_early);
+#endif
+
+static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
+static __init enum ucode_state
+scan_microcode(unsigned long start, unsigned long end,
+	       struct mc_saved_data *mc_saved_data,
+	       unsigned long *mc_saved_in_initrd,
+	       struct ucode_cpu_info *uci)
+{
+	unsigned int size = end - start + 1;
+	struct cpio_data cd;
+	long offset = 0;
+#ifdef CONFIG_X86_32
+	char *p = (char *)__pa_symbol(ucode_name);
+#else
+	char *p = ucode_name;
+#endif
+
+	cd.data = NULL;
+	cd.size = 0;
+
+	cd = find_cpio_data(p, (void *)start, size, &offset);
+	if (!cd.data)
+		return UCODE_ERROR;
+
+	return get_matching_model_microcode(0, start, cd.data, cd.size,
+					    mc_saved_data, mc_saved_in_initrd,
+					    uci);
+}
+
+/*
+ * Print ucode update info.
+ */
+static void __cpuinit
+print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
+{
+	int cpu = smp_processor_id();
+
+	pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
+		cpu,
+		uci->cpu_sig.rev,
+		date & 0xffff,
+		date >> 24,
+		(date >> 16) & 0xff);
+}
+
+#ifdef CONFIG_X86_32
+
+static int delay_ucode_info;
+static int current_mc_date;
+
+/*
+ * Print early updated ucode info after printk works. This is delayed info
+ * dump.
+ */
+void __cpuinit show_ucode_info_early(void)
+{
+	struct ucode_cpu_info uci;
+
+	if (delay_ucode_info) {
+		collect_cpu_info_early(&uci);
+		print_ucode_info(&uci, current_mc_date);
+		delay_ucode_info = 0;
+	}
+}
+
+/*
+ * At this point, we can not call printk() yet. Keep microcode patch number in
+ * mc_saved_data.mc_saved and delay printing microcode info in
+ * show_ucode_info_early() until printk() works.
+ */
+static void __cpuinit print_ucode(struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *mc_intel;
+	int *delay_ucode_info_p;
+	int *current_mc_date_p;
+
+	mc_intel = uci->mc;
+	if (mc_intel == NULL)
+		return;
+
+	delay_ucode_info_p = (int *)__pa_symbol(&delay_ucode_info);
+	current_mc_date_p = (int *)__pa_symbol(&current_mc_date);
+
+	*delay_ucode_info_p = 1;
+	*current_mc_date_p = mc_intel->hdr.date;
+}
+#else
+
+/*
+ * Flush global tlb. We only do this in x86_64 where paging has been enabled
+ * already and PGE should be enabled as well.
+ */
+static inline void __cpuinit flush_tlb_early(void)
+{
+	__native_flush_tlb_global_irq_disabled();
+}
+
+static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *mc_intel;
+
+	mc_intel = uci->mc;
+	if (mc_intel == NULL)
+		return;
+
+	print_ucode_info(uci, mc_intel->hdr.date);
+}
+#endif
+
+static int apply_microcode_early(struct mc_saved_data *mc_saved_data,
+				 struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *mc_intel;
+	unsigned int val[2];
+
+	mc_intel = uci->mc;
+	if (mc_intel == NULL)
+		return 0;
+
+	/* write microcode via MSR 0x79 */
+	native_wrmsr(MSR_IA32_UCODE_WRITE,
+		     (unsigned long) mc_intel->bits,
+		     (unsigned long) mc_intel->bits >> 16 >> 16);
+	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+
+	/* As documented in the SDM: Do a CPUID 1 here */
+	sync_core();
+
+	/* get the current revision from MSR 0x8B */
+	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+	if (val[1] != mc_intel->hdr.rev)
+		return -1;
+
+#ifdef CONFIG_X86_64
+	/* Flush global tlb. This is precaution. */
+	flush_tlb_early();
+#endif
+	uci->cpu_sig.rev = val[1];
+
+	print_ucode(uci);
+
+	return 0;
+}
+
+/*
+ * This function converts microcode patch offsets previously stored in
+ * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
+ */
+int __init save_microcode_in_initrd(void)
+{
+	unsigned int count = mc_saved_data.mc_saved_count;
+	struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
+	int ret = 0;
+
+	if (count == 0)
+		return ret;
+
+	microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
+	ret = save_microcode(&mc_saved_data, mc_saved, count);
+	if (ret)
+		pr_err("Can not save microcode patches from initrd");
+
+	show_saved_mc();
+
+	return ret;
+}
+
+static void __init
+_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
+		      unsigned long *mc_saved_in_initrd,
+		      unsigned long initrd_start_early,
+		      unsigned long initrd_end_early,
+		      struct ucode_cpu_info *uci)
+{
+	collect_cpu_info_early(uci);
+	scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data,
+		       mc_saved_in_initrd, uci);
+	load_microcode(mc_saved_data, mc_saved_in_initrd,
+		       initrd_start_early, uci);
+	apply_microcode_early(mc_saved_data, uci);
+}
+
+void __init
+load_ucode_intel_bsp(void)
+{
+	u64 ramdisk_image, ramdisk_size;
+	unsigned long initrd_start_early, initrd_end_early;
+	struct ucode_cpu_info uci;
+#ifdef CONFIG_X86_32
+	struct boot_params *boot_params_p;
+
+	boot_params_p = (struct boot_params *)__pa_symbol(&boot_params);
+	ramdisk_image = boot_params_p->hdr.ramdisk_image;
+	ramdisk_size  = boot_params_p->hdr.ramdisk_size;
+	initrd_start_early = ramdisk_image;
+	initrd_end_early = initrd_start_early + ramdisk_size;
+
+	_load_ucode_intel_bsp(
+		(struct mc_saved_data *)__pa_symbol(&mc_saved_data),
+		(unsigned long *)__pa_symbol(&mc_saved_in_initrd),
+		initrd_start_early, initrd_end_early, &uci);
+#else
+	ramdisk_image = boot_params.hdr.ramdisk_image;
+	ramdisk_size  = boot_params.hdr.ramdisk_size;
+	initrd_start_early = ramdisk_image + PAGE_OFFSET;
+	initrd_end_early = initrd_start_early + ramdisk_size;
+
+	_load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd,
+			      initrd_start_early, initrd_end_early, &uci);
+#endif
+}
+
+void __cpuinit load_ucode_intel_ap(void)
+{
+	struct mc_saved_data *mc_saved_data_p;
+	struct ucode_cpu_info uci;
+	unsigned long *mc_saved_in_initrd_p;
+	unsigned long initrd_start_addr;
+#ifdef CONFIG_X86_32
+	unsigned long *initrd_start_p;
+
+	mc_saved_in_initrd_p =
+		(unsigned long *)__pa_symbol(mc_saved_in_initrd);
+	mc_saved_data_p = (struct mc_saved_data *)__pa_symbol(&mc_saved_data);
+	initrd_start_p = (unsigned long *)__pa_symbol(&initrd_start);
+	initrd_start_addr = (unsigned long)__pa_symbol(*initrd_start_p);
+#else
+	mc_saved_data_p = &mc_saved_data;
+	mc_saved_in_initrd_p = mc_saved_in_initrd;
+	initrd_start_addr = initrd_start;
+#endif
+
+	/*
+	 * If there is no valid ucode previously saved in memory, no need to
+	 * update ucode on this AP.
+	 */
+	if (mc_saved_data_p->mc_saved_count == 0)
+		return;
+
+	collect_cpu_info_early(&uci);
+	load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
+		       initrd_start_addr, &uci);
+	apply_microcode_early(mc_saved_data_p, &uci);
+}
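
The family/model decode used throughout this file follows the Intel SDM's
composition rules: family 0xf adds the extended-family byte, and families 0x6
and 0xf prepend the extended-model nibble. A worked example (userspace sketch
of the same logic as get_x86_family()/get_x86_model() above): signature
0x000306a9 decodes to family 6, model 0x3a.

	#include <stdio.h>

	static unsigned char x86_family(unsigned long sig)
	{
		unsigned char x86 = (sig >> 8) & 0xf;

		if (x86 == 0xf)
			x86 += (sig >> 20) & 0xff;	/* add ext. family */
		return x86;
	}

	static unsigned char x86_model(unsigned long sig)
	{
		unsigned char x86 = x86_family(sig);
		unsigned char model = (sig >> 4) & 0xf;

		if (x86 == 0x6 || x86 == 0xf)
			model += ((sig >> 16) & 0xf) << 4; /* ext. model */
		return model;
	}

	int main(void)
	{
		printf("family=%u model=0x%x\n",
		       x86_family(0x000306a9), x86_model(0x000306a9));
		return 0;	/* prints "family=6 model=0x3a" */
	}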
diff --git a/arch/x86/kernel/microcode_intel_lib.c b/arch/x86/kernel/microcode_intel_lib.c
new file mode 100644
index 000000000000..ce69320d0179
--- /dev/null
+++ b/arch/x86/kernel/microcode_intel_lib.c
@@ -0,0 +1,174 @@
+/*
+ * Intel CPU Microcode Update Driver for Linux
+ *
+ * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ *		      H Peter Anvin <hpa@zytor.com>
+ *
+ * This driver allows upgrading microcode on Intel processors
+ * belonging to the IA-32 family - PentiumPro, Pentium II,
+ * Pentium III, Xeon, Pentium 4, etc.
+ *
+ * Reference: Section 8.11 of Volume 3a, IA-32 Intel Architecture
+ * Software Developer's Manual
+ * Order Number 253668 or free download from:
+ *
+ * http://developer.intel.com/Assets/PDF/manual/253668.pdf
+ *
+ * For more information, go to http://www.urbanmyth.org/microcode
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
25#include <linux/firmware.h>
26#include <linux/uaccess.h>
27#include <linux/kernel.h>
28#include <linux/module.h>
29
30#include <asm/microcode_intel.h>
31#include <asm/processor.h>
32#include <asm/msr.h>
33
34static inline int
35update_match_cpu(unsigned int csig, unsigned int cpf,
36 unsigned int sig, unsigned int pf)
37{
38 return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1;
39}
40
41int
42update_match_revision(struct microcode_header_intel *mc_header, int rev)
43{
44 return (mc_header->rev <= rev) ? 0 : 1;
45}
46
47int microcode_sanity_check(void *mc, int print_err)
48{
49 unsigned long total_size, data_size, ext_table_size;
50 struct microcode_header_intel *mc_header = mc;
51 struct extended_sigtable *ext_header = NULL;
52 int sum, orig_sum, ext_sigcount = 0, i;
53 struct extended_signature *ext_sig;
54
55 total_size = get_totalsize(mc_header);
56 data_size = get_datasize(mc_header);
57
58 if (data_size + MC_HEADER_SIZE > total_size) {
59 if (print_err)
60 pr_err("error! Bad data size in microcode data file\n");
61 return -EINVAL;
62 }
63
64 if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
65 if (print_err)
66 pr_err("error! Unknown microcode update format\n");
67 return -EINVAL;
68 }
69 ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
70 if (ext_table_size) {
71 if ((ext_table_size < EXT_HEADER_SIZE)
72 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
73 if (print_err)
74 pr_err("error! Small exttable size in microcode data file\n");
75 return -EINVAL;
76 }
77 ext_header = mc + MC_HEADER_SIZE + data_size;
78 if (ext_table_size != exttable_size(ext_header)) {
79 if (print_err)
80 pr_err("error! Bad exttable size in microcode data file\n");
81 return -EFAULT;
82 }
83 ext_sigcount = ext_header->count;
84 }
85
86 /* check extended table checksum */
87 if (ext_table_size) {
88 int ext_table_sum = 0;
89 int *ext_tablep = (int *)ext_header;
90
91 i = ext_table_size / DWSIZE;
92 while (i--)
93 ext_table_sum += ext_tablep[i];
94 if (ext_table_sum) {
95 if (print_err)
96 pr_warn("aborting, bad extended signature table checksum\n");
97 return -EINVAL;
98 }
99 }
100
101 /* calculate the checksum */
102 orig_sum = 0;
103 i = (MC_HEADER_SIZE + data_size) / DWSIZE;
104 while (i--)
105 orig_sum += ((int *)mc)[i];
106 if (orig_sum) {
107 if (print_err)
108 pr_err("aborting, bad checksum\n");
109 return -EINVAL;
110 }
111 if (!ext_table_size)
112 return 0;
113 /* check extended signature checksum */
114 for (i = 0; i < ext_sigcount; i++) {
115 ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
116 EXT_SIGNATURE_SIZE * i;
117 sum = orig_sum
118 - (mc_header->sig + mc_header->pf + mc_header->cksum)
119 + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
120 if (sum) {
121 if (print_err)
122 pr_err("aborting, bad checksum\n");
123 return -EINVAL;
124 }
125 }
126 return 0;
127}
128EXPORT_SYMBOL_GPL(microcode_sanity_check);
129
130/*
131 * return 0 - no update found
132 * return 1 - found update
133 */
134int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev)
135{
136 struct microcode_header_intel *mc_header = mc;
137 struct extended_sigtable *ext_header;
138 unsigned long total_size = get_totalsize(mc_header);
139 int ext_sigcount, i;
140 struct extended_signature *ext_sig;
141
142 if (update_match_cpu(csig, cpf, mc_header->sig, mc_header->pf))
143 return 1;
144
145 /* Look for ext. headers: */
146 if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
147 return 0;
148
149 ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
150 ext_sigcount = ext_header->count;
151 ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
152
153 for (i = 0; i < ext_sigcount; i++) {
154 if (update_match_cpu(csig, cpf, ext_sig->sig, ext_sig->pf))
155 return 1;
156 ext_sig++;
157 }
158 return 0;
159}
160
161/*
162 * return 0 - no update found
163 * return 1 - found update
164 */
165int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev)
166{
167 struct microcode_header_intel *mc_header = mc;
168
169 if (!update_match_revision(mc_header, rev))
170 return 0;
171
172 return get_matching_sig(csig, cpf, mc, rev);
173}
174EXPORT_SYMBOL_GPL(get_matching_microcode);
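Taken together, microcode_sanity_check(), get_matching_sig() and
get_matching_microcode() give a caller everything needed to decide whether a
candidate update blob should be applied. A minimal caller sketch (the
CPUID/MSR plumbing is elided; the wrapper name here is hypothetical):

	/*
	 * Hypothetical wrapper: csig/cpf/rev would normally come from
	 * CPUID(1) and the microcode revision MSR on the target CPU.
	 */
	static int want_this_update(void *mc, unsigned int csig,
				    int cpf, int rev)
	{
		if (microcode_sanity_check(mc, 1))
			return 0;	/* malformed blob, skip it */
		/* 1 only if newer than rev AND sig/pf match: */
		return get_matching_microcode(csig, cpf, mc, rev);
	}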
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d41815265a0b..4903a03ae876 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -16,6 +16,7 @@
16#include <asm/tlb.h> 16#include <asm/tlb.h>
17#include <asm/proto.h> 17#include <asm/proto.h>
18#include <asm/dma.h> /* for MAX_DMA_PFN */ 18#include <asm/dma.h> /* for MAX_DMA_PFN */
19#include <asm/microcode.h>
19 20
20#include "mm_internal.h" 21#include "mm_internal.h"
21 22
@@ -534,6 +535,15 @@ void free_initmem(void)
534#ifdef CONFIG_BLK_DEV_INITRD 535#ifdef CONFIG_BLK_DEV_INITRD
535void __init free_initrd_mem(unsigned long start, unsigned long end) 536void __init free_initrd_mem(unsigned long start, unsigned long end)
536{ 537{
538#ifdef CONFIG_MICROCODE_EARLY
539 /*
540 * Remember, initrd memory may contain microcode or other useful things.
541 * Before we lose initrd mem, we need to find a place to hold them
542 * now that normal virtual memory is enabled.
543 */
544 save_microcode_in_initrd();
545#endif
546
537 /* 547 /*
538 * end could be not aligned, and we can not align that, 548 * end could be not aligned, and we can not align that,
539 * decompressor could be confused by aligned initrd_end 549 * decompressor could be confused by aligned initrd_end
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f5e86eee4e0e..e8e34938c57d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1408,7 +1408,6 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
1408 xen_mc_callback(set_current_cr3, (void *)cr3); 1408 xen_mc_callback(set_current_cr3, (void *)cr3);
1409 } 1409 }
1410} 1410}
1411
1412static void xen_write_cr3(unsigned long cr3) 1411static void xen_write_cr3(unsigned long cr3)
1413{ 1412{
1414 BUG_ON(preemptible()); 1413 BUG_ON(preemptible());
@@ -1434,6 +1433,45 @@ static void xen_write_cr3(unsigned long cr3)
1434 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ 1433 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
1435} 1434}
1436 1435
1436#ifdef CONFIG_X86_64
1437/*
1438 * At the start of the day - when Xen launches a guest, it has already
1439 * built pagetables for the guest. We diligently look over them
1440 * in xen_setup_kernel_pagetable and graft them, as appropriate, into the
1441 * init_level4_pgt and its friends. Then when we are happy we load
1442 * the new init_level4_pgt - and continue on.
1443 *
1444 * The generic code starts (start_kernel) and 'init_mem_mapping' sets
1445 * up the rest of the pagetables. When it has completed it loads the cr3.
1446 * N.B. that baremetal would start at 'start_kernel' (and the early
1447 * #PF handler would create bootstrap pagetables) - so we are running
1448 * under the same assumptions about what to do when write_cr3 is
1449 * executed at this point.
1450 *
1451 * Since there are no user-page tables at all, we have two variants
1452 * of xen_write_cr3 - the early bootup (this one), and the late one
1453 * (xen_write_cr3). The reason we have to do that is that in 64-bit
1454 * the Linux kernel and user-space are both in ring 3 while the
1455 * hypervisor is in ring 0.
1456 */
1457static void __init xen_write_cr3_init(unsigned long cr3)
1458{
1459 BUG_ON(preemptible());
1460
1461 xen_mc_batch(); /* disables interrupts */
1462
1463 /* Update while interrupts are disabled, so it's atomic with
1464 respect to IPIs */
1465 this_cpu_write(xen_cr3, cr3);
1466
1467 __xen_write_cr3(true, cr3);
1468
1469 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
1470
1471 pv_mmu_ops.write_cr3 = &xen_write_cr3;
1472}
1473#endif
1474
1437static int xen_pgd_alloc(struct mm_struct *mm) 1475static int xen_pgd_alloc(struct mm_struct *mm)
1438{ 1476{
1439 pgd_t *pgd = mm->pgd; 1477 pgd_t *pgd = mm->pgd;
@@ -2102,11 +2140,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2102 .write_cr2 = xen_write_cr2, 2140 .write_cr2 = xen_write_cr2,
2103 2141
2104 .read_cr3 = xen_read_cr3, 2142 .read_cr3 = xen_read_cr3,
2105#ifdef CONFIG_X86_32
2106 .write_cr3 = xen_write_cr3_init, 2143 .write_cr3 = xen_write_cr3_init,
2107#else
2108 .write_cr3 = xen_write_cr3,
2109#endif
2110 2144
2111 .flush_tlb_user = xen_flush_tlb, 2145 .flush_tlb_user = xen_flush_tlb,
2112 .flush_tlb_kernel = xen_flush_tlb, 2146 .flush_tlb_kernel = xen_flush_tlb,
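The subtle part of the hunk above is that xen_write_cr3_init() is a one-shot:
its last act is to repoint pv_mmu_ops.write_cr3 at the regular
xen_write_cr3(), so the early variant can never run twice. A stripped-down
sketch of that self-retiring callback pattern (the names here are
illustrative, not from the patch):

	struct mmu_ops {
		void (*write_cr3)(unsigned long cr3);
	};
	static struct mmu_ops ops;

	static void write_cr3_late(unsigned long cr3)
	{
		/* the normal runtime path */
	}

	static void write_cr3_early(unsigned long cr3)
	{
		/* ... early bootstrap work ... */
		ops.write_cr3 = write_cr3_late;	/* retire ourselves */
	}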
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 85e81ec1451e..594bda9dcfc8 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -445,7 +445,7 @@ static struct entropy_store input_pool = {
445 .poolinfo = &poolinfo_table[0], 445 .poolinfo = &poolinfo_table[0],
446 .name = "input", 446 .name = "input",
447 .limit = 1, 447 .limit = 1,
448 .lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock), 448 .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
449 .pool = input_pool_data 449 .pool = input_pool_data
450}; 450};
451 451
@@ -454,7 +454,7 @@ static struct entropy_store blocking_pool = {
454 .name = "blocking", 454 .name = "blocking",
455 .limit = 1, 455 .limit = 1,
456 .pull = &input_pool, 456 .pull = &input_pool,
457 .lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock), 457 .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
458 .pool = blocking_pool_data 458 .pool = blocking_pool_data
459}; 459};
460 460
@@ -462,7 +462,7 @@ static struct entropy_store nonblocking_pool = {
462 .poolinfo = &poolinfo_table[1], 462 .poolinfo = &poolinfo_table[1],
463 .name = "nonblocking", 463 .name = "nonblocking",
464 .pull = &input_pool, 464 .pull = &input_pool,
465 .lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock), 465 .lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
466 .pool = nonblocking_pool_data 466 .pool = nonblocking_pool_data
467}; 467};
468 468
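These hunks (and the chipidea and fs/file.c ones below) fix the same mistake:
__SPIN_LOCK_UNLOCKED() takes the lock variable itself, whose name becomes the
lockdep class name, not its address. Passing "&pool.lock" compiled, but the
stray '&' ended up embedded in the reported lock-class name. The pattern for
any static initializer (illustrative struct):

	struct pool {
		spinlock_t lock;
	};

	static struct pool p = {
		/* right: name the member, don't take its address */
		.lock = __SPIN_LOCK_UNLOCKED(p.lock),
	};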
diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c
index fa080ebd568f..ffeebc7e9f1c 100644
--- a/drivers/idle/i7300_idle.c
+++ b/drivers/idle/i7300_idle.c
@@ -75,7 +75,7 @@ static unsigned long past_skip;
75 75
76static struct pci_dev *fbd_dev; 76static struct pci_dev *fbd_dev;
77 77
78static spinlock_t i7300_idle_lock; 78static raw_spinlock_t i7300_idle_lock;
79static int i7300_idle_active; 79static int i7300_idle_active;
80 80
81static u8 i7300_idle_thrtctl_saved; 81static u8 i7300_idle_thrtctl_saved;
@@ -457,7 +457,7 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val,
457 idle_begin_time = ktime_get(); 457 idle_begin_time = ktime_get();
458 } 458 }
459 459
460 spin_lock_irqsave(&i7300_idle_lock, flags); 460 raw_spin_lock_irqsave(&i7300_idle_lock, flags);
461 if (val == IDLE_START) { 461 if (val == IDLE_START) {
462 462
463 cpumask_set_cpu(smp_processor_id(), idle_cpumask); 463 cpumask_set_cpu(smp_processor_id(), idle_cpumask);
@@ -506,7 +506,7 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val,
506 } 506 }
507 } 507 }
508end: 508end:
509 spin_unlock_irqrestore(&i7300_idle_lock, flags); 509 raw_spin_unlock_irqrestore(&i7300_idle_lock, flags);
510 return 0; 510 return 0;
511} 511}
512 512
@@ -548,7 +548,7 @@ struct debugfs_file_info {
548 548
549static int __init i7300_idle_init(void) 549static int __init i7300_idle_init(void)
550{ 550{
551 spin_lock_init(&i7300_idle_lock); 551 raw_spin_lock_init(&i7300_idle_lock);
552 total_us = 0; 552 total_us = 0;
553 553
554 if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload)) 554 if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload))
diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
index 3bc244d2636a..a62c4a47d52c 100644
--- a/drivers/usb/chipidea/debug.c
+++ b/drivers/usb/chipidea/debug.c
@@ -222,7 +222,7 @@ static struct {
222} dbg_data = { 222} dbg_data = {
223 .idx = 0, 223 .idx = 0,
224 .tty = 0, 224 .tty = 0,
225 .lck = __RW_LOCK_UNLOCKED(lck) 225 .lck = __RW_LOCK_UNLOCKED(dbg_data.lck)
226}; 226};
227 227
228/** 228/**
diff --git a/fs/file.c b/fs/file.c
index 2b3570b7caeb..3906d9577a18 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -516,7 +516,7 @@ struct files_struct init_files = {
516 .close_on_exec = init_files.close_on_exec_init, 516 .close_on_exec = init_files.close_on_exec_init,
517 .open_fds = init_files.open_fds_init, 517 .open_fds = init_files.open_fds_init,
518 }, 518 },
519 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 519 .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
520}; 520};
521 521
522/* 522/*
diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h
index 2533fddd34a6..d8d4c898c1bb 100644
--- a/include/asm-generic/cmpxchg-local.h
+++ b/include/asm-generic/cmpxchg-local.h
@@ -21,7 +21,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
21 if (size == 8 && sizeof(unsigned long) != 8) 21 if (size == 8 && sizeof(unsigned long) != 8)
22 wrong_size_cmpxchg(ptr); 22 wrong_size_cmpxchg(ptr);
23 23
24 local_irq_save(flags); 24 raw_local_irq_save(flags);
25 switch (size) { 25 switch (size) {
26 case 1: prev = *(u8 *)ptr; 26 case 1: prev = *(u8 *)ptr;
27 if (prev == old) 27 if (prev == old)
@@ -42,7 +42,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
42 default: 42 default:
43 wrong_size_cmpxchg(ptr); 43 wrong_size_cmpxchg(ptr);
44 } 44 }
45 local_irq_restore(flags); 45 raw_local_irq_restore(flags);
46 return prev; 46 return prev;
47} 47}
48 48
@@ -55,11 +55,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr,
55 u64 prev; 55 u64 prev;
56 unsigned long flags; 56 unsigned long flags;
57 57
58 local_irq_save(flags); 58 raw_local_irq_save(flags);
59 prev = *(u64 *)ptr; 59 prev = *(u64 *)ptr;
60 if (prev == old) 60 if (prev == old)
61 *(u64 *)ptr = new; 61 *(u64 *)ptr = new;
62 local_irq_restore(flags); 62 raw_local_irq_restore(flags);
63 return prev; 63 return prev;
64} 64}
65 65
diff --git a/include/linux/idr.h b/include/linux/idr.h
index de7e190f1af4..e5eb125effe6 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -136,7 +136,7 @@ struct ida {
136 struct ida_bitmap *free_bitmap; 136 struct ida_bitmap *free_bitmap;
137}; 137};
138 138
139#define IDA_INIT(name) { .idr = IDR_INIT(name), .free_bitmap = NULL, } 139#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
140#define DEFINE_IDA(name) struct ida name = IDA_INIT(name) 140#define DEFINE_IDA(name) struct ida name = IDA_INIT(name)
141 141
142int ida_pre_get(struct ida *ida, gfp_t gfp_mask); 142int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index bfe88c4aa251..f1e877b79ed8 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -412,7 +412,7 @@ struct lock_class_key { };
412 412
413#define lockdep_depth(tsk) (0) 413#define lockdep_depth(tsk) (0)
414 414
415#define lockdep_assert_held(l) do { } while (0) 415#define lockdep_assert_held(l) do { (void)(l); } while (0)
416 416
417#define lockdep_recursing(tsk) (0) 417#define lockdep_recursing(tsk) (0)
418 418
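With CONFIG_LOCKDEP disabled the old stub dropped its argument on the floor,
so a variable whose only consumer was the assertion could trigger an "unused
variable" warning. The new stub still generates no code but marks the
expression as used. An illustrative case it silences (the struct is made up):

	static void frob(struct gadget *g)
	{
		spinlock_t *lock = &g->lock;	/* only used by the assert */

		lockdep_assert_held(lock);	/* now (void)(lock) if !LOCKDEP */
		/* ... */
	}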
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 600060e25ec6..18299057402f 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -30,92 +30,12 @@
30#include <linux/preempt.h> 30#include <linux/preempt.h>
31#include <asm/processor.h> 31#include <asm/processor.h>
32 32
33typedef struct {
34 unsigned sequence;
35 spinlock_t lock;
36} seqlock_t;
37
38/*
39 * These macros triggered gcc-3.x compile-time problems. We think these are
40 * OK now. Be cautious.
41 */
42#define __SEQLOCK_UNLOCKED(lockname) \
43 { 0, __SPIN_LOCK_UNLOCKED(lockname) }
44
45#define seqlock_init(x) \
46 do { \
47 (x)->sequence = 0; \
48 spin_lock_init(&(x)->lock); \
49 } while (0)
50
51#define DEFINE_SEQLOCK(x) \
52 seqlock_t x = __SEQLOCK_UNLOCKED(x)
53
54/* Lock out other writers and update the count.
55 * Acts like a normal spin_lock/unlock.
56 * Don't need preempt_disable() because that is in the spin_lock already.
57 */
58static inline void write_seqlock(seqlock_t *sl)
59{
60 spin_lock(&sl->lock);
61 ++sl->sequence;
62 smp_wmb();
63}
64
65static inline void write_sequnlock(seqlock_t *sl)
66{
67 smp_wmb();
68 sl->sequence++;
69 spin_unlock(&sl->lock);
70}
71
72static inline int write_tryseqlock(seqlock_t *sl)
73{
74 int ret = spin_trylock(&sl->lock);
75
76 if (ret) {
77 ++sl->sequence;
78 smp_wmb();
79 }
80 return ret;
81}
82
83/* Start of read calculation -- fetch last complete writer token */
84static __always_inline unsigned read_seqbegin(const seqlock_t *sl)
85{
86 unsigned ret;
87
88repeat:
89 ret = ACCESS_ONCE(sl->sequence);
90 if (unlikely(ret & 1)) {
91 cpu_relax();
92 goto repeat;
93 }
94 smp_rmb();
95
96 return ret;
97}
98
99/*
100 * Test if reader processed invalid data.
101 *
102 * If sequence value changed then writer changed data while in section.
103 */
104static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start)
105{
106 smp_rmb();
107
108 return unlikely(sl->sequence != start);
109}
110
111
112/* 33/*
113 * Version using sequence counter only. 34 * Version using sequence counter only.
114 * This can be used when code has its own mutex protecting the 35 * This can be used when code has its own mutex protecting the
115 * updating starting before the write_seqcount_begin() and ending 36 * updating starting before the write_seqcount_begin() and ending
116 * after the write_seqcount_end(). 37 * after the write_seqcount_end().
117 */ 38 */
118
119typedef struct seqcount { 39typedef struct seqcount {
120 unsigned sequence; 40 unsigned sequence;
121} seqcount_t; 41} seqcount_t;
@@ -218,7 +138,6 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
218static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) 138static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
219{ 139{
220 smp_rmb(); 140 smp_rmb();
221
222 return __read_seqcount_retry(s, start); 141 return __read_seqcount_retry(s, start);
223} 142}
224 143
@@ -252,31 +171,101 @@ static inline void write_seqcount_barrier(seqcount_t *s)
252 s->sequence+=2; 171 s->sequence+=2;
253} 172}
254 173
174typedef struct {
175 struct seqcount seqcount;
176 spinlock_t lock;
177} seqlock_t;
178
255/* 179/*
256 * Possible sw/hw IRQ protected versions of the interfaces. 180 * These macros triggered gcc-3.x compile-time problems. We think these are
181 * OK now. Be cautious.
257 */ 182 */
258#define write_seqlock_irqsave(lock, flags) \ 183#define __SEQLOCK_UNLOCKED(lockname) \
259 do { local_irq_save(flags); write_seqlock(lock); } while (0) 184 { \
260#define write_seqlock_irq(lock) \ 185 .seqcount = SEQCNT_ZERO, \
261 do { local_irq_disable(); write_seqlock(lock); } while (0) 186 .lock = __SPIN_LOCK_UNLOCKED(lockname) \
262#define write_seqlock_bh(lock) \ 187 }
263 do { local_bh_disable(); write_seqlock(lock); } while (0) 188
189#define seqlock_init(x) \
190 do { \
191 seqcount_init(&(x)->seqcount); \
192 spin_lock_init(&(x)->lock); \
193 } while (0)
264 194
265#define write_sequnlock_irqrestore(lock, flags) \ 195#define DEFINE_SEQLOCK(x) \
266 do { write_sequnlock(lock); local_irq_restore(flags); } while(0) 196 seqlock_t x = __SEQLOCK_UNLOCKED(x)
267#define write_sequnlock_irq(lock) \
268 do { write_sequnlock(lock); local_irq_enable(); } while(0)
269#define write_sequnlock_bh(lock) \
270 do { write_sequnlock(lock); local_bh_enable(); } while(0)
271 197
272#define read_seqbegin_irqsave(lock, flags) \ 198/*
273 ({ local_irq_save(flags); read_seqbegin(lock); }) 199 * Read side functions for starting and finalizing a read side section.
200 */
201static inline unsigned read_seqbegin(const seqlock_t *sl)
202{
203 return read_seqcount_begin(&sl->seqcount);
204}
274 205
275#define read_seqretry_irqrestore(lock, iv, flags) \ 206static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
276 ({ \ 207{
277 int ret = read_seqretry(lock, iv); \ 208 return read_seqcount_retry(&sl->seqcount, start);
278 local_irq_restore(flags); \ 209}
279 ret; \ 210
280 }) 211/*
212 * Lock out other writers and update the count.
213 * Acts like a normal spin_lock/unlock.
214 * Don't need preempt_disable() because that is in the spin_lock already.
215 */
216static inline void write_seqlock(seqlock_t *sl)
217{
218 spin_lock(&sl->lock);
219 write_seqcount_begin(&sl->seqcount);
220}
221
222static inline void write_sequnlock(seqlock_t *sl)
223{
224 write_seqcount_end(&sl->seqcount);
225 spin_unlock(&sl->lock);
226}
227
228static inline void write_seqlock_bh(seqlock_t *sl)
229{
230 spin_lock_bh(&sl->lock);
231 write_seqcount_begin(&sl->seqcount);
232}
233
234static inline void write_sequnlock_bh(seqlock_t *sl)
235{
236 write_seqcount_end(&sl->seqcount);
237 spin_unlock_bh(&sl->lock);
238}
239
240static inline void write_seqlock_irq(seqlock_t *sl)
241{
242 spin_lock_irq(&sl->lock);
243 write_seqcount_begin(&sl->seqcount);
244}
245
246static inline void write_sequnlock_irq(seqlock_t *sl)
247{
248 write_seqcount_end(&sl->seqcount);
249 spin_unlock_irq(&sl->lock);
250}
251
252static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
253{
254 unsigned long flags;
255
256 spin_lock_irqsave(&sl->lock, flags);
257 write_seqcount_begin(&sl->seqcount);
258 return flags;
259}
260
261#define write_seqlock_irqsave(lock, flags) \
262 do { flags = __write_seqlock_irqsave(lock); } while (0)
263
264static inline void
265write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
266{
267 write_seqcount_end(&sl->seqcount);
268 spin_unlock_irqrestore(&sl->lock, flags);
269}
281 270
282#endif /* __LINUX_SEQLOCK_H */ 271#endif /* __LINUX_SEQLOCK_H */
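After this rework a seqlock_t is literally a seqcount_t plus the spinlock
that serializes writers, and the irq/bh variants become real functions
instead of macro wrappers. The usage pattern is unchanged; a minimal sketch
with made-up shared data:

	static DEFINE_SEQLOCK(state_lock);
	static u64 a, b;

	/* writer: exclusive via the embedded spinlock */
	write_seqlock(&state_lock);
	a++;
	b++;
	write_sequnlock(&state_lock);

	/* lockless reader: retry if a writer ran meanwhile */
	u64 snap_a, snap_b;
	unsigned seq;

	do {
		seq = read_seqbegin(&state_lock);
		snap_a = a;
		snap_b = b;
	} while (read_seqretry(&state_lock, seq));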
diff --git a/kernel/futex.c b/kernel/futex.c
index 9618b6e9fb36..fbc07a29ec53 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2472,8 +2472,6 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
2472 if (!futex_cmpxchg_enabled) 2472 if (!futex_cmpxchg_enabled)
2473 return -ENOSYS; 2473 return -ENOSYS;
2474 2474
2475 WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
2476
2477 rcu_read_lock(); 2475 rcu_read_lock();
2478 2476
2479 ret = -ESRCH; 2477 ret = -ESRCH;
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 83e368b005fc..a9642d528630 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -142,8 +142,6 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
142 if (!futex_cmpxchg_enabled) 142 if (!futex_cmpxchg_enabled)
143 return -ENOSYS; 143 return -ENOSYS;
144 144
145 WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
146
147 rcu_read_lock(); 145 rcu_read_lock();
148 146
149 ret = -ESRCH; 147 ret = -ESRCH;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 7981e5b2350d..8a0efac4f99d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3190,9 +3190,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3190#endif 3190#endif
3191 if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { 3191 if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
3192 debug_locks_off(); 3192 debug_locks_off();
3193 printk("BUG: MAX_LOCK_DEPTH too low!\n"); 3193 printk("BUG: MAX_LOCK_DEPTH too low, depth: %i max: %lu!\n",
3194 curr->lockdep_depth, MAX_LOCK_DEPTH);
3194 printk("turning off the locking correctness validator.\n"); 3195 printk("turning off the locking correctness validator.\n");
3196
3197 lockdep_print_held_locks(current);
3198 debug_show_all_locks();
3195 dump_stack(); 3199 dump_stack();
3200
3196 return 0; 3201 return 0;
3197 } 3202 }
3198 3203
@@ -3203,7 +3208,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3203} 3208}
3204 3209
3205static int 3210static int
3206print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, 3211print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
3207 unsigned long ip) 3212 unsigned long ip)
3208{ 3213{
3209 if (!debug_locks_off()) 3214 if (!debug_locks_off())
@@ -3246,7 +3251,7 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
3246 return 0; 3251 return 0;
3247 3252
3248 if (curr->lockdep_depth <= 0) 3253 if (curr->lockdep_depth <= 0)
3249 return print_unlock_inbalance_bug(curr, lock, ip); 3254 return print_unlock_imbalance_bug(curr, lock, ip);
3250 3255
3251 return 1; 3256 return 1;
3252} 3257}
@@ -3317,7 +3322,7 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
3317 goto found_it; 3322 goto found_it;
3318 prev_hlock = hlock; 3323 prev_hlock = hlock;
3319 } 3324 }
3320 return print_unlock_inbalance_bug(curr, lock, ip); 3325 return print_unlock_imbalance_bug(curr, lock, ip);
3321 3326
3322found_it: 3327found_it:
3323 lockdep_init_map(lock, name, key, 0); 3328 lockdep_init_map(lock, name, key, 0);
@@ -3384,7 +3389,7 @@ lock_release_non_nested(struct task_struct *curr,
3384 goto found_it; 3389 goto found_it;
3385 prev_hlock = hlock; 3390 prev_hlock = hlock;
3386 } 3391 }
3387 return print_unlock_inbalance_bug(curr, lock, ip); 3392 return print_unlock_imbalance_bug(curr, lock, ip);
3388 3393
3389found_it: 3394found_it:
3390 if (hlock->instance == lock) 3395 if (hlock->instance == lock)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index b10a42bb0165..072bb066bb7d 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -23,7 +23,7 @@
23 * NTP timekeeping variables: 23 * NTP timekeeping variables:
24 */ 24 */
25 25
26DEFINE_SPINLOCK(ntp_lock); 26DEFINE_RAW_SPINLOCK(ntp_lock);
27 27
28 28
29/* USER_HZ period (usecs): */ 29/* USER_HZ period (usecs): */
@@ -348,7 +348,7 @@ void ntp_clear(void)
348{ 348{
349 unsigned long flags; 349 unsigned long flags;
350 350
351 spin_lock_irqsave(&ntp_lock, flags); 351 raw_spin_lock_irqsave(&ntp_lock, flags);
352 352
353 time_adjust = 0; /* stop active adjtime() */ 353 time_adjust = 0; /* stop active adjtime() */
354 time_status |= STA_UNSYNC; 354 time_status |= STA_UNSYNC;
@@ -362,7 +362,7 @@ void ntp_clear(void)
362 362
363 /* Clear PPS state variables */ 363 /* Clear PPS state variables */
364 pps_clear(); 364 pps_clear();
365 spin_unlock_irqrestore(&ntp_lock, flags); 365 raw_spin_unlock_irqrestore(&ntp_lock, flags);
366 366
367} 367}
368 368
@@ -372,9 +372,9 @@ u64 ntp_tick_length(void)
372 unsigned long flags; 372 unsigned long flags;
373 s64 ret; 373 s64 ret;
374 374
375 spin_lock_irqsave(&ntp_lock, flags); 375 raw_spin_lock_irqsave(&ntp_lock, flags);
376 ret = tick_length; 376 ret = tick_length;
377 spin_unlock_irqrestore(&ntp_lock, flags); 377 raw_spin_unlock_irqrestore(&ntp_lock, flags);
378 return ret; 378 return ret;
379} 379}
380 380
@@ -395,7 +395,7 @@ int second_overflow(unsigned long secs)
395 int leap = 0; 395 int leap = 0;
396 unsigned long flags; 396 unsigned long flags;
397 397
398 spin_lock_irqsave(&ntp_lock, flags); 398 raw_spin_lock_irqsave(&ntp_lock, flags);
399 399
400 /* 400 /*
401 * Leap second processing. If in leap-insert state at the end of the 401 * Leap second processing. If in leap-insert state at the end of the
@@ -479,7 +479,7 @@ int second_overflow(unsigned long secs)
479 time_adjust = 0; 479 time_adjust = 0;
480 480
481out: 481out:
482 spin_unlock_irqrestore(&ntp_lock, flags); 482 raw_spin_unlock_irqrestore(&ntp_lock, flags);
483 483
484 return leap; 484 return leap;
485} 485}
@@ -672,7 +672,7 @@ int do_adjtimex(struct timex *txc)
672 672
673 getnstimeofday(&ts); 673 getnstimeofday(&ts);
674 674
675 spin_lock_irq(&ntp_lock); 675 raw_spin_lock_irq(&ntp_lock);
676 676
677 if (txc->modes & ADJ_ADJTIME) { 677 if (txc->modes & ADJ_ADJTIME) {
678 long save_adjust = time_adjust; 678 long save_adjust = time_adjust;
@@ -714,7 +714,7 @@ int do_adjtimex(struct timex *txc)
714 /* fill PPS status fields */ 714 /* fill PPS status fields */
715 pps_fill_timex(txc); 715 pps_fill_timex(txc);
716 716
717 spin_unlock_irq(&ntp_lock); 717 raw_spin_unlock_irq(&ntp_lock);
718 718
719 txc->time.tv_sec = ts.tv_sec; 719 txc->time.tv_sec = ts.tv_sec;
720 txc->time.tv_usec = ts.tv_nsec; 720 txc->time.tv_usec = ts.tv_nsec;
@@ -912,7 +912,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
912 912
913 pts_norm = pps_normalize_ts(*phase_ts); 913 pts_norm = pps_normalize_ts(*phase_ts);
914 914
915 spin_lock_irqsave(&ntp_lock, flags); 915 raw_spin_lock_irqsave(&ntp_lock, flags);
916 916
917 /* clear the error bits, they will be set again if needed */ 917 /* clear the error bits, they will be set again if needed */
918 time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); 918 time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
@@ -925,7 +925,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
925 * just start the frequency interval */ 925 * just start the frequency interval */
926 if (unlikely(pps_fbase.tv_sec == 0)) { 926 if (unlikely(pps_fbase.tv_sec == 0)) {
927 pps_fbase = *raw_ts; 927 pps_fbase = *raw_ts;
928 spin_unlock_irqrestore(&ntp_lock, flags); 928 raw_spin_unlock_irqrestore(&ntp_lock, flags);
929 return; 929 return;
930 } 930 }
931 931
@@ -940,7 +940,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
940 time_status |= STA_PPSJITTER; 940 time_status |= STA_PPSJITTER;
941 /* restart the frequency calibration interval */ 941 /* restart the frequency calibration interval */
942 pps_fbase = *raw_ts; 942 pps_fbase = *raw_ts;
943 spin_unlock_irqrestore(&ntp_lock, flags); 943 raw_spin_unlock_irqrestore(&ntp_lock, flags);
944 pr_err("hardpps: PPSJITTER: bad pulse\n"); 944 pr_err("hardpps: PPSJITTER: bad pulse\n");
945 return; 945 return;
946 } 946 }
@@ -957,7 +957,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
957 957
958 hardpps_update_phase(pts_norm.nsec); 958 hardpps_update_phase(pts_norm.nsec);
959 959
960 spin_unlock_irqrestore(&ntp_lock, flags); 960 raw_spin_unlock_irqrestore(&ntp_lock, flags);
961} 961}
962EXPORT_SYMBOL(hardpps); 962EXPORT_SYMBOL(hardpps);
963 963
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 27689422aa92..4a944676358e 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -113,9 +113,9 @@ static int get_softlockup_thresh(void)
113 * resolution, and we don't need to waste time with a big divide when 113 * resolution, and we don't need to waste time with a big divide when
114 * 2^30ns == 1.074s. 114 * 2^30ns == 1.074s.
115 */ 115 */
116static unsigned long get_timestamp(int this_cpu) 116static unsigned long get_timestamp(void)
117{ 117{
118 return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ 118 return local_clock() >> 30LL; /* 2^30 ~= 10^9 */
119} 119}
120 120
121static void set_sample_period(void) 121static void set_sample_period(void)
@@ -133,9 +133,7 @@ static void set_sample_period(void)
133/* Commands for resetting the watchdog */ 133/* Commands for resetting the watchdog */
134static void __touch_watchdog(void) 134static void __touch_watchdog(void)
135{ 135{
136 int this_cpu = smp_processor_id(); 136 __this_cpu_write(watchdog_touch_ts, get_timestamp());
137
138 __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
139} 137}
140 138
141void touch_softlockup_watchdog(void) 139void touch_softlockup_watchdog(void)
@@ -196,7 +194,7 @@ static int is_hardlockup(void)
196 194
197static int is_softlockup(unsigned long touch_ts) 195static int is_softlockup(unsigned long touch_ts)
198{ 196{
199 unsigned long now = get_timestamp(smp_processor_id()); 197 unsigned long now = get_timestamp();
200 198
201 /* Warn about unreasonable delays: */ 199 /* Warn about unreasonable delays: */
202 if (time_after(now, touch_ts + get_softlockup_thresh())) 200 if (time_after(now, touch_ts + get_softlockup_thresh()))
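The ">> 30" in get_timestamp() works because local_clock() returns
nanoseconds: 2^30 ns = 1073741824 ns, roughly 1.074 s, so a right shift by 30
yields approximate seconds (about 7% long) without a 64-bit divide on the
watchdog path:

	/* ~seconds from a nanosecond clock, no div64 needed */
	unsigned long secs = local_clock() >> 30;	/* 2^30 ns ~= 1.074 s */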
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 7aae0f2a5e0a..c3eb261a7df3 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -47,10 +47,10 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose);
47 * Normal standalone locks, for the circular and irq-context 47 * Normal standalone locks, for the circular and irq-context
48 * dependency tests: 48 * dependency tests:
49 */ 49 */
50static DEFINE_SPINLOCK(lock_A); 50static DEFINE_RAW_SPINLOCK(lock_A);
51static DEFINE_SPINLOCK(lock_B); 51static DEFINE_RAW_SPINLOCK(lock_B);
52static DEFINE_SPINLOCK(lock_C); 52static DEFINE_RAW_SPINLOCK(lock_C);
53static DEFINE_SPINLOCK(lock_D); 53static DEFINE_RAW_SPINLOCK(lock_D);
54 54
55static DEFINE_RWLOCK(rwlock_A); 55static DEFINE_RWLOCK(rwlock_A);
56static DEFINE_RWLOCK(rwlock_B); 56static DEFINE_RWLOCK(rwlock_B);
@@ -73,12 +73,12 @@ static DECLARE_RWSEM(rwsem_D);
73 * but X* and Y* are different classes. We do this so that 73 * but X* and Y* are different classes. We do this so that
74 * we do not trigger a real lockup: 74 * we do not trigger a real lockup:
75 */ 75 */
76static DEFINE_SPINLOCK(lock_X1); 76static DEFINE_RAW_SPINLOCK(lock_X1);
77static DEFINE_SPINLOCK(lock_X2); 77static DEFINE_RAW_SPINLOCK(lock_X2);
78static DEFINE_SPINLOCK(lock_Y1); 78static DEFINE_RAW_SPINLOCK(lock_Y1);
79static DEFINE_SPINLOCK(lock_Y2); 79static DEFINE_RAW_SPINLOCK(lock_Y2);
80static DEFINE_SPINLOCK(lock_Z1); 80static DEFINE_RAW_SPINLOCK(lock_Z1);
81static DEFINE_SPINLOCK(lock_Z2); 81static DEFINE_RAW_SPINLOCK(lock_Z2);
82 82
83static DEFINE_RWLOCK(rwlock_X1); 83static DEFINE_RWLOCK(rwlock_X1);
84static DEFINE_RWLOCK(rwlock_X2); 84static DEFINE_RWLOCK(rwlock_X2);
@@ -107,10 +107,10 @@ static DECLARE_RWSEM(rwsem_Z2);
107 */ 107 */
108#define INIT_CLASS_FUNC(class) \ 108#define INIT_CLASS_FUNC(class) \
109static noinline void \ 109static noinline void \
110init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \ 110init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \
111 struct rw_semaphore *rwsem) \ 111 struct mutex *mutex, struct rw_semaphore *rwsem)\
112{ \ 112{ \
113 spin_lock_init(lock); \ 113 raw_spin_lock_init(lock); \
114 rwlock_init(rwlock); \ 114 rwlock_init(rwlock); \
115 mutex_init(mutex); \ 115 mutex_init(mutex); \
116 init_rwsem(rwsem); \ 116 init_rwsem(rwsem); \
@@ -168,10 +168,10 @@ static void init_shared_classes(void)
168 * Shortcuts for lock/unlock API variants, to keep 168 * Shortcuts for lock/unlock API variants, to keep
169 * the testcases compact: 169 * the testcases compact:
170 */ 170 */
171#define L(x) spin_lock(&lock_##x) 171#define L(x) raw_spin_lock(&lock_##x)
172#define U(x) spin_unlock(&lock_##x) 172#define U(x) raw_spin_unlock(&lock_##x)
173#define LU(x) L(x); U(x) 173#define LU(x) L(x); U(x)
174#define SI(x) spin_lock_init(&lock_##x) 174#define SI(x) raw_spin_lock_init(&lock_##x)
175 175
176#define WL(x) write_lock(&rwlock_##x) 176#define WL(x) write_lock(&rwlock_##x)
177#define WU(x) write_unlock(&rwlock_##x) 177#define WU(x) write_unlock(&rwlock_##x)
@@ -911,7 +911,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
911 911
912#define I2(x) \ 912#define I2(x) \
913 do { \ 913 do { \
914 spin_lock_init(&lock_##x); \ 914 raw_spin_lock_init(&lock_##x); \
915 rwlock_init(&rwlock_##x); \ 915 rwlock_init(&rwlock_##x); \
916 mutex_init(&mutex_##x); \ 916 mutex_init(&mutex_##x); \
917 init_rwsem(&rwsem_##x); \ 917 init_rwsem(&rwsem_##x); \
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index 7e0d6a58fc83..7542afbb22b3 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -73,20 +73,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
73 goto dont_wake_writers; 73 goto dont_wake_writers;
74 } 74 }
75 75
76 /* if we are allowed to wake writers try to grant a single write lock 76 /*
77 * if there's a writer at the front of the queue 77 * as we support write lock stealing, we can't set sem->activity
78 * - we leave the 'waiting count' incremented to signify potential 78 * to -1 here to hand over the lock. Instead, we wake the writer
79 * contention 79 * and let it take the lock again itself.
80 */ 80 */
81 if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { 81 if (waiter->flags & RWSEM_WAITING_FOR_WRITE) {
82 sem->activity = -1; 82 wake_up_process(waiter->task);
83 list_del(&waiter->list);
84 tsk = waiter->task;
85 /* Don't touch waiter after ->task has been NULLed */
86 smp_mb();
87 waiter->task = NULL;
88 wake_up_process(tsk);
89 put_task_struct(tsk);
90 goto out; 83 goto out;
91 } 84 }
92 85
@@ -121,18 +114,10 @@ static inline struct rw_semaphore *
121__rwsem_wake_one_writer(struct rw_semaphore *sem) 114__rwsem_wake_one_writer(struct rw_semaphore *sem)
122{ 115{
123 struct rwsem_waiter *waiter; 116 struct rwsem_waiter *waiter;
124 struct task_struct *tsk;
125
126 sem->activity = -1;
127 117
128 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); 118 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
129 list_del(&waiter->list); 119 wake_up_process(waiter->task);
130 120
131 tsk = waiter->task;
132 smp_mb();
133 waiter->task = NULL;
134 wake_up_process(tsk);
135 put_task_struct(tsk);
136 return sem; 121 return sem;
137} 122}
138 123
@@ -204,7 +189,6 @@ int __down_read_trylock(struct rw_semaphore *sem)
204 189
205/* 190/*
206 * get a write lock on the semaphore 191 * get a write lock on the semaphore
207 * - we increment the waiting count anyway to indicate an exclusive lock
208 */ 192 */
209void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) 193void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
210{ 194{
@@ -214,37 +198,32 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
214 198
215 raw_spin_lock_irqsave(&sem->wait_lock, flags); 199 raw_spin_lock_irqsave(&sem->wait_lock, flags);
216 200
217 if (sem->activity == 0 && list_empty(&sem->wait_list)) {
218 /* granted */
219 sem->activity = -1;
220 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
221 goto out;
222 }
223
224 tsk = current;
225 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
226
227 /* set up my own style of waitqueue */ 201 /* set up my own style of waitqueue */
202 tsk = current;
228 waiter.task = tsk; 203 waiter.task = tsk;
229 waiter.flags = RWSEM_WAITING_FOR_WRITE; 204 waiter.flags = RWSEM_WAITING_FOR_WRITE;
230 get_task_struct(tsk);
231
232 list_add_tail(&waiter.list, &sem->wait_list); 205 list_add_tail(&waiter.list, &sem->wait_list);
233 206
234 /* we don't need to touch the semaphore struct anymore */ 207 /* wait for someone to release the lock */
235 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
236
237 /* wait to be given the lock */
238 for (;;) { 208 for (;;) {
239 if (!waiter.task) 209 /*
210 * This is the key to write lock stealing: it lets the task
211 * already on a CPU take the lock right away instead of going
212 * to sleep and waiting for the system to wake it (or someone
213 * else at the head of the wait list).
214 */
215 if (sem->activity == 0)
240 break; 216 break;
241 schedule();
242 set_task_state(tsk, TASK_UNINTERRUPTIBLE); 217 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
218 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
219 schedule();
220 raw_spin_lock_irqsave(&sem->wait_lock, flags);
243 } 221 }
222 /* got the lock */
223 sem->activity = -1;
224 list_del(&waiter.list);
244 225
245 tsk->state = TASK_RUNNING; 226 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
246 out:
247 ;
248} 227}
249 228
250void __sched __down_write(struct rw_semaphore *sem) 229void __sched __down_write(struct rw_semaphore *sem)
@@ -262,8 +241,8 @@ int __down_write_trylock(struct rw_semaphore *sem)
262 241
263 raw_spin_lock_irqsave(&sem->wait_lock, flags); 242 raw_spin_lock_irqsave(&sem->wait_lock, flags);
264 243
265 if (sem->activity == 0 && list_empty(&sem->wait_list)) { 244 if (sem->activity == 0) {
266 /* granted */ 245 /* got the lock */
267 sem->activity = -1; 246 sem->activity = -1;
268 ret = 1; 247 ret = 1;
269 } 248 }
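The spinlock-based slow path now does "take, don't hand over": the waking
side only calls wake_up_process(), and whichever writer reaches the lock
first claims it by observing sem->activity == 0 under wait_lock. Condensed
from the patch, the write slow path reduces to:

	raw_spin_lock_irqsave(&sem->wait_lock, flags);
	list_add_tail(&waiter.list, &sem->wait_list);
	while (sem->activity != 0) {		/* still held by someone */
		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
		schedule();
		raw_spin_lock_irqsave(&sem->wait_lock, flags);
	}
	sem->activity = -1;			/* exclusive ownership */
	list_del(&waiter.list);
	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);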
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 8337e1b9bb8d..ad5e0df16ab4 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -2,6 +2,8 @@
2 * 2 *
3 * Written by David Howells (dhowells@redhat.com). 3 * Written by David Howells (dhowells@redhat.com).
4 * Derived from arch/i386/kernel/semaphore.c 4 * Derived from arch/i386/kernel/semaphore.c
5 *
6 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
5 */ 7 */
6#include <linux/rwsem.h> 8#include <linux/rwsem.h>
7#include <linux/sched.h> 9#include <linux/sched.h>
@@ -60,7 +62,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
60 struct rwsem_waiter *waiter; 62 struct rwsem_waiter *waiter;
61 struct task_struct *tsk; 63 struct task_struct *tsk;
62 struct list_head *next; 64 struct list_head *next;
63 signed long oldcount, woken, loop, adjustment; 65 signed long woken, loop, adjustment;
64 66
65 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); 67 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
66 if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) 68 if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
@@ -72,30 +74,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
72 */ 74 */
73 goto out; 75 goto out;
74 76
75 /* There's a writer at the front of the queue - try to grant it the 77 /* Wake up the writing waiter and let the task grab the sem: */
76 * write lock. However, we only wake this writer if we can transition 78 wake_up_process(waiter->task);
77 * the active part of the count from 0 -> 1
78 */
79 adjustment = RWSEM_ACTIVE_WRITE_BIAS;
80 if (waiter->list.next == &sem->wait_list)
81 adjustment -= RWSEM_WAITING_BIAS;
82
83 try_again_write:
84 oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
85 if (oldcount & RWSEM_ACTIVE_MASK)
86 /* Someone grabbed the sem already */
87 goto undo_write;
88
89 /* We must be careful not to touch 'waiter' after we set ->task = NULL.
90 * It is an allocated on the waiter's stack and may become invalid at
91 * any time after that point (due to a wakeup from another source).
92 */
93 list_del(&waiter->list);
94 tsk = waiter->task;
95 smp_mb();
96 waiter->task = NULL;
97 wake_up_process(tsk);
98 put_task_struct(tsk);
99 goto out; 79 goto out;
100 80
101 readers_only: 81 readers_only:
@@ -157,12 +137,40 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
157 137
158 out: 138 out:
159 return sem; 139 return sem;
140}
141
142/* Try to get write sem, caller holds sem->wait_lock: */
143static int try_get_writer_sem(struct rw_semaphore *sem,
144 struct rwsem_waiter *waiter)
145{
146 struct rwsem_waiter *fwaiter;
147 long oldcount, adjustment;
160 148
161 /* undo the change to the active count, but check for a transition 149 /* only steal when first waiter is writing */
162 * 1->0 */ 150 fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
163 undo_write: 151 if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE))
152 return 0;
153
154 adjustment = RWSEM_ACTIVE_WRITE_BIAS;
155 /* Only one waiter in the queue: */
156 if (fwaiter == waiter && waiter->list.next == &sem->wait_list)
157 adjustment -= RWSEM_WAITING_BIAS;
158
159try_again_write:
160 oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
161 if (!(oldcount & RWSEM_ACTIVE_MASK)) {
162 /* No active lock: */
163 struct task_struct *tsk = waiter->task;
164
165 list_del(&waiter->list);
166 smp_mb();
167 put_task_struct(tsk);
168 tsk->state = TASK_RUNNING;
169 return 1;
170 }
171 /* someone grabbed the sem already */
164 if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) 172 if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
165 goto out; 173 return 0;
166 goto try_again_write; 174 goto try_again_write;
167} 175}
168 176
@@ -210,6 +218,15 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
210 for (;;) { 218 for (;;) {
211 if (!waiter.task) 219 if (!waiter.task)
212 break; 220 break;
221
222 raw_spin_lock_irq(&sem->wait_lock);
223 /* Try to get the writer sem, may steal from the head writer: */
224 if (flags == RWSEM_WAITING_FOR_WRITE)
225 if (try_get_writer_sem(sem, &waiter)) {
226 raw_spin_unlock_irq(&sem->wait_lock);
227 return sem;
228 }
229 raw_spin_unlock_irq(&sem->wait_lock);
213 schedule(); 230 schedule();
214 set_task_state(tsk, TASK_UNINTERRUPTIBLE); 231 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
215 } 232 }
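In the atomic-count implementation the steal is the same idea expressed
through sem->count: the waiter speculatively adds the writer bias and keeps
the lock only if no holders were active, otherwise it backs the bias out,
retrying if the count dropped to zero while undoing. Condensed from
try_get_writer_sem() above (the single-waiter RWSEM_WAITING_BIAS adjustment
is elided):

	long old = rwsem_atomic_update(RWSEM_ACTIVE_WRITE_BIAS, sem)
		   - RWSEM_ACTIVE_WRITE_BIAS;
	if (!(old & RWSEM_ACTIVE_MASK))
		return 1;		/* stolen: we are the writer now */
	/* lost the race: undo, but watch for holders draining to 0 */
	if (rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem)
	    & RWSEM_ACTIVE_MASK)
		return 0;
	/* count hit 0 while undoing: try the steal again */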