36 files changed, 1564 insertions(+), 433 deletions(-)
diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt
index cef00d42ed5b..dd2f7b26ca30 100644
--- a/Documentation/lockstat.txt
+++ b/Documentation/lockstat.txt
@@ -65,7 +65,7 @@ that had to wait on lock acquisition.
 
 - CONFIGURATION
 
-Lock statistics are enabled via CONFIG_LOCK_STATS.
+Lock statistics are enabled via CONFIG_LOCK_STAT.
 
 - USAGE
 
diff --git a/Documentation/x86/early-microcode.txt b/Documentation/x86/early-microcode.txt
new file mode 100644
index 000000000000..4aaf0dfb0cb8
--- /dev/null
+++ b/Documentation/x86/early-microcode.txt
@@ -0,0 +1,43 @@
+Early load microcode
+====================
+By Fenghua Yu <fenghua.yu@intel.com>
+
+The kernel can update microcode very early during boot. Loading microcode
+early can fix CPU issues before they are observed during kernel boot time.
+
+The microcode is stored in an initrd file. During boot, it is read from the
+initrd file and loaded into CPUs.
+
+The format of the combined initrd image is microcode in cpio format followed
+by the (possibly compressed) initrd image. The kernel parses the combined
+initrd image during boot. The microcode file in the cpio name space is:
+kernel/x86/microcode/GenuineIntel.bin
+
+During BSP boot (before SMP starts), if the kernel finds the microcode file in
+the initrd file, it parses the microcode and saves matching microcode in
+memory. If matching microcode is found, it will be applied on the BSP and
+later on all the APs.
+
+The cached microcode patch is applied when CPUs resume from a sleep state.
+
+There are two legacy user space interfaces to load microcode, either through
+/dev/cpu/microcode or through the /sys/devices/system/cpu/microcode/reload
+file in sysfs.
+
+In addition to these two legacy methods, the early loading method described
+here is the third method with which microcode can be uploaded to a system's
+CPUs.
+
+The following example script shows how to generate a new combined initrd file
+in /boot/initrd-3.5.0.ucode.img from the original microcode file microcode.bin
+and the original initrd image /boot/initrd-3.5.0.img.
+
+mkdir initrd
+cd initrd
+mkdir kernel
+mkdir kernel/x86
+mkdir kernel/x86/microcode
+cp ../microcode.bin kernel/x86/microcode/GenuineIntel.bin
+find . | cpio -oc >../ucode.cpio
+cd ..
+cat ucode.cpio /boot/initrd-3.5.0.img >/boot/initrd-3.5.0.ucode.img
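As an editorial aside, a hedged sketch of how to exercise both loading paths on a running system. The sysfs and procfs paths are the standard kernel interfaces; the dmesg string comes from print_ucode_info() in microcode_intel_early.c later in this patch:

    # legacy method: trigger a reload through sysfs
    echo 1 > /sys/devices/system/cpu/microcode/reload
    # early method: boot the combined initrd built above, then check
    dmesg | grep 'microcode updated early'
    # current per-CPU microcode revision
    grep microcode /proc/cpuinfo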
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ff0e5f3c844e..4ebc7a6e6724 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1054,6 +1054,24 @@ config MICROCODE_OLD_INTERFACE
 	def_bool y
 	depends on MICROCODE
 
+config MICROCODE_INTEL_LIB
+	def_bool y
+	depends on MICROCODE_INTEL
+
+config MICROCODE_INTEL_EARLY
+	bool "Early load microcode"
+	depends on MICROCODE_INTEL && BLK_DEV_INITRD
+	default y
+	help
+	  This option provides functionality to read additional microcode data
+	  at the beginning of the initrd image. The data tells the kernel to
+	  load microcode on CPUs as early as possible. There is no functional
+	  change if no microcode data is glued to the initrd, so it is safe to say Y.
+
+config MICROCODE_EARLY
+	def_bool y
+	depends on MICROCODE_INTEL_EARLY
+
 config X86_MSR
 	tristate "/dev/cpu/*/msr - Model-specific register support"
 	---help---
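For reference, a hedged sketch of what a resulting .config fragment might look like with these options enabled (MICROCODE and MICROCODE_INTEL are the pre-existing options this hunk depends on):

    CONFIG_MICROCODE=y
    CONFIG_MICROCODE_INTEL=y
    CONFIG_MICROCODE_INTEL_LIB=y
    CONFIG_MICROCODE_INTEL_EARLY=y
    CONFIG_MICROCODE_EARLY=y
    CONFIG_BLK_DEV_INITRD=y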
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 43d921b4752c..6825e2efd1b4 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -57,4 +57,18 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
 static inline void __exit exit_amd_microcode(void) {}
 #endif
 
+#ifdef CONFIG_MICROCODE_EARLY
+#define MAX_UCODE_COUNT 128
+extern void __init load_ucode_bsp(void);
+extern void __init load_ucode_ap(void);
+extern int __init save_microcode_in_initrd(void);
+#else
+static inline void __init load_ucode_bsp(void) {}
+static inline void __init load_ucode_ap(void) {}
+static inline int __init save_microcode_in_initrd(void)
+{
+	return 0;
+}
+#endif
+
 #endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
new file mode 100644
index 000000000000..5356f927d411
--- /dev/null
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -0,0 +1,85 @@
+#ifndef _ASM_X86_MICROCODE_INTEL_H
+#define _ASM_X86_MICROCODE_INTEL_H
+
+#include <asm/microcode.h>
+
+struct microcode_header_intel {
+	unsigned int hdrver;
+	unsigned int rev;
+	unsigned int date;
+	unsigned int sig;
+	unsigned int cksum;
+	unsigned int ldrver;
+	unsigned int pf;
+	unsigned int datasize;
+	unsigned int totalsize;
+	unsigned int reserved[3];
+};
+
+struct microcode_intel {
+	struct microcode_header_intel hdr;
+	unsigned int bits[0];
+};
+
+/* The microcode format was extended starting with Prescott processors. */
+struct extended_signature {
+	unsigned int sig;
+	unsigned int pf;
+	unsigned int cksum;
+};
+
+struct extended_sigtable {
+	unsigned int count;
+	unsigned int cksum;
+	unsigned int reserved[3];
+	struct extended_signature sigs[0];
+};
+
+#define DEFAULT_UCODE_DATASIZE	(2000)
+#define MC_HEADER_SIZE		(sizeof(struct microcode_header_intel))
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
+#define EXT_HEADER_SIZE		(sizeof(struct extended_sigtable))
+#define EXT_SIGNATURE_SIZE	(sizeof(struct extended_signature))
+#define DWSIZE			(sizeof(u32))
+
+#define get_totalsize(mc) \
+	(((struct microcode_intel *)mc)->hdr.totalsize ? \
+	 ((struct microcode_intel *)mc)->hdr.totalsize : \
+	 DEFAULT_UCODE_TOTALSIZE)
+
+#define get_datasize(mc) \
+	(((struct microcode_intel *)mc)->hdr.datasize ? \
+	 ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
+
+#define sigmatch(s1, s2, p1, p2) \
+	(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
+
+#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
+
+extern int
+get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev);
+extern int microcode_sanity_check(void *mc, int print_err);
+extern int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev);
+extern int
+update_match_revision(struct microcode_header_intel *mc_header, int rev);
+
+#ifdef CONFIG_MICROCODE_INTEL_EARLY
+extern void __init load_ucode_intel_bsp(void);
+extern void __cpuinit load_ucode_intel_ap(void);
+extern void show_ucode_info_early(void);
+#else
+static inline __init void load_ucode_intel_bsp(void) {}
+static inline __cpuinit void load_ucode_intel_ap(void) {}
+static inline void show_ucode_info_early(void) {}
+#endif
+
+#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
+extern int save_mc_for_early(u8 *mc);
+#else
+static inline int save_mc_for_early(u8 *mc)
+{
+	return 0;
+}
+#endif
+
+#endif /* _ASM_X86_MICROCODE_INTEL_H */
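As a quick editorial sanity check of the layout above, a minimal user-space C sketch (not part of the patch) that reproduces the header sizes and the sigmatch() platform-flag semantics:

    #include <stdio.h>

    /* Standalone copy of the header layout above, for illustration only. */
    struct microcode_header_intel {
    	unsigned int hdrver, rev, date, sig, cksum, ldrver;
    	unsigned int pf, datasize, totalsize, reserved[3];
    };

    #define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
    #define DEFAULT_UCODE_DATASIZE (2000)
    #define sigmatch(s1, s2, p1, p2) \
    	(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))

    int main(void)
    {
    	/* The header is 48 bytes; a zero totalsize field means 2048 bytes. */
    	printf("header: %zu bytes, default total: %zu bytes\n",
    	       MC_HEADER_SIZE, DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE);

    	/* pf fields are bitmasks: any shared platform bit is a match... */
    	printf("pf 0x10 vs 0x12: %d\n", sigmatch(0x306a9, 0x306a9, 0x10, 0x12));
    	/* ...and two zero pf values also match. */
    	printf("pf 0 vs 0:       %d\n", sigmatch(0x306a9, 0x306a9, 0, 0));
    	return 0;
    }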
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 8277941cbe99..3270116b1488 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -180,6 +180,14 @@ extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
 extern void detect_extended_topology(struct cpuinfo_x86 *c);
 extern void detect_ht(struct cpuinfo_x86 *c);
 
+#ifdef CONFIG_X86_32
+extern int have_cpuid_p(void);
+#else
+static inline int have_cpuid_p(void)
+{
+	return 1;
+}
+#endif
 static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 				unsigned int *ecx, unsigned int *edx)
 {
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 6f414ed88620..6fd3fd769796 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -5,8 +5,6 @@
 
 /* misc architecture specific prototypes */
 
-void early_idt_handler(void);
-
 void system_call(void);
 void syscall_init(void);
 
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0fee48e279cc..50a7fc0f824a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -20,10 +20,20 @@ static inline void __native_flush_tlb(void)
 	native_write_cr3(native_read_cr3());
 }
 
+static inline void __native_flush_tlb_global_irq_disabled(void)
+{
+	unsigned long cr4;
+
+	cr4 = native_read_cr4();
+	/* clear PGE */
+	native_write_cr4(cr4 & ~X86_CR4_PGE);
+	/* write old PGE again and flush TLBs */
+	native_write_cr4(cr4);
+}
+
 static inline void __native_flush_tlb_global(void)
 {
 	unsigned long flags;
-	unsigned long cr4;
 
 	/*
 	 * Read-modify-write to CR4 - protect it from preemption and
@@ -32,11 +42,7 @@ static inline void __native_flush_tlb_global(void)
 	 */
 	raw_local_irq_save(flags);
 
-	cr4 = native_read_cr4();
-	/* clear PGE */
-	native_write_cr4(cr4 & ~X86_CR4_PGE);
-	/* write old PGE again and flush TLBs */
-	native_write_cr4(cr4);
+	__native_flush_tlb_global_irq_disabled();
 
 	raw_local_irq_restore(flags);
 }
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index ac3b3d002833..7bd3bd310106 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -87,6 +87,9 @@ obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 
+obj-$(CONFIG_MICROCODE_EARLY)		+= microcode_core_early.o
+obj-$(CONFIG_MICROCODE_INTEL_EARLY)	+= microcode_intel_early.o
+obj-$(CONFIG_MICROCODE_INTEL_LIB)	+= microcode_intel_lib.o
 microcode-y				:= microcode_core.o
 microcode-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
 microcode-$(CONFIG_MICROCODE_AMD)	+= microcode_amd.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9c3ab43a6954..d814772c5bed 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -37,6 +37,8 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/pat.h>
+#include <asm/microcode.h>
+#include <asm/microcode_intel.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/uv/uv.h>
@@ -213,7 +215,7 @@ static inline int flag_is_changeable_p(u32 flag)
 }
 
 /* Probe for the CPUID instruction */
-static int __cpuinit have_cpuid_p(void)
+int __cpuinit have_cpuid_p(void)
 {
 	return flag_is_changeable_p(X86_EFLAGS_ID);
 }
@@ -249,11 +251,6 @@ static inline int flag_is_changeable_p(u32 flag)
 {
 	return 1;
 }
-/* Probe for the CPUID instruction */
-static inline int have_cpuid_p(void)
-{
-	return 1;
-}
 static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 {
 }
@@ -1223,6 +1220,12 @@ void __cpuinit cpu_init(void)
 	int cpu;
 	int i;
 
+	/*
+	 * Load microcode on this CPU if valid microcode is available.
+	 * This is the early microcode loading procedure.
+	 */
+	load_ucode_ap();
+
 	cpu = stack_smp_processor_id();
 	t = &per_cpu(init_tss, cpu);
 	oist = &per_cpu(orig_ist, cpu);
@@ -1314,6 +1317,8 @@ void __cpuinit cpu_init(void)
 	struct tss_struct *t = &per_cpu(init_tss, cpu);
 	struct thread_struct *thread = &curr->thread;
 
+	show_ucode_info_early();
+
 	if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
 		for (;;)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 57334f4cd3af..c5e403f6d869 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,6 +26,7 @@
 #include <asm/e820.h>
 #include <asm/bios_ebda.h>
 #include <asm/bootparam_utils.h>
+#include <asm/microcode.h>
 
 /*
  * Manage page tables very early on.
@@ -159,17 +160,17 @@ void __init x86_64_start_kernel(char * real_mode_data)
 	/* clear bss before set_intr_gate with early_idt_handler */
 	clear_bss();
 
-	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
-#ifdef CONFIG_EARLY_PRINTK
+	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
 		set_intr_gate(i, &early_idt_handlers[i]);
-#else
-		set_intr_gate(i, early_idt_handler);
-#endif
-	}
 	load_idt((const struct desc_ptr *)&idt_descr);
 
 	copy_bootdata(__va(real_mode_data));
 
+	/*
+	 * Load microcode early on BSP.
+	 */
+	load_ucode_bsp();
+
 	if (console_loglevel == 10)
 		early_printk("Kernel alive\n");
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 3c3f58a0808f..73afd11799ca 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -144,6 +144,11 @@ ENTRY(startup_32)
 	movl %eax, pa(olpc_ofw_pgd)
 #endif
 
+#ifdef CONFIG_MICROCODE_EARLY
+	/* Early load ucode on BSP. */
+	call load_ucode_bsp
+#endif
+
 /*
  * Initialize page tables.  This creates a PDE and a set of page
  * tables, which are located immediately beyond __brk_base.  The variable
@@ -299,6 +304,12 @@ ENTRY(startup_32_smp)
 	movl %eax,%ss
 	leal -__PAGE_OFFSET(%ecx),%esp
 
+#ifdef CONFIG_MICROCODE_EARLY
+	/* Early load ucode on AP. */
+	call load_ucode_ap
+#endif
+
+
 default_entry:
 #define CR0_STATE	(X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
 			 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d94f6d68be2a..b7de3b25adb5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -336,6 +336,7 @@ early_idt_handlers:
 	i = i + 1
 	.endr
 
+	/* This is global to keep gas from relaxing the jumps */
 ENTRY(early_idt_handler)
 	cld
 
@@ -404,6 +405,7 @@ ENTRY(early_idt_handler)
 	addq $16,%rsp		# drop vector number and error code
 	decl early_recursion_flag(%rip)
 	INTERRUPT_RETURN
+ENDPROC(early_idt_handler)
 
 	__INITDATA
 
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 3a04b224d0c0..22db92bbdf1a 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -364,10 +364,7 @@ static struct attribute_group mc_attr_group = {
 
 static void microcode_fini_cpu(int cpu)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
 	microcode_ops->microcode_fini_cpu(cpu);
-	uci->valid = 0;
 }
 
 static enum ucode_state microcode_resume_cpu(int cpu)
@@ -383,6 +380,10 @@ static enum ucode_state microcode_resume_cpu(int cpu)
 static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
 {
 	enum ucode_state ustate;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	if (uci && uci->valid)
+		return UCODE_OK;
 
 	if (collect_cpu_info(cpu))
 		return UCODE_ERROR;
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c
new file mode 100644
index 000000000000..577db8417d15
--- /dev/null
+++ b/arch/x86/kernel/microcode_core_early.c
@@ -0,0 +1,76 @@
+/*
+ *	X86 CPU microcode early update for Linux
+ *
+ *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ *			   H Peter Anvin <hpa@zytor.com>
+ *
+ *	This driver allows early loading of microcode on Intel processors
+ *	belonging to the IA-32 family - PentiumPro, Pentium II,
+ *	Pentium III, Xeon, Pentium 4, etc.
+ *
+ *	Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
+ *	Software Developer's Manual.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <asm/microcode_intel.h>
+#include <asm/processor.h>
+
+#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
+#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
+#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
+#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
+#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
+#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
+#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
+
+#define CPUID_IS(a, b, c, ebx, ecx, edx)	\
+		(!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
+
+/*
+ * In the early microcode loading phase on the BSP, boot_cpu_data is not set
+ * up yet, so x86_vendor() gets the vendor id for the BSP.
+ *
+ * In the 32-bit AP case, accessing boot_cpu_data needs a linear address. To
+ * simplify the code, we still use x86_vendor() to get the vendor id for APs.
+ *
+ * x86_vendor() gets the vendor information directly from cpuid.
+ */
+static int __cpuinit x86_vendor(void)
+{
+	u32 eax = 0x00000000;
+	u32 ebx, ecx = 0, edx;
+
+	if (!have_cpuid_p())
+		return X86_VENDOR_UNKNOWN;
+
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
+		return X86_VENDOR_INTEL;
+
+	if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
+		return X86_VENDOR_AMD;
+
+	return X86_VENDOR_UNKNOWN;
+}
+
+void __init load_ucode_bsp(void)
+{
+	int vendor = x86_vendor();
+
+	if (vendor == X86_VENDOR_INTEL)
+		load_ucode_intel_bsp();
+}
+
+void __cpuinit load_ucode_ap(void)
+{
+	int vendor = x86_vendor();
+
+	if (vendor == X86_VENDOR_INTEL)
+		load_ucode_intel_ap();
+}
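An editorial aside on what CPUID_IS() compares: CPUID leaf 0 returns the vendor string in EBX, EDX, ECX order, and QCHAR packs four characters little-endian, so "GenuineIntel" splits into the three constants above. A hedged user-space C sketch (not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    #define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))

    int main(void)
    {
    	/* CPUID leaf 0 returns "GenuineIntel" in EBX, EDX, ECX order. */
    	uint32_t ebx = QCHAR('G', 'e', 'n', 'u');	/* "Genu" */
    	uint32_t edx = QCHAR('i', 'n', 'e', 'I');	/* "ineI" */
    	uint32_t ecx = QCHAR('n', 't', 'e', 'l');	/* "ntel" */
    	char vendor[13];

    	/* On little-endian x86, the packed registers spell the string. */
    	*(uint32_t *)&vendor[0] = ebx;
    	*(uint32_t *)&vendor[4] = edx;
    	*(uint32_t *)&vendor[8] = ecx;
    	vendor[12] = '\0';

    	printf("%s\n", vendor);	/* prints: GenuineIntel */
    	return 0;
    }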
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 3544aed39338..5fb2cebf556b 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -79,7 +79,7 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 
-#include <asm/microcode.h>
+#include <asm/microcode_intel.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
 
@@ -87,59 +87,6 @@ MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
 MODULE_LICENSE("GPL");
 
-struct microcode_header_intel {
-	unsigned int hdrver;
-	unsigned int rev;
-	unsigned int date;
-	unsigned int sig;
-	unsigned int cksum;
-	unsigned int ldrver;
-	unsigned int pf;
-	unsigned int datasize;
-	unsigned int totalsize;
-	unsigned int reserved[3];
-};
-
-struct microcode_intel {
-	struct microcode_header_intel hdr;
-	unsigned int bits[0];
-};
-
-/* microcode format is extended from prescott processors */
-struct extended_signature {
-	unsigned int sig;
-	unsigned int pf;
-	unsigned int cksum;
-};
-
-struct extended_sigtable {
-	unsigned int count;
-	unsigned int cksum;
-	unsigned int reserved[3];
-	struct extended_signature sigs[0];
-};
-
-#define DEFAULT_UCODE_DATASIZE	(2000)
-#define MC_HEADER_SIZE		(sizeof(struct microcode_header_intel))
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
-#define EXT_HEADER_SIZE		(sizeof(struct extended_sigtable))
-#define EXT_SIGNATURE_SIZE	(sizeof(struct extended_signature))
-#define DWSIZE			(sizeof(u32))
-
-#define get_totalsize(mc) \
-	(((struct microcode_intel *)mc)->hdr.totalsize ? \
-	 ((struct microcode_intel *)mc)->hdr.totalsize : \
-	 DEFAULT_UCODE_TOTALSIZE)
-
-#define get_datasize(mc) \
-	(((struct microcode_intel *)mc)->hdr.datasize ? \
-	 ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
-
-#define sigmatch(s1, s2, p1, p2) \
-	(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
-
-#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
-
 static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
@@ -162,128 +109,25 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 	return 0;
 }
 
-static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
-{
-	return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
-}
-
-static inline int
-update_match_revision(struct microcode_header_intel *mc_header, int rev)
-{
-	return (mc_header->rev <= rev) ? 0 : 1;
-}
-
-static int microcode_sanity_check(void *mc)
-{
-	unsigned long total_size, data_size, ext_table_size;
-	struct microcode_header_intel *mc_header = mc;
-	struct extended_sigtable *ext_header = NULL;
-	int sum, orig_sum, ext_sigcount = 0, i;
-	struct extended_signature *ext_sig;
-
-	total_size = get_totalsize(mc_header);
-	data_size = get_datasize(mc_header);
-
-	if (data_size + MC_HEADER_SIZE > total_size) {
-		pr_err("error! Bad data size in microcode data file\n");
-		return -EINVAL;
-	}
-
-	if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
-		pr_err("error! Unknown microcode update format\n");
-		return -EINVAL;
-	}
-	ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
-	if (ext_table_size) {
-		if ((ext_table_size < EXT_HEADER_SIZE)
-		 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
-			pr_err("error! Small exttable size in microcode data file\n");
-			return -EINVAL;
-		}
-		ext_header = mc + MC_HEADER_SIZE + data_size;
-		if (ext_table_size != exttable_size(ext_header)) {
-			pr_err("error! Bad exttable size in microcode data file\n");
-			return -EFAULT;
-		}
-		ext_sigcount = ext_header->count;
-	}
-
-	/* check extended table checksum */
-	if (ext_table_size) {
-		int ext_table_sum = 0;
-		int *ext_tablep = (int *)ext_header;
-
-		i = ext_table_size / DWSIZE;
-		while (i--)
-			ext_table_sum += ext_tablep[i];
-		if (ext_table_sum) {
-			pr_warning("aborting, bad extended signature table checksum\n");
-			return -EINVAL;
-		}
-	}
-
-	/* calculate the checksum */
-	orig_sum = 0;
-	i = (MC_HEADER_SIZE + data_size) / DWSIZE;
-	while (i--)
-		orig_sum += ((int *)mc)[i];
-	if (orig_sum) {
-		pr_err("aborting, bad checksum\n");
-		return -EINVAL;
-	}
-	if (!ext_table_size)
-		return 0;
-	/* check extended signature checksum */
-	for (i = 0; i < ext_sigcount; i++) {
-		ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
-			  EXT_SIGNATURE_SIZE * i;
-		sum = orig_sum
-			- (mc_header->sig + mc_header->pf + mc_header->cksum)
-			+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
-		if (sum) {
-			pr_err("aborting, bad checksum\n");
-			return -EINVAL;
-		}
-	}
-	return 0;
-}
-
 /*
  * return 0 - no update found
  * return 1 - found update
  */
-static int
-get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
+static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
 {
-	struct microcode_header_intel *mc_header = mc;
-	struct extended_sigtable *ext_header;
-	unsigned long total_size = get_totalsize(mc_header);
-	int ext_sigcount, i;
-	struct extended_signature *ext_sig;
-
-	if (!update_match_revision(mc_header, rev))
-		return 0;
-
-	if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf))
-		return 1;
+	struct cpu_signature cpu_sig;
+	unsigned int csig, cpf, crev;
 
-	/* Look for ext. headers: */
-	if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
-		return 0;
+	collect_cpu_info(cpu, &cpu_sig);
 
-	ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
-	ext_sigcount = ext_header->count;
-	ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+	csig = cpu_sig.sig;
+	cpf = cpu_sig.pf;
+	crev = cpu_sig.rev;
 
-	for (i = 0; i < ext_sigcount; i++) {
-		if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf))
-			return 1;
-		ext_sig++;
-	}
-	return 0;
+	return get_matching_microcode(csig, cpf, mc_intel, crev);
 }
 
-static int apply_microcode(int cpu)
+int apply_microcode(int cpu)
 {
 	struct microcode_intel *mc_intel;
 	struct ucode_cpu_info *uci;
@@ -300,6 +144,14 @@ static int apply_microcode(int cpu)
 	if (mc_intel == NULL)
 		return 0;
 
+	/*
+	 * Microcode on this CPU could have been updated earlier. Only apply
+	 * the microcode patch in mc_intel when it is newer than the one on
+	 * this CPU.
+	 */
+	if (get_matching_mc(mc_intel, cpu) == 0)
+		return 0;
+
 	/* write microcode via MSR 0x79 */
 	wrmsr(MSR_IA32_UCODE_WRITE,
 	      (unsigned long) mc_intel->bits,
@@ -338,6 +190,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 	unsigned int leftover = size;
 	enum ucode_state state = UCODE_OK;
 	unsigned int curr_mc_size = 0;
+	unsigned int csig, cpf;
 
 	while (leftover) {
 		struct microcode_header_intel mc_header;
@@ -362,11 +215,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 		}
 
 		if (get_ucode_data(mc, ucode_ptr, mc_size) ||
-		    microcode_sanity_check(mc) < 0) {
+		    microcode_sanity_check(mc, 1) < 0) {
 			break;
 		}
 
-		if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
+		csig = uci->cpu_sig.sig;
+		cpf = uci->cpu_sig.pf;
+		if (get_matching_microcode(csig, cpf, mc, new_rev)) {
 			vfree(new_mc);
 			new_rev = mc_header.rev;
 			new_mc = mc;
@@ -393,6 +248,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 	vfree(uci->mc);
 	uci->mc = (struct microcode_intel *)new_mc;
 
+	/*
+	 * If early loading microcode is supported, save this mc into
+	 * permanent memory so that it will be loaded early when a CPU is
+	 * hot added or resumes.
+	 */
+	save_mc_for_early(new_mc);
+
 	pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
 		 cpu, new_rev, uci->cpu_sig.rev);
 out:
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c
new file mode 100644
index 000000000000..7890bc838952
--- /dev/null
+++ b/arch/x86/kernel/microcode_intel_early.c
@@ -0,0 +1,796 @@
+/*
+ *	Intel CPU microcode early update for Linux
+ *
+ *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ *			   H Peter Anvin <hpa@zytor.com>
+ *
+ *	This allows early loading of microcode on Intel processors
+ *	belonging to the IA-32 family - PentiumPro, Pentium II,
+ *	Pentium III, Xeon, Pentium 4, etc.
+ *
+ *	Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
+ *	Software Developer's Manual.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/earlycpio.h>
+#include <linux/initrd.h>
+#include <linux/cpu.h>
+#include <asm/msr.h>
+#include <asm/microcode_intel.h>
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/setup.h>
+
+unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
+struct mc_saved_data {
+	unsigned int mc_saved_count;
+	struct microcode_intel **mc_saved;
+} mc_saved_data;
+
+static enum ucode_state __cpuinit
+generic_load_microcode_early(struct microcode_intel **mc_saved_p,
+			     unsigned int mc_saved_count,
+			     struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *ucode_ptr, *new_mc = NULL;
+	int new_rev = uci->cpu_sig.rev;
+	enum ucode_state state = UCODE_OK;
+	unsigned int mc_size;
+	struct microcode_header_intel *mc_header;
+	unsigned int csig = uci->cpu_sig.sig;
+	unsigned int cpf = uci->cpu_sig.pf;
+	int i;
+
+	for (i = 0; i < mc_saved_count; i++) {
+		ucode_ptr = mc_saved_p[i];
+
+		mc_header = (struct microcode_header_intel *)ucode_ptr;
+		mc_size = get_totalsize(mc_header);
+		if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) {
+			new_rev = mc_header->rev;
+			new_mc = ucode_ptr;
+		}
+	}
+
+	if (!new_mc) {
+		state = UCODE_NFOUND;
+		goto out;
+	}
+
+	uci->mc = (struct microcode_intel *)new_mc;
+out:
+	return state;
+}
+
+static void __cpuinit
+microcode_pointer(struct microcode_intel **mc_saved,
+		  unsigned long *mc_saved_in_initrd,
+		  unsigned long initrd_start, int mc_saved_count)
+{
+	int i;
+
+	for (i = 0; i < mc_saved_count; i++)
+		mc_saved[i] = (struct microcode_intel *)
+			      (mc_saved_in_initrd[i] + initrd_start);
+}
+
+#ifdef CONFIG_X86_32
+static void __cpuinit
+microcode_phys(struct microcode_intel **mc_saved_tmp,
+	       struct mc_saved_data *mc_saved_data)
+{
+	int i;
+	struct microcode_intel ***mc_saved;
+
+	mc_saved = (struct microcode_intel ***)
+		   __pa_symbol(&mc_saved_data->mc_saved);
+	for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
+		struct microcode_intel *p;
+
+		p = *(struct microcode_intel **)
+		    __pa(mc_saved_data->mc_saved + i);
+		mc_saved_tmp[i] = (struct microcode_intel *)__pa(p);
+	}
+}
+#endif
+
+static enum ucode_state __cpuinit
+load_microcode(struct mc_saved_data *mc_saved_data,
+	       unsigned long *mc_saved_in_initrd,
+	       unsigned long initrd_start,
+	       struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+	unsigned int count = mc_saved_data->mc_saved_count;
+
+	if (!mc_saved_data->mc_saved) {
+		microcode_pointer(mc_saved_tmp, mc_saved_in_initrd,
+				  initrd_start, count);
+
+		return generic_load_microcode_early(mc_saved_tmp, count, uci);
+	} else {
+#ifdef CONFIG_X86_32
+		microcode_phys(mc_saved_tmp, mc_saved_data);
+		return generic_load_microcode_early(mc_saved_tmp, count, uci);
+#else
+		return generic_load_microcode_early(mc_saved_data->mc_saved,
+						    count, uci);
+#endif
+	}
+}
+
+static u8 get_x86_family(unsigned long sig)
+{
+	u8 x86;
+
+	x86 = (sig >> 8) & 0xf;
+
+	if (x86 == 0xf)
+		x86 += (sig >> 20) & 0xff;
+
+	return x86;
+}
+
+static u8 get_x86_model(unsigned long sig)
+{
+	u8 x86, x86_model;
+
+	x86 = get_x86_family(sig);
+	x86_model = (sig >> 4) & 0xf;
+
+	if (x86 == 0x6 || x86 == 0xf)
+		x86_model += ((sig >> 16) & 0xf) << 4;
+
+	return x86_model;
+}
+
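An editorial aside: a hedged worked example of the decoding just above (standalone user-space C, not part of the patch). For signature 0x000306a9 (an Ivy Bridge stepping), the base family field is 6, so the extended model bits kick in and the model comes out as 0x3a:

    #include <stdio.h>

    /* Standalone copies of the helpers above, for illustration only. */
    static unsigned char get_x86_family(unsigned long sig)
    {
    	unsigned char x86 = (sig >> 8) & 0xf;

    	if (x86 == 0xf)			/* extended family kicks in at 0xf */
    		x86 += (sig >> 20) & 0xff;
    	return x86;
    }

    static unsigned char get_x86_model(unsigned long sig)
    {
    	unsigned char x86 = get_x86_family(sig);
    	unsigned char x86_model = (sig >> 4) & 0xf;

    	if (x86 == 0x6 || x86 == 0xf)	/* extended model for family 6/15 */
    		x86_model += ((sig >> 16) & 0xf) << 4;
    	return x86_model;
    }

    int main(void)
    {
    	unsigned long sig = 0x000306a9;	/* example CPUID(1).EAX value */

    	/* prints: family 0x6, model 0x3a */
    	printf("family 0x%x, model 0x%x\n",
    	       get_x86_family(sig), get_x86_model(sig));
    	return 0;
    }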
| 154 | /* | ||
| 155 | * Given CPU signature and a microcode patch, this function finds if the | ||
| 156 | * microcode patch has matching family and model with the CPU. | ||
| 157 | */ | ||
| 158 | static enum ucode_state | ||
| 159 | matching_model_microcode(struct microcode_header_intel *mc_header, | ||
| 160 | unsigned long sig) | ||
| 161 | { | ||
| 162 | u8 x86, x86_model; | ||
| 163 | u8 x86_ucode, x86_model_ucode; | ||
| 164 | struct extended_sigtable *ext_header; | ||
| 165 | unsigned long total_size = get_totalsize(mc_header); | ||
| 166 | unsigned long data_size = get_datasize(mc_header); | ||
| 167 | int ext_sigcount, i; | ||
| 168 | struct extended_signature *ext_sig; | ||
| 169 | |||
| 170 | x86 = get_x86_family(sig); | ||
| 171 | x86_model = get_x86_model(sig); | ||
| 172 | |||
| 173 | x86_ucode = get_x86_family(mc_header->sig); | ||
| 174 | x86_model_ucode = get_x86_model(mc_header->sig); | ||
| 175 | |||
| 176 | if (x86 == x86_ucode && x86_model == x86_model_ucode) | ||
| 177 | return UCODE_OK; | ||
| 178 | |||
| 179 | /* Look for ext. headers: */ | ||
| 180 | if (total_size <= data_size + MC_HEADER_SIZE) | ||
| 181 | return UCODE_NFOUND; | ||
| 182 | |||
| 183 | ext_header = (struct extended_sigtable *) | ||
| 184 | mc_header + data_size + MC_HEADER_SIZE; | ||
| 185 | ext_sigcount = ext_header->count; | ||
| 186 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; | ||
| 187 | |||
| 188 | for (i = 0; i < ext_sigcount; i++) { | ||
| 189 | x86_ucode = get_x86_family(ext_sig->sig); | ||
| 190 | x86_model_ucode = get_x86_model(ext_sig->sig); | ||
| 191 | |||
| 192 | if (x86 == x86_ucode && x86_model == x86_model_ucode) | ||
| 193 | return UCODE_OK; | ||
| 194 | |||
| 195 | ext_sig++; | ||
| 196 | } | ||
| 197 | |||
| 198 | return UCODE_NFOUND; | ||
| 199 | } | ||
| 200 | |||
| 201 | static int | ||
| 202 | save_microcode(struct mc_saved_data *mc_saved_data, | ||
| 203 | struct microcode_intel **mc_saved_src, | ||
| 204 | unsigned int mc_saved_count) | ||
| 205 | { | ||
| 206 | int i, j; | ||
| 207 | struct microcode_intel **mc_saved_p; | ||
| 208 | int ret; | ||
| 209 | |||
| 210 | if (!mc_saved_count) | ||
| 211 | return -EINVAL; | ||
| 212 | |||
| 213 | /* | ||
| 214 | * Copy new microcode data. | ||
| 215 | */ | ||
| 216 | mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *), | ||
| 217 | GFP_KERNEL); | ||
| 218 | if (!mc_saved_p) | ||
| 219 | return -ENOMEM; | ||
| 220 | |||
| 221 | for (i = 0; i < mc_saved_count; i++) { | ||
| 222 | struct microcode_intel *mc = mc_saved_src[i]; | ||
| 223 | struct microcode_header_intel *mc_header = &mc->hdr; | ||
| 224 | unsigned long mc_size = get_totalsize(mc_header); | ||
| 225 | mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL); | ||
| 226 | if (!mc_saved_p[i]) { | ||
| 227 | ret = -ENOMEM; | ||
| 228 | goto err; | ||
| 229 | } | ||
| 230 | if (!mc_saved_src[i]) { | ||
| 231 | ret = -EINVAL; | ||
| 232 | goto err; | ||
| 233 | } | ||
| 234 | memcpy(mc_saved_p[i], mc, mc_size); | ||
| 235 | } | ||
| 236 | |||
| 237 | /* | ||
| 238 | * Point to newly saved microcode. | ||
| 239 | */ | ||
| 240 | mc_saved_data->mc_saved = mc_saved_p; | ||
| 241 | mc_saved_data->mc_saved_count = mc_saved_count; | ||
| 242 | |||
| 243 | return 0; | ||
| 244 | |||
| 245 | err: | ||
| 246 | for (j = 0; j <= i; j++) | ||
| 247 | kfree(mc_saved_p[j]); | ||
| 248 | kfree(mc_saved_p); | ||
| 249 | |||
| 250 | return ret; | ||
| 251 | } | ||
| 252 | |||
| 253 | /* | ||
| 254 | * A microcode patch in ucode_ptr is saved into mc_saved | ||
| 255 | * - if it has matching signature and newer revision compared to an existing | ||
| 256 | * patch mc_saved. | ||
| 257 | * - or if it is a newly discovered microcode patch. | ||
| 258 | * | ||
| 259 | * The microcode patch should have matching model with CPU. | ||
| 260 | */ | ||
| 261 | static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr, | ||
| 262 | unsigned int *mc_saved_count_p) | ||
| 263 | { | ||
| 264 | int i; | ||
| 265 | int found = 0; | ||
| 266 | unsigned int mc_saved_count = *mc_saved_count_p; | ||
| 267 | struct microcode_header_intel *mc_header; | ||
| 268 | |||
| 269 | mc_header = (struct microcode_header_intel *)ucode_ptr; | ||
| 270 | for (i = 0; i < mc_saved_count; i++) { | ||
| 271 | unsigned int sig, pf; | ||
| 272 | unsigned int new_rev; | ||
| 273 | struct microcode_header_intel *mc_saved_header = | ||
| 274 | (struct microcode_header_intel *)mc_saved[i]; | ||
| 275 | sig = mc_saved_header->sig; | ||
| 276 | pf = mc_saved_header->pf; | ||
| 277 | new_rev = mc_header->rev; | ||
| 278 | |||
| 279 | if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) { | ||
| 280 | found = 1; | ||
| 281 | if (update_match_revision(mc_header, new_rev)) { | ||
| 282 | /* | ||
| 283 | * Found an older ucode saved before. | ||
| 284 | * Replace the older one with this newer | ||
| 285 | * one. | ||
| 286 | */ | ||
| 287 | mc_saved[i] = | ||
| 288 | (struct microcode_intel *)ucode_ptr; | ||
| 289 | break; | ||
| 290 | } | ||
| 291 | } | ||
| 292 | } | ||
| 293 | if (i >= mc_saved_count && !found) | ||
| 294 | /* | ||
| 295 | * This ucode is first time discovered in ucode file. | ||
| 296 | * Save it to memory. | ||
| 297 | */ | ||
| 298 | mc_saved[mc_saved_count++] = | ||
| 299 | (struct microcode_intel *)ucode_ptr; | ||
| 300 | |||
| 301 | *mc_saved_count_p = mc_saved_count; | ||
| 302 | } | ||
| 303 | |||
| 304 | /* | ||
| 305 | * Get microcode matching with BSP's model. Only CPUs with the same model as | ||
| 306 | * BSP can stay in the platform. | ||
| 307 | */ | ||
| 308 | static enum ucode_state __init | ||
| 309 | get_matching_model_microcode(int cpu, unsigned long start, | ||
| 310 | void *data, size_t size, | ||
| 311 | struct mc_saved_data *mc_saved_data, | ||
| 312 | unsigned long *mc_saved_in_initrd, | ||
| 313 | struct ucode_cpu_info *uci) | ||
| 314 | { | ||
| 315 | u8 *ucode_ptr = data; | ||
| 316 | unsigned int leftover = size; | ||
| 317 | enum ucode_state state = UCODE_OK; | ||
| 318 | unsigned int mc_size; | ||
| 319 | struct microcode_header_intel *mc_header; | ||
| 320 | struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; | ||
| 321 | unsigned int mc_saved_count = mc_saved_data->mc_saved_count; | ||
| 322 | int i; | ||
| 323 | |||
| 324 | while (leftover) { | ||
| 325 | mc_header = (struct microcode_header_intel *)ucode_ptr; | ||
| 326 | |||
| 327 | mc_size = get_totalsize(mc_header); | ||
| 328 | if (!mc_size || mc_size > leftover || | ||
| 329 | microcode_sanity_check(ucode_ptr, 0) < 0) | ||
| 330 | break; | ||
| 331 | |||
| 332 | leftover -= mc_size; | ||
| 333 | |||
| 334 | /* | ||
| 335 | * Since APs with same family and model as the BSP may boot in | ||
| 336 | * the platform, we need to find and save microcode patches | ||
| 337 | * with the same family and model as the BSP. | ||
| 338 | */ | ||
| 339 | if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != | ||
| 340 | UCODE_OK) { | ||
| 341 | ucode_ptr += mc_size; | ||
| 342 | continue; | ||
| 343 | } | ||
| 344 | |||
| 345 | _save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count); | ||
| 346 | |||
| 347 | ucode_ptr += mc_size; | ||
| 348 | } | ||
| 349 | |||
| 350 | if (leftover) { | ||
| 351 | state = UCODE_ERROR; | ||
| 352 | goto out; | ||
| 353 | } | ||
| 354 | |||
| 355 | if (mc_saved_count == 0) { | ||
| 356 | state = UCODE_NFOUND; | ||
| 357 | goto out; | ||
| 358 | } | ||
| 359 | |||
| 360 | for (i = 0; i < mc_saved_count; i++) | ||
| 361 | mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start; | ||
| 362 | |||
| 363 | mc_saved_data->mc_saved_count = mc_saved_count; | ||
| 364 | out: | ||
| 365 | return state; | ||
| 366 | } | ||
| 367 | |||
| 368 | #define native_rdmsr(msr, val1, val2) \ | ||
| 369 | do { \ | ||
| 370 | u64 __val = native_read_msr((msr)); \ | ||
| 371 | (void)((val1) = (u32)__val); \ | ||
| 372 | (void)((val2) = (u32)(__val >> 32)); \ | ||
| 373 | } while (0) | ||
| 374 | |||
| 375 | #define native_wrmsr(msr, low, high) \ | ||
| 376 | native_write_msr(msr, low, high); | ||
| 377 | |||
| 378 | static int __cpuinit collect_cpu_info_early(struct ucode_cpu_info *uci) | ||
| 379 | { | ||
| 380 | unsigned int val[2]; | ||
| 381 | u8 x86, x86_model; | ||
| 382 | struct cpu_signature csig; | ||
| 383 | unsigned int eax, ebx, ecx, edx; | ||
| 384 | |||
| 385 | csig.sig = 0; | ||
| 386 | csig.pf = 0; | ||
| 387 | csig.rev = 0; | ||
| 388 | |||
| 389 | memset(uci, 0, sizeof(*uci)); | ||
| 390 | |||
| 391 | eax = 0x00000001; | ||
| 392 | ecx = 0; | ||
| 393 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
| 394 | csig.sig = eax; | ||
| 395 | |||
| 396 | x86 = get_x86_family(csig.sig); | ||
| 397 | x86_model = get_x86_model(csig.sig); | ||
| 398 | |||
| 399 | if ((x86_model >= 5) || (x86 > 6)) { | ||
| 400 | /* get processor flags from MSR 0x17 */ | ||
| 401 | native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); | ||
| 402 | csig.pf = 1 << ((val[1] >> 18) & 7); | ||
| 403 | } | ||
| 404 | native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
| 405 | |||
| 406 | /* As documented in the SDM: Do a CPUID 1 here */ | ||
| 407 | sync_core(); | ||
| 408 | |||
| 409 | /* get the current revision from MSR 0x8B */ | ||
| 410 | native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); | ||
| 411 | |||
| 412 | csig.rev = val[1]; | ||
| 413 | |||
| 414 | uci->cpu_sig = csig; | ||
| 415 | uci->valid = 1; | ||
| 416 | |||
| 417 | return 0; | ||
| 418 | } | ||
| 419 | |||
| 420 | #ifdef DEBUG | ||
| 421 | static void __ref show_saved_mc(void) | ||
| 422 | { | ||
| 423 | int i, j; | ||
| 424 | unsigned int sig, pf, rev, total_size, data_size, date; | ||
| 425 | struct ucode_cpu_info uci; | ||
| 426 | |||
| 427 | if (mc_saved_data.mc_saved_count == 0) { | ||
| 428 | pr_debug("no micorcode data saved.\n"); | ||
| 429 | return; | ||
| 430 | } | ||
| 431 | pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); | ||
| 432 | |||
| 433 | collect_cpu_info_early(&uci); | ||
| 434 | |||
| 435 | sig = uci.cpu_sig.sig; | ||
| 436 | pf = uci.cpu_sig.pf; | ||
| 437 | rev = uci.cpu_sig.rev; | ||
| 438 | pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n", | ||
| 439 | smp_processor_id(), sig, pf, rev); | ||
| 440 | |||
| 441 | for (i = 0; i < mc_saved_data.mc_saved_count; i++) { | ||
| 442 | struct microcode_header_intel *mc_saved_header; | ||
| 443 | struct extended_sigtable *ext_header; | ||
| 444 | int ext_sigcount; | ||
| 445 | struct extended_signature *ext_sig; | ||
| 446 | |||
| 447 | mc_saved_header = (struct microcode_header_intel *) | ||
| 448 | mc_saved_data.mc_saved[i]; | ||
| 449 | sig = mc_saved_header->sig; | ||
| 450 | pf = mc_saved_header->pf; | ||
| 451 | rev = mc_saved_header->rev; | ||
| 452 | total_size = get_totalsize(mc_saved_header); | ||
| 453 | data_size = get_datasize(mc_saved_header); | ||
| 454 | date = mc_saved_header->date; | ||
| 455 | |||
| 456 | pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n", | ||
| 457 | i, sig, pf, rev, total_size, | ||
| 458 | date & 0xffff, | ||
| 459 | date >> 24, | ||
| 460 | (date >> 16) & 0xff); | ||
| 461 | |||
| 462 | /* Look for ext. headers: */ | ||
| 463 | if (total_size <= data_size + MC_HEADER_SIZE) | ||
| 464 | continue; | ||
| 465 | |||
| 466 | ext_header = (struct extended_sigtable *) | ||
| 467 | mc_saved_header + data_size + MC_HEADER_SIZE; | ||
| 468 | ext_sigcount = ext_header->count; | ||
| 469 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; | ||
| 470 | |||
| 471 | for (j = 0; j < ext_sigcount; j++) { | ||
| 472 | sig = ext_sig->sig; | ||
| 473 | pf = ext_sig->pf; | ||
| 474 | |||
| 475 | pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n", | ||
| 476 | j, sig, pf); | ||
| 477 | |||
| 478 | ext_sig++; | ||
| 479 | } | ||
| 480 | |||
| 481 | } | ||
| 482 | } | ||
| 483 | #else | ||
| 484 | static inline void show_saved_mc(void) | ||
| 485 | { | ||
| 486 | } | ||
| 487 | #endif | ||
| 488 | |||
| 489 | #if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) | ||
| 490 | /* | ||
| 491 | * Save this mc into mc_saved_data. So it will be loaded early when a CPU is | ||
| 492 | * hot added or resumes. | ||
| 493 | * | ||
| 494 | * Please make sure this mc should be a valid microcode patch before calling | ||
| 495 | * this function. | ||
| 496 | */ | ||
| 497 | int save_mc_for_early(u8 *mc) | ||
| 498 | { | ||
| 499 | struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; | ||
| 500 | unsigned int mc_saved_count_init; | ||
| 501 | unsigned int mc_saved_count; | ||
| 502 | struct microcode_intel **mc_saved; | ||
| 503 | int ret = 0; | ||
| 504 | int i; | ||
| 505 | |||
| 506 | /* | ||
| 507 | * Hold hotplug lock so mc_saved_data is not accessed by a CPU in | ||
| 508 | * hotplug. | ||
| 509 | */ | ||
| 510 | cpu_hotplug_driver_lock(); | ||
| 511 | |||
| 512 | mc_saved_count_init = mc_saved_data.mc_saved_count; | ||
| 513 | mc_saved_count = mc_saved_data.mc_saved_count; | ||
| 514 | mc_saved = mc_saved_data.mc_saved; | ||
| 515 | |||
| 516 | if (mc_saved && mc_saved_count) | ||
| 517 | memcpy(mc_saved_tmp, mc_saved, | ||
| 518 | mc_saved_count * sizeof(struct mirocode_intel *)); | ||
| 519 | /* | ||
| 520 | * Save the microcode patch mc in mc_save_tmp structure if it's a newer | ||
| 521 | * version. | ||
| 522 | */ | ||
| 523 | |||
| 524 | _save_mc(mc_saved_tmp, mc, &mc_saved_count); | ||
| 525 | |||
| 526 | /* | ||
| 527 | * Save the mc_save_tmp in global mc_saved_data. | ||
| 528 | */ | ||
| 529 | ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count); | ||
| 530 | if (ret) { | ||
| 531 | pr_err("Can not save microcode patch.\n"); | ||
| 532 | goto out; | ||
| 533 | } | ||
| 534 | |||
| 535 | show_saved_mc(); | ||
| 536 | |||
| 537 | /* | ||
| 538 | * Free old saved microcod data. | ||
| 539 | */ | ||
| 540 | if (mc_saved) { | ||
| 541 | for (i = 0; i < mc_saved_count_init; i++) | ||
| 542 | kfree(mc_saved[i]); | ||
| 543 | kfree(mc_saved); | ||
| 544 | } | ||
| 545 | |||
| 546 | out: | ||
| 547 | cpu_hotplug_driver_unlock(); | ||
| 548 | |||
| 549 | return ret; | ||
| 550 | } | ||
| 551 | EXPORT_SYMBOL_GPL(save_mc_for_early); | ||
| 552 | #endif | ||
| 553 | |||
| 554 | static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; | ||
| 555 | static __init enum ucode_state | ||
| 556 | scan_microcode(unsigned long start, unsigned long end, | ||
| 557 | struct mc_saved_data *mc_saved_data, | ||
| 558 | unsigned long *mc_saved_in_initrd, | ||
| 559 | struct ucode_cpu_info *uci) | ||
| 560 | { | ||
| 561 | unsigned int size = end - start + 1; | ||
| 562 | struct cpio_data cd; | ||
| 563 | long offset = 0; | ||
| 564 | #ifdef CONFIG_X86_32 | ||
| 565 | char *p = (char *)__pa_symbol(ucode_name); | ||
| 566 | #else | ||
| 567 | char *p = ucode_name; | ||
| 568 | #endif | ||
| 569 | |||
| 570 | cd.data = NULL; | ||
| 571 | cd.size = 0; | ||
| 572 | |||
| 573 | cd = find_cpio_data(p, (void *)start, size, &offset); | ||
| 574 | if (!cd.data) | ||
| 575 | return UCODE_ERROR; | ||
| 576 | |||
| 577 | |||
| 578 | return get_matching_model_microcode(0, start, cd.data, cd.size, | ||
| 579 | mc_saved_data, mc_saved_in_initrd, | ||
| 580 | uci); | ||
| 581 | } | ||
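scan_microcode() leans on the early cpio parser; a self-contained sketch of the same lookup, assuming the find_cpio_data() signature used above (the header name is an assumption):

    #include <linux/earlycpio.h>    /* assumed location of find_cpio_data() */

    static void *find_ucode_blob(void *initrd, size_t len, size_t *sz)
    {
            long offset = 0;
            struct cpio_data cd;

            cd = find_cpio_data("kernel/x86/microcode/GenuineIntel.bin",
                                initrd, len, &offset);
            if (!cd.data)
                    return NULL;            /* no such member in the cpio */

            *sz = cd.size;
            return cd.data;                 /* first byte of the blob */
    }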
| 582 | |||
| 583 | /* | ||
| 584 | * Print ucode update info. | ||
| 585 | */ | ||
| 586 | static void __cpuinit | ||
| 587 | print_ucode_info(struct ucode_cpu_info *uci, unsigned int date) | ||
| 588 | { | ||
| 589 | int cpu = smp_processor_id(); | ||
| 590 | |||
| 591 | pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", | ||
| 592 | cpu, | ||
| 593 | uci->cpu_sig.rev, | ||
| 594 | date & 0xffff, | ||
| 595 | date >> 24, | ||
| 596 | (date >> 16) & 0xff); | ||
| 597 | } | ||
| 598 | |||
| 599 | #ifdef CONFIG_X86_32 | ||
| 600 | |||
| 601 | static int delay_ucode_info; | ||
| 602 | static int current_mc_date; | ||
| 603 | |||
| 604 | /* | ||
| 605 | * Print early updated ucode info after printk works. This is a delayed info dump. | ||
| 606 | */ | ||
| 607 | void __cpuinit show_ucode_info_early(void) | ||
| 608 | { | ||
| 609 | struct ucode_cpu_info uci; | ||
| 610 | |||
| 611 | if (delay_ucode_info) { | ||
| 612 | collect_cpu_info_early(&uci); | ||
| 613 | print_ucode_info(&uci, current_mc_date); | ||
| 614 | delay_ucode_info = 0; | ||
| 615 | } | ||
| 616 | } | ||
| 617 | |||
| 618 | /* | ||
| 619 | * At this point, we cannot call printk() yet. Keep microcode patch number in | ||
| 620 | * mc_saved_data.mc_saved and delay printing microcode info in | ||
| 621 | * show_ucode_info_early() until printk() works. | ||
| 622 | */ | ||
| 623 | static void __cpuinit print_ucode(struct ucode_cpu_info *uci) | ||
| 624 | { | ||
| 625 | struct microcode_intel *mc_intel; | ||
| 626 | int *delay_ucode_info_p; | ||
| 627 | int *current_mc_date_p; | ||
| 628 | |||
| 629 | mc_intel = uci->mc; | ||
| 630 | if (mc_intel == NULL) | ||
| 631 | return; | ||
| 632 | |||
| 633 | delay_ucode_info_p = (int *)__pa_symbol(&delay_ucode_info); | ||
| 634 | current_mc_date_p = (int *)__pa_symbol(¤t_mc_date); | ||
| 635 | |||
| 636 | *delay_ucode_info_p = 1; | ||
| 637 | *current_mc_date_p = mc_intel->hdr.date; | ||
| 638 | } | ||
| 639 | #else | ||
| 640 | |||
| 641 | /* | ||
| 642 | * Flush the global TLB. We only do this on x86_64, where paging has been | ||
| 643 | * enabled already and PGE should be enabled as well. | ||
| 644 | */ | ||
| 645 | static inline void __cpuinit flush_tlb_early(void) | ||
| 646 | { | ||
| 647 | __native_flush_tlb_global_irq_disabled(); | ||
| 648 | } | ||
| 649 | |||
| 650 | static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci) | ||
| 651 | { | ||
| 652 | struct microcode_intel *mc_intel; | ||
| 653 | |||
| 654 | mc_intel = uci->mc; | ||
| 655 | if (mc_intel == NULL) | ||
| 656 | return; | ||
| 657 | |||
| 658 | print_ucode_info(uci, mc_intel->hdr.date); | ||
| 659 | } | ||
| 660 | #endif | ||
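The CONFIG_X86_32 split above exists because the 32-bit early path runs before paging is enabled, so kernel globals have to be reached through their physical aliases. A distilled sketch of the pattern (names illustrative):

    static int early_flag;                  /* ordinary kernel global */

    static void __cpuinit set_flag_early(void)
    {
    #ifdef CONFIG_X86_32
            /* Pre-paging: write through the physical alias. */
            int *p = (int *)__pa_symbol(&early_flag);
    #else
            /* x86-64 already runs with paging on: plain access works. */
            int *p = &early_flag;
    #endif
            *p = 1;
    }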
| 661 | |||
| 662 | static int apply_microcode_early(struct mc_saved_data *mc_saved_data, | ||
| 663 | struct ucode_cpu_info *uci) | ||
| 664 | { | ||
| 665 | struct microcode_intel *mc_intel; | ||
| 666 | unsigned int val[2]; | ||
| 667 | |||
| 668 | mc_intel = uci->mc; | ||
| 669 | if (mc_intel == NULL) | ||
| 670 | return 0; | ||
| 671 | |||
| 672 | /* write microcode via MSR 0x79 */ | ||
| 673 | native_wrmsr(MSR_IA32_UCODE_WRITE, | ||
| 674 | (unsigned long) mc_intel->bits, | ||
| 675 | (unsigned long) mc_intel->bits >> 16 >> 16); | ||
| 676 | native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
| 677 | |||
| 678 | /* As documented in the SDM: Do a CPUID 1 here */ | ||
| 679 | sync_core(); | ||
| 680 | |||
| 681 | /* get the current revision from MSR 0x8B */ | ||
| 682 | native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); | ||
| 683 | if (val[1] != mc_intel->hdr.rev) | ||
| 684 | return -1; | ||
| 685 | |||
| 686 | #ifdef CONFIG_X86_64 | ||
| 687 | /* Flush the global TLB. This is a precaution. */ | ||
| 688 | flush_tlb_early(); | ||
| 689 | #endif | ||
| 690 | uci->cpu_sig.rev = val[1]; | ||
| 691 | |||
| 692 | print_ucode(uci); | ||
| 693 | |||
| 694 | return 0; | ||
| 695 | } | ||
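One detail worth spelling out in apply_microcode_early(): the double shift when forming the upper WRMSR half. On a 32-bit build, "addr >> 32" is undefined because the shift count equals the operand width; ">> 16 >> 16" is well defined on both widths:

    unsigned long addr = (unsigned long)mc_intel->bits;
    u32 lo = (u32)addr;                     /* low 32 bits */
    u32 hi = (u32)(addr >> 16 >> 16);       /* 0 on 32-bit, high half on 64-bit */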
| 696 | |||
| 697 | /* | ||
| 698 | * This function converts microcode patch offsets previously stored in | ||
| 699 | * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data. | ||
| 700 | */ | ||
| 701 | int __init save_microcode_in_initrd(void) | ||
| 702 | { | ||
| 703 | unsigned int count = mc_saved_data.mc_saved_count; | ||
| 704 | struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; | ||
| 705 | int ret = 0; | ||
| 706 | |||
| 707 | if (count == 0) | ||
| 708 | return ret; | ||
| 709 | |||
| 710 | microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count); | ||
| 711 | ret = save_microcode(&mc_saved_data, mc_saved, count); | ||
| 712 | if (ret) | ||
| 713 | pr_err("Can not save microcod patches from initrd"); | ||
| 714 | |||
| 715 | show_saved_mc(); | ||
| 716 | |||
| 717 | return ret; | ||
| 718 | } | ||
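microcode_pointer() is not part of this hunk; presumably it rebases each saved initrd offset onto the now-mapped image, along these lines (an assumption, not the actual helper):

    /* Assumed shape of microcode_pointer(): offsets -> virtual pointers. */
    for (i = 0; i < count; i++)
            mc_saved[i] = (struct microcode_intel *)
                          (mc_saved_in_initrd[i] + initrd_start);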
| 719 | |||
| 720 | static void __init | ||
| 721 | _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, | ||
| 722 | unsigned long *mc_saved_in_initrd, | ||
| 723 | unsigned long initrd_start_early, | ||
| 724 | unsigned long initrd_end_early, | ||
| 725 | struct ucode_cpu_info *uci) | ||
| 726 | { | ||
| 727 | collect_cpu_info_early(uci); | ||
| 728 | scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data, | ||
| 729 | mc_saved_in_initrd, uci); | ||
| 730 | load_microcode(mc_saved_data, mc_saved_in_initrd, | ||
| 731 | initrd_start_early, uci); | ||
| 732 | apply_microcode_early(mc_saved_data, uci); | ||
| 733 | } | ||
| 734 | |||
| 735 | void __init | ||
| 736 | load_ucode_intel_bsp(void) | ||
| 737 | { | ||
| 738 | u64 ramdisk_image, ramdisk_size; | ||
| 739 | unsigned long initrd_start_early, initrd_end_early; | ||
| 740 | struct ucode_cpu_info uci; | ||
| 741 | #ifdef CONFIG_X86_32 | ||
| 742 | struct boot_params *boot_params_p; | ||
| 743 | |||
| 744 | boot_params_p = (struct boot_params *)__pa_symbol(&boot_params); | ||
| 745 | ramdisk_image = boot_params_p->hdr.ramdisk_image; | ||
| 746 | ramdisk_size = boot_params_p->hdr.ramdisk_size; | ||
| 747 | initrd_start_early = ramdisk_image; | ||
| 748 | initrd_end_early = initrd_start_early + ramdisk_size; | ||
| 749 | |||
| 750 | _load_ucode_intel_bsp( | ||
| 751 | (struct mc_saved_data *)__pa_symbol(&mc_saved_data), | ||
| 752 | (unsigned long *)__pa_symbol(&mc_saved_in_initrd), | ||
| 753 | initrd_start_early, initrd_end_early, &uci); | ||
| 754 | #else | ||
| 755 | ramdisk_image = boot_params.hdr.ramdisk_image; | ||
| 756 | ramdisk_size = boot_params.hdr.ramdisk_size; | ||
| 757 | initrd_start_early = ramdisk_image + PAGE_OFFSET; | ||
| 758 | initrd_end_early = initrd_start_early + ramdisk_size; | ||
| 759 | |||
| 760 | _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, | ||
| 761 | initrd_start_early, initrd_end_early, &uci); | ||
| 762 | #endif | ||
| 763 | } | ||
| 764 | |||
| 765 | void __cpuinit load_ucode_intel_ap(void) | ||
| 766 | { | ||
| 767 | struct mc_saved_data *mc_saved_data_p; | ||
| 768 | struct ucode_cpu_info uci; | ||
| 769 | unsigned long *mc_saved_in_initrd_p; | ||
| 770 | unsigned long initrd_start_addr; | ||
| 771 | #ifdef CONFIG_X86_32 | ||
| 772 | unsigned long *initrd_start_p; | ||
| 773 | |||
| 774 | mc_saved_in_initrd_p = | ||
| 775 | (unsigned long *)__pa_symbol(mc_saved_in_initrd); | ||
| 776 | mc_saved_data_p = (struct mc_saved_data *)__pa_symbol(&mc_saved_data); | ||
| 777 | initrd_start_p = (unsigned long *)__pa_symbol(&initrd_start); | ||
| 778 | initrd_start_addr = (unsigned long)__pa_symbol(*initrd_start_p); | ||
| 779 | #else | ||
| 780 | mc_saved_data_p = &mc_saved_data; | ||
| 781 | mc_saved_in_initrd_p = mc_saved_in_initrd; | ||
| 782 | initrd_start_addr = initrd_start; | ||
| 783 | #endif | ||
| 784 | |||
| 785 | /* | ||
| 786 | * If there is no valid ucode previously saved in memory, there is no | ||
| 787 | * need to update ucode on this AP. | ||
| 788 | */ | ||
| 789 | if (mc_saved_data_p->mc_saved_count == 0) | ||
| 790 | return; | ||
| 791 | |||
| 792 | collect_cpu_info_early(&uci); | ||
| 793 | load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, | ||
| 794 | initrd_start_addr, &uci); | ||
| 795 | apply_microcode_early(mc_saved_data_p, &uci); | ||
| 796 | } | ||
diff --git a/arch/x86/kernel/microcode_intel_lib.c b/arch/x86/kernel/microcode_intel_lib.c new file mode 100644 index 000000000000..ce69320d0179 --- /dev/null +++ b/arch/x86/kernel/microcode_intel_lib.c | |||
| @@ -0,0 +1,174 @@ | |||
| 1 | /* | ||
| 2 | * Intel CPU Microcode Update Driver for Linux | ||
| 3 | * | ||
| 4 | * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> | ||
| 5 | * H Peter Anvin" <hpa@zytor.com> | ||
| 6 | * | ||
| 7 | * This driver allows to upgrade microcode on Intel processors | ||
| 8 | * belonging to IA-32 family - PentiumPro, Pentium II, | ||
| 9 | * Pentium III, Xeon, Pentium 4, etc. | ||
| 10 | * | ||
| 11 | * Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture | ||
| 12 | * Software Developer's Manual | ||
| 13 | * Order Number 253668 or free download from: | ||
| 14 | * | ||
| 15 | * http://developer.intel.com/Assets/PDF/manual/253668.pdf | ||
| 16 | * | ||
| 17 | * For more information, go to http://www.urbanmyth.org/microcode | ||
| 18 | * | ||
| 19 | * This program is free software; you can redistribute it and/or | ||
| 20 | * modify it under the terms of the GNU General Public License | ||
| 21 | * as published by the Free Software Foundation; either version | ||
| 22 | * 2 of the License, or (at your option) any later version. | ||
| 23 | * | ||
| 24 | */ | ||
| 25 | #include <linux/firmware.h> | ||
| 26 | #include <linux/uaccess.h> | ||
| 27 | #include <linux/kernel.h> | ||
| 28 | #include <linux/module.h> | ||
| 29 | |||
| 30 | #include <asm/microcode_intel.h> | ||
| 31 | #include <asm/processor.h> | ||
| 32 | #include <asm/msr.h> | ||
| 33 | |||
| 34 | static inline int | ||
| 35 | update_match_cpu(unsigned int csig, unsigned int cpf, | ||
| 36 | unsigned int sig, unsigned int pf) | ||
| 37 | { | ||
| 38 | return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1; | ||
| 39 | } | ||
| 40 | |||
| 41 | int | ||
| 42 | update_match_revision(struct microcode_header_intel *mc_header, int rev) | ||
| 43 | { | ||
| 44 | return (mc_header->rev <= rev) ? 0 : 1; | ||
| 45 | } | ||
| 46 | |||
| 47 | int microcode_sanity_check(void *mc, int print_err) | ||
| 48 | { | ||
| 49 | unsigned long total_size, data_size, ext_table_size; | ||
| 50 | struct microcode_header_intel *mc_header = mc; | ||
| 51 | struct extended_sigtable *ext_header = NULL; | ||
| 52 | int sum, orig_sum, ext_sigcount = 0, i; | ||
| 53 | struct extended_signature *ext_sig; | ||
| 54 | |||
| 55 | total_size = get_totalsize(mc_header); | ||
| 56 | data_size = get_datasize(mc_header); | ||
| 57 | |||
| 58 | if (data_size + MC_HEADER_SIZE > total_size) { | ||
| 59 | if (print_err) | ||
| 60 | pr_err("error! Bad data size in microcode data file\n"); | ||
| 61 | return -EINVAL; | ||
| 62 | } | ||
| 63 | |||
| 64 | if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { | ||
| 65 | if (print_err) | ||
| 66 | pr_err("error! Unknown microcode update format\n"); | ||
| 67 | return -EINVAL; | ||
| 68 | } | ||
| 69 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); | ||
| 70 | if (ext_table_size) { | ||
| 71 | if ((ext_table_size < EXT_HEADER_SIZE) | ||
| 72 | || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { | ||
| 73 | if (print_err) | ||
| 74 | pr_err("error! Small exttable size in microcode data file\n"); | ||
| 75 | return -EINVAL; | ||
| 76 | } | ||
| 77 | ext_header = mc + MC_HEADER_SIZE + data_size; | ||
| 78 | if (ext_table_size != exttable_size(ext_header)) { | ||
| 79 | if (print_err) | ||
| 80 | pr_err("error! Bad exttable size in microcode data file\n"); | ||
| 81 | return -EFAULT; | ||
| 82 | } | ||
| 83 | ext_sigcount = ext_header->count; | ||
| 84 | } | ||
| 85 | |||
| 86 | /* check extended table checksum */ | ||
| 87 | if (ext_table_size) { | ||
| 88 | int ext_table_sum = 0; | ||
| 89 | int *ext_tablep = (int *)ext_header; | ||
| 90 | |||
| 91 | i = ext_table_size / DWSIZE; | ||
| 92 | while (i--) | ||
| 93 | ext_table_sum += ext_tablep[i]; | ||
| 94 | if (ext_table_sum) { | ||
| 95 | if (print_err) | ||
| 96 | pr_warn("aborting, bad extended signature table checksum\n"); | ||
| 97 | return -EINVAL; | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | /* calculate the checksum */ | ||
| 102 | orig_sum = 0; | ||
| 103 | i = (MC_HEADER_SIZE + data_size) / DWSIZE; | ||
| 104 | while (i--) | ||
| 105 | orig_sum += ((int *)mc)[i]; | ||
| 106 | if (orig_sum) { | ||
| 107 | if (print_err) | ||
| 108 | pr_err("aborting, bad checksum\n"); | ||
| 109 | return -EINVAL; | ||
| 110 | } | ||
| 111 | if (!ext_table_size) | ||
| 112 | return 0; | ||
| 113 | /* check extended signature checksum */ | ||
| 114 | for (i = 0; i < ext_sigcount; i++) { | ||
| 115 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE + | ||
| 116 | EXT_SIGNATURE_SIZE * i; | ||
| 117 | sum = orig_sum | ||
| 118 | - (mc_header->sig + mc_header->pf + mc_header->cksum) | ||
| 119 | + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); | ||
| 120 | if (sum) { | ||
| 121 | if (print_err) | ||
| 122 | pr_err("aborting, bad checksum\n"); | ||
| 123 | return -EINVAL; | ||
| 124 | } | ||
| 125 | } | ||
| 126 | return 0; | ||
| 127 | } | ||
| 128 | EXPORT_SYMBOL_GPL(microcode_sanity_check); | ||
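The convention behind all the summing in microcode_sanity_check(): Intel microcode images carry a cksum field chosen so that each covered region adds up to zero in 32-bit words, so validation is just accumulate-and-compare. A standalone restatement:

    /* True if a region obeys the zero-sum convention used above. */
    static int region_sum_ok(const void *img, unsigned long bytes)
    {
            const u32 *p = img;
            u32 sum = 0;
            unsigned long i;

            for (i = 0; i < bytes / sizeof(u32); i++)
                    sum += p[i];

            return sum == 0;                /* non-zero => corrupted */
    }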
| 129 | |||
| 130 | /* | ||
| 131 | * return 0 - no update found | ||
| 132 | * return 1 - found update | ||
| 133 | */ | ||
| 134 | int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev) | ||
| 135 | { | ||
| 136 | struct microcode_header_intel *mc_header = mc; | ||
| 137 | struct extended_sigtable *ext_header; | ||
| 138 | unsigned long total_size = get_totalsize(mc_header); | ||
| 139 | int ext_sigcount, i; | ||
| 140 | struct extended_signature *ext_sig; | ||
| 141 | |||
| 142 | if (update_match_cpu(csig, cpf, mc_header->sig, mc_header->pf)) | ||
| 143 | return 1; | ||
| 144 | |||
| 145 | /* Look for ext. headers: */ | ||
| 146 | if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) | ||
| 147 | return 0; | ||
| 148 | |||
| 149 | ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; | ||
| 150 | ext_sigcount = ext_header->count; | ||
| 151 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; | ||
| 152 | |||
| 153 | for (i = 0; i < ext_sigcount; i++) { | ||
| 154 | if (update_match_cpu(csig, cpf, ext_sig->sig, ext_sig->pf)) | ||
| 155 | return 1; | ||
| 156 | ext_sig++; | ||
| 157 | } | ||
| 158 | return 0; | ||
| 159 | } | ||
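For orientation, the image layout that get_matching_sig() walks, per the constants used in this file (a sketch, not the authoritative header definitions):

    /*
     * offset 0                 struct microcode_header_intel (MC_HEADER_SIZE)
     * MC_HEADER_SIZE           microcode data, get_datasize() bytes
     * + data size              struct extended_sigtable (EXT_HEADER_SIZE,
     *                          holds count among other fields)
     * + EXT_HEADER_SIZE        extended_signature[0 .. count-1], each
     *                          EXT_SIGNATURE_SIZE bytes: sig, pf, cksum
     */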
| 160 | |||
| 161 | /* | ||
| 162 | * return 0 - no update found | ||
| 163 | * return 1 - found update | ||
| 164 | */ | ||
| 165 | int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev) | ||
| 166 | { | ||
| 167 | struct microcode_header_intel *mc_header = mc; | ||
| 168 | |||
| 169 | if (!update_match_revision(mc_header, rev)) | ||
| 170 | return 0; | ||
| 171 | |||
| 172 | return get_matching_sig(csig, cpf, mc, rev); | ||
| 173 | } | ||
| 174 | EXPORT_SYMBOL_GPL(get_matching_microcode); | ||
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d41815265a0b..4903a03ae876 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <asm/tlb.h> | 16 | #include <asm/tlb.h> |
| 17 | #include <asm/proto.h> | 17 | #include <asm/proto.h> |
| 18 | #include <asm/dma.h> /* for MAX_DMA_PFN */ | 18 | #include <asm/dma.h> /* for MAX_DMA_PFN */ |
| 19 | #include <asm/microcode.h> | ||
| 19 | 20 | ||
| 20 | #include "mm_internal.h" | 21 | #include "mm_internal.h" |
| 21 | 22 | ||
| @@ -534,6 +535,15 @@ void free_initmem(void) | |||
| 534 | #ifdef CONFIG_BLK_DEV_INITRD | 535 | #ifdef CONFIG_BLK_DEV_INITRD |
| 535 | void __init free_initrd_mem(unsigned long start, unsigned long end) | 536 | void __init free_initrd_mem(unsigned long start, unsigned long end) |
| 536 | { | 537 | { |
| 538 | #ifdef CONFIG_MICROCODE_EARLY | ||
| 539 | /* | ||
| 540 | * Remember, initrd memory may contain microcode or other useful things. | ||
| 541 | * Before we lose the initrd memory, we need to find a place to hold them | ||
| 542 | * now that normal virtual memory is enabled. | ||
| 543 | */ | ||
| 544 | save_microcode_in_initrd(); | ||
| 545 | #endif | ||
| 546 | |||
| 537 | /* | 547 | /* |
| 538 | * end could be not aligned, and We can not align that, | 548 | * end could be not aligned, and We can not align that, |
| 539 | * decompresser could be confused by aligned initrd_end | 549 | * decompresser could be confused by aligned initrd_end |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f5e86eee4e0e..e8e34938c57d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
| @@ -1408,7 +1408,6 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) | |||
| 1408 | xen_mc_callback(set_current_cr3, (void *)cr3); | 1408 | xen_mc_callback(set_current_cr3, (void *)cr3); |
| 1409 | } | 1409 | } |
| 1410 | } | 1410 | } |
| 1411 | |||
| 1412 | static void xen_write_cr3(unsigned long cr3) | 1411 | static void xen_write_cr3(unsigned long cr3) |
| 1413 | { | 1412 | { |
| 1414 | BUG_ON(preemptible()); | 1413 | BUG_ON(preemptible()); |
| @@ -1434,6 +1433,45 @@ static void xen_write_cr3(unsigned long cr3) | |||
| 1434 | xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ | 1433 | xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ |
| 1435 | } | 1434 | } |
| 1436 | 1435 | ||
| 1436 | #ifdef CONFIG_X86_64 | ||
| 1437 | /* | ||
| 1438 | * At the start of the day - when Xen launches a guest, it has already | ||
| 1439 | * built pagetables for the guest. We diligently look over them | ||
| 1440 | * in xen_setup_kernel_pagetable and graft them, as appropriate, into the | ||
| 1441 | * init_level4_pgt and its friends. Then when we are happy we load | ||
| 1442 | * the new init_level4_pgt - and continue on. | ||
| 1443 | * | ||
| 1444 | * The generic code starts (start_kernel) and 'init_mem_mapping' sets | ||
| 1445 | * up the rest of the pagetables. When it has completed it loads the cr3. | ||
| 1446 | * N.B. that baremetal would start at 'start_kernel' (and the early | ||
| 1447 | * #PF handler would create bootstrap pagetables) - so we are running | ||
| 1448 | * under the same assumptions about what to do when write_cr3 is executed | ||
| 1449 | * at this point. | ||
| 1450 | * | ||
| 1451 | * Since there are no user-page tables at all, we have two variants | ||
| 1452 | * of xen_write_cr3 - the early bootup (this one), and the late one | ||
| 1453 | * (xen_write_cr3). The reason we have to do that is that in 64-bit | ||
| 1454 | * the Linux kernel and user-space are both in ring 3 while the | ||
| 1455 | * hypervisor is in ring 0. | ||
| 1456 | */ | ||
| 1457 | static void __init xen_write_cr3_init(unsigned long cr3) | ||
| 1458 | { | ||
| 1459 | BUG_ON(preemptible()); | ||
| 1460 | |||
| 1461 | xen_mc_batch(); /* disables interrupts */ | ||
| 1462 | |||
| 1463 | /* Update while interrupts are disabled, so it's atomic with | ||
| 1464 | respect to IPIs */ | ||
| 1465 | this_cpu_write(xen_cr3, cr3); | ||
| 1466 | |||
| 1467 | __xen_write_cr3(true, cr3); | ||
| 1468 | |||
| 1469 | xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ | ||
| 1470 | |||
| 1471 | pv_mmu_ops.write_cr3 = &xen_write_cr3; | ||
| 1472 | } | ||
| 1473 | #endif | ||
| 1474 | |||
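The closing assignment of xen_write_cr3_init() is a common paravirt idiom: a boot-only implementation installs the steady-state one after its single useful invocation. Distilled, with illustrative names:

    static void (*cr3_hook)(unsigned long);

    static void write_cr3_late(unsigned long cr3)
    {
            /* steady-state implementation */
    }

    static void __init write_cr3_early(unsigned long cr3)
    {
            /* ... one-time early handling ... */

            /* Self-replace: later calls go straight to the late variant. */
            cr3_hook = write_cr3_late;
    }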
| 1437 | static int xen_pgd_alloc(struct mm_struct *mm) | 1475 | static int xen_pgd_alloc(struct mm_struct *mm) |
| 1438 | { | 1476 | { |
| 1439 | pgd_t *pgd = mm->pgd; | 1477 | pgd_t *pgd = mm->pgd; |
| @@ -2102,11 +2140,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { | |||
| 2102 | .write_cr2 = xen_write_cr2, | 2140 | .write_cr2 = xen_write_cr2, |
| 2103 | 2141 | ||
| 2104 | .read_cr3 = xen_read_cr3, | 2142 | .read_cr3 = xen_read_cr3, |
| 2105 | #ifdef CONFIG_X86_32 | ||
| 2106 | .write_cr3 = xen_write_cr3_init, | 2143 | .write_cr3 = xen_write_cr3_init, |
| 2107 | #else | ||
| 2108 | .write_cr3 = xen_write_cr3, | ||
| 2109 | #endif | ||
| 2110 | 2144 | ||
| 2111 | .flush_tlb_user = xen_flush_tlb, | 2145 | .flush_tlb_user = xen_flush_tlb, |
| 2112 | .flush_tlb_kernel = xen_flush_tlb, | 2146 | .flush_tlb_kernel = xen_flush_tlb, |
diff --git a/drivers/char/random.c b/drivers/char/random.c index 85e81ec1451e..594bda9dcfc8 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c | |||
| @@ -445,7 +445,7 @@ static struct entropy_store input_pool = { | |||
| 445 | .poolinfo = &poolinfo_table[0], | 445 | .poolinfo = &poolinfo_table[0], |
| 446 | .name = "input", | 446 | .name = "input", |
| 447 | .limit = 1, | 447 | .limit = 1, |
| 448 | .lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock), | 448 | .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), |
| 449 | .pool = input_pool_data | 449 | .pool = input_pool_data |
| 450 | }; | 450 | }; |
| 451 | 451 | ||
| @@ -454,7 +454,7 @@ static struct entropy_store blocking_pool = { | |||
| 454 | .name = "blocking", | 454 | .name = "blocking", |
| 455 | .limit = 1, | 455 | .limit = 1, |
| 456 | .pull = &input_pool, | 456 | .pull = &input_pool, |
| 457 | .lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock), | 457 | .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock), |
| 458 | .pool = blocking_pool_data | 458 | .pool = blocking_pool_data |
| 459 | }; | 459 | }; |
| 460 | 460 | ||
| @@ -462,7 +462,7 @@ static struct entropy_store nonblocking_pool = { | |||
| 462 | .poolinfo = &poolinfo_table[1], | 462 | .poolinfo = &poolinfo_table[1], |
| 463 | .name = "nonblocking", | 463 | .name = "nonblocking", |
| 464 | .pull = &input_pool, | 464 | .pull = &input_pool, |
| 465 | .lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock), | 465 | .lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock), |
| 466 | .pool = nonblocking_pool_data | 466 | .pool = nonblocking_pool_data |
| 467 | }; | 467 | }; |
| 468 | 468 | ||
diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c index fa080ebd568f..ffeebc7e9f1c 100644 --- a/drivers/idle/i7300_idle.c +++ b/drivers/idle/i7300_idle.c | |||
| @@ -75,7 +75,7 @@ static unsigned long past_skip; | |||
| 75 | 75 | ||
| 76 | static struct pci_dev *fbd_dev; | 76 | static struct pci_dev *fbd_dev; |
| 77 | 77 | ||
| 78 | static spinlock_t i7300_idle_lock; | 78 | static raw_spinlock_t i7300_idle_lock; |
| 79 | static int i7300_idle_active; | 79 | static int i7300_idle_active; |
| 80 | 80 | ||
| 81 | static u8 i7300_idle_thrtctl_saved; | 81 | static u8 i7300_idle_thrtctl_saved; |
| @@ -457,7 +457,7 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val, | |||
| 457 | idle_begin_time = ktime_get(); | 457 | idle_begin_time = ktime_get(); |
| 458 | } | 458 | } |
| 459 | 459 | ||
| 460 | spin_lock_irqsave(&i7300_idle_lock, flags); | 460 | raw_spin_lock_irqsave(&i7300_idle_lock, flags); |
| 461 | if (val == IDLE_START) { | 461 | if (val == IDLE_START) { |
| 462 | 462 | ||
| 463 | cpumask_set_cpu(smp_processor_id(), idle_cpumask); | 463 | cpumask_set_cpu(smp_processor_id(), idle_cpumask); |
| @@ -506,7 +506,7 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val, | |||
| 506 | } | 506 | } |
| 507 | } | 507 | } |
| 508 | end: | 508 | end: |
| 509 | spin_unlock_irqrestore(&i7300_idle_lock, flags); | 509 | raw_spin_unlock_irqrestore(&i7300_idle_lock, flags); |
| 510 | return 0; | 510 | return 0; |
| 511 | } | 511 | } |
| 512 | 512 | ||
| @@ -548,7 +548,7 @@ struct debugfs_file_info { | |||
| 548 | 548 | ||
| 549 | static int __init i7300_idle_init(void) | 549 | static int __init i7300_idle_init(void) |
| 550 | { | 550 | { |
| 551 | spin_lock_init(&i7300_idle_lock); | 551 | raw_spin_lock_init(&i7300_idle_lock); |
| 552 | total_us = 0; | 552 | total_us = 0; |
| 553 | 553 | ||
| 554 | if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload)) | 554 | if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload)) |
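The spinlock-to-raw conversions here (and in kernel/time/ntp.c further down) all follow one pattern. A raw_spinlock_t keeps spinning on PREEMPT_RT, where ordinary spinlocks become sleeping locks, which idle notifiers and core timekeeping paths cannot tolerate:

    static DEFINE_RAW_SPINLOCK(my_lock);

    static void touch_state(void)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&my_lock, flags);
            /* hard-atomic section: never sleeps, even on PREEMPT_RT */
            raw_spin_unlock_irqrestore(&my_lock, flags);
    }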
diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c index 3bc244d2636a..a62c4a47d52c 100644 --- a/drivers/usb/chipidea/debug.c +++ b/drivers/usb/chipidea/debug.c | |||
| @@ -222,7 +222,7 @@ static struct { | |||
| 222 | } dbg_data = { | 222 | } dbg_data = { |
| 223 | .idx = 0, | 223 | .idx = 0, |
| 224 | .tty = 0, | 224 | .tty = 0, |
| 225 | .lck = __RW_LOCK_UNLOCKED(lck) | 225 | .lck = __RW_LOCK_UNLOCKED(dbg_data.lck) |
| 226 | }; | 226 | }; |
| 227 | 227 | ||
| 228 | /** | 228 | /** |
| @@ -516,7 +516,7 @@ struct files_struct init_files = { | |||
| 516 | .close_on_exec = init_files.close_on_exec_init, | 516 | .close_on_exec = init_files.close_on_exec_init, |
| 517 | .open_fds = init_files.open_fds_init, | 517 | .open_fds = init_files.open_fds_init, |
| 518 | }, | 518 | }, |
| 519 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), | 519 | .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock), |
| 520 | }; | 520 | }; |
| 521 | 521 | ||
| 522 | /* | 522 | /* |
diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h index 2533fddd34a6..d8d4c898c1bb 100644 --- a/include/asm-generic/cmpxchg-local.h +++ b/include/asm-generic/cmpxchg-local.h | |||
| @@ -21,7 +21,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, | |||
| 21 | if (size == 8 && sizeof(unsigned long) != 8) | 21 | if (size == 8 && sizeof(unsigned long) != 8) |
| 22 | wrong_size_cmpxchg(ptr); | 22 | wrong_size_cmpxchg(ptr); |
| 23 | 23 | ||
| 24 | local_irq_save(flags); | 24 | raw_local_irq_save(flags); |
| 25 | switch (size) { | 25 | switch (size) { |
| 26 | case 1: prev = *(u8 *)ptr; | 26 | case 1: prev = *(u8 *)ptr; |
| 27 | if (prev == old) | 27 | if (prev == old) |
| @@ -42,7 +42,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, | |||
| 42 | default: | 42 | default: |
| 43 | wrong_size_cmpxchg(ptr); | 43 | wrong_size_cmpxchg(ptr); |
| 44 | } | 44 | } |
| 45 | local_irq_restore(flags); | 45 | raw_local_irq_restore(flags); |
| 46 | return prev; | 46 | return prev; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| @@ -55,11 +55,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr, | |||
| 55 | u64 prev; | 55 | u64 prev; |
| 56 | unsigned long flags; | 56 | unsigned long flags; |
| 57 | 57 | ||
| 58 | local_irq_save(flags); | 58 | raw_local_irq_save(flags); |
| 59 | prev = *(u64 *)ptr; | 59 | prev = *(u64 *)ptr; |
| 60 | if (prev == old) | 60 | if (prev == old) |
| 61 | *(u64 *)ptr = new; | 61 | *(u64 *)ptr = new; |
| 62 | local_irq_restore(flags); | 62 | raw_local_irq_restore(flags); |
| 63 | return prev; | 63 | return prev; |
| 64 | } | 64 | } |
| 65 | 65 | ||
diff --git a/include/linux/idr.h b/include/linux/idr.h index de7e190f1af4..e5eb125effe6 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h | |||
| @@ -136,7 +136,7 @@ struct ida { | |||
| 136 | struct ida_bitmap *free_bitmap; | 136 | struct ida_bitmap *free_bitmap; |
| 137 | }; | 137 | }; |
| 138 | 138 | ||
| 139 | #define IDA_INIT(name) { .idr = IDR_INIT(name), .free_bitmap = NULL, } | 139 | #define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, } |
| 140 | #define DEFINE_IDA(name) struct ida name = IDA_INIT(name) | 140 | #define DEFINE_IDA(name) struct ida name = IDA_INIT(name) |
| 141 | 141 | ||
| 142 | int ida_pre_get(struct ida *ida, gfp_t gfp_mask); | 142 | int ida_pre_get(struct ida *ida, gfp_t gfp_mask); |
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index bfe88c4aa251..f1e877b79ed8 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h | |||
| @@ -412,7 +412,7 @@ struct lock_class_key { }; | |||
| 412 | 412 | ||
| 413 | #define lockdep_depth(tsk) (0) | 413 | #define lockdep_depth(tsk) (0) |
| 414 | 414 | ||
| 415 | #define lockdep_assert_held(l) do { } while (0) | 415 | #define lockdep_assert_held(l) do { (void)(l); } while (0) |
| 416 | 416 | ||
| 417 | #define lockdep_recursing(tsk) (0) | 417 | #define lockdep_recursing(tsk) (0) |
| 418 | 418 | ||
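The "(void)(l)" body matters for CONFIG_LOCKDEP=n builds: a variable referenced only by the assertion would otherwise draw an unused-variable warning. The situation it fixes, sketched with illustrative names:

    static void consume(struct foo *f)
    {
            spinlock_t *lock = &f->lock;    /* used only by the assertion */

            lockdep_assert_held(lock);      /* (void)(l) makes this a use */
            /* ... */
    }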
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 600060e25ec6..18299057402f 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h | |||
| @@ -30,92 +30,12 @@ | |||
| 30 | #include <linux/preempt.h> | 30 | #include <linux/preempt.h> |
| 31 | #include <asm/processor.h> | 31 | #include <asm/processor.h> |
| 32 | 32 | ||
| 33 | typedef struct { | ||
| 34 | unsigned sequence; | ||
| 35 | spinlock_t lock; | ||
| 36 | } seqlock_t; | ||
| 37 | |||
| 38 | /* | ||
| 39 | * These macros triggered gcc-3.x compile-time problems. We think these are | ||
| 40 | * OK now. Be cautious. | ||
| 41 | */ | ||
| 42 | #define __SEQLOCK_UNLOCKED(lockname) \ | ||
| 43 | { 0, __SPIN_LOCK_UNLOCKED(lockname) } | ||
| 44 | |||
| 45 | #define seqlock_init(x) \ | ||
| 46 | do { \ | ||
| 47 | (x)->sequence = 0; \ | ||
| 48 | spin_lock_init(&(x)->lock); \ | ||
| 49 | } while (0) | ||
| 50 | |||
| 51 | #define DEFINE_SEQLOCK(x) \ | ||
| 52 | seqlock_t x = __SEQLOCK_UNLOCKED(x) | ||
| 53 | |||
| 54 | /* Lock out other writers and update the count. | ||
| 55 | * Acts like a normal spin_lock/unlock. | ||
| 56 | * Don't need preempt_disable() because that is in the spin_lock already. | ||
| 57 | */ | ||
| 58 | static inline void write_seqlock(seqlock_t *sl) | ||
| 59 | { | ||
| 60 | spin_lock(&sl->lock); | ||
| 61 | ++sl->sequence; | ||
| 62 | smp_wmb(); | ||
| 63 | } | ||
| 64 | |||
| 65 | static inline void write_sequnlock(seqlock_t *sl) | ||
| 66 | { | ||
| 67 | smp_wmb(); | ||
| 68 | sl->sequence++; | ||
| 69 | spin_unlock(&sl->lock); | ||
| 70 | } | ||
| 71 | |||
| 72 | static inline int write_tryseqlock(seqlock_t *sl) | ||
| 73 | { | ||
| 74 | int ret = spin_trylock(&sl->lock); | ||
| 75 | |||
| 76 | if (ret) { | ||
| 77 | ++sl->sequence; | ||
| 78 | smp_wmb(); | ||
| 79 | } | ||
| 80 | return ret; | ||
| 81 | } | ||
| 82 | |||
| 83 | /* Start of read calculation -- fetch last complete writer token */ | ||
| 84 | static __always_inline unsigned read_seqbegin(const seqlock_t *sl) | ||
| 85 | { | ||
| 86 | unsigned ret; | ||
| 87 | |||
| 88 | repeat: | ||
| 89 | ret = ACCESS_ONCE(sl->sequence); | ||
| 90 | if (unlikely(ret & 1)) { | ||
| 91 | cpu_relax(); | ||
| 92 | goto repeat; | ||
| 93 | } | ||
| 94 | smp_rmb(); | ||
| 95 | |||
| 96 | return ret; | ||
| 97 | } | ||
| 98 | |||
| 99 | /* | ||
| 100 | * Test if reader processed invalid data. | ||
| 101 | * | ||
| 102 | * If sequence value changed then writer changed data while in section. | ||
| 103 | */ | ||
| 104 | static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start) | ||
| 105 | { | ||
| 106 | smp_rmb(); | ||
| 107 | |||
| 108 | return unlikely(sl->sequence != start); | ||
| 109 | } | ||
| 110 | |||
| 111 | |||
| 112 | /* | 33 | /* |
| 113 | * Version using sequence counter only. | 34 | * Version using sequence counter only. |
| 114 | * This can be used when code has its own mutex protecting the | 35 | * This can be used when code has its own mutex protecting the |
| 115 | * updating starting before the write_seqcount_begin() and ending | 36 | * updating starting before the write_seqcount_begin() and ending |
| 116 | * after the write_seqcount_end(). | 37 | * after the write_seqcount_end(). |
| 117 | */ | 38 | */ |
| 118 | |||
| 119 | typedef struct seqcount { | 39 | typedef struct seqcount { |
| 120 | unsigned sequence; | 40 | unsigned sequence; |
| 121 | } seqcount_t; | 41 | } seqcount_t; |
| @@ -218,7 +138,6 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) | |||
| 218 | static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) | 138 | static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) |
| 219 | { | 139 | { |
| 220 | smp_rmb(); | 140 | smp_rmb(); |
| 221 | |||
| 222 | return __read_seqcount_retry(s, start); | 141 | return __read_seqcount_retry(s, start); |
| 223 | } | 142 | } |
| 224 | 143 | ||
| @@ -252,31 +171,101 @@ static inline void write_seqcount_barrier(seqcount_t *s) | |||
| 252 | s->sequence+=2; | 171 | s->sequence+=2; |
| 253 | } | 172 | } |
| 254 | 173 | ||
| 174 | typedef struct { | ||
| 175 | struct seqcount seqcount; | ||
| 176 | spinlock_t lock; | ||
| 177 | } seqlock_t; | ||
| 178 | |||
| 255 | /* | 179 | /* |
| 256 | * Possible sw/hw IRQ protected versions of the interfaces. | 180 | * These macros triggered gcc-3.x compile-time problems. We think these are |
| 181 | * OK now. Be cautious. | ||
| 257 | */ | 182 | */ |
| 258 | #define write_seqlock_irqsave(lock, flags) \ | 183 | #define __SEQLOCK_UNLOCKED(lockname) \ |
| 259 | do { local_irq_save(flags); write_seqlock(lock); } while (0) | 184 | { \ |
| 260 | #define write_seqlock_irq(lock) \ | 185 | .seqcount = SEQCNT_ZERO, \ |
| 261 | do { local_irq_disable(); write_seqlock(lock); } while (0) | 186 | .lock = __SPIN_LOCK_UNLOCKED(lockname) \ |
| 262 | #define write_seqlock_bh(lock) \ | 187 | } |
| 263 | do { local_bh_disable(); write_seqlock(lock); } while (0) | 188 | |
| 189 | #define seqlock_init(x) \ | ||
| 190 | do { \ | ||
| 191 | seqcount_init(&(x)->seqcount); \ | ||
| 192 | spin_lock_init(&(x)->lock); \ | ||
| 193 | } while (0) | ||
| 264 | 194 | ||
| 265 | #define write_sequnlock_irqrestore(lock, flags) \ | 195 | #define DEFINE_SEQLOCK(x) \ |
| 266 | do { write_sequnlock(lock); local_irq_restore(flags); } while(0) | 196 | seqlock_t x = __SEQLOCK_UNLOCKED(x) |
| 267 | #define write_sequnlock_irq(lock) \ | ||
| 268 | do { write_sequnlock(lock); local_irq_enable(); } while(0) | ||
| 269 | #define write_sequnlock_bh(lock) \ | ||
| 270 | do { write_sequnlock(lock); local_bh_enable(); } while(0) | ||
| 271 | 197 | ||
| 272 | #define read_seqbegin_irqsave(lock, flags) \ | 198 | /* |
| 273 | ({ local_irq_save(flags); read_seqbegin(lock); }) | 199 | * Read side functions for starting and finalizing a read side section. |
| 200 | */ | ||
| 201 | static inline unsigned read_seqbegin(const seqlock_t *sl) | ||
| 202 | { | ||
| 203 | return read_seqcount_begin(&sl->seqcount); | ||
| 204 | } | ||
| 274 | 205 | ||
| 275 | #define read_seqretry_irqrestore(lock, iv, flags) \ | 206 | static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) |
| 276 | ({ \ | 207 | { |
| 277 | int ret = read_seqretry(lock, iv); \ | 208 | return read_seqcount_retry(&sl->seqcount, start); |
| 278 | local_irq_restore(flags); \ | 209 | } |
| 279 | ret; \ | 210 | |
| 280 | }) | 211 | /* |
| 212 | * Lock out other writers and update the count. | ||
| 213 | * Acts like a normal spin_lock/unlock. | ||
| 214 | * Don't need preempt_disable() because that is in the spin_lock already. | ||
| 215 | */ | ||
| 216 | static inline void write_seqlock(seqlock_t *sl) | ||
| 217 | { | ||
| 218 | spin_lock(&sl->lock); | ||
| 219 | write_seqcount_begin(&sl->seqcount); | ||
| 220 | } | ||
| 221 | |||
| 222 | static inline void write_sequnlock(seqlock_t *sl) | ||
| 223 | { | ||
| 224 | write_seqcount_end(&sl->seqcount); | ||
| 225 | spin_unlock(&sl->lock); | ||
| 226 | } | ||
| 227 | |||
| 228 | static inline void write_seqlock_bh(seqlock_t *sl) | ||
| 229 | { | ||
| 230 | spin_lock_bh(&sl->lock); | ||
| 231 | write_seqcount_begin(&sl->seqcount); | ||
| 232 | } | ||
| 233 | |||
| 234 | static inline void write_sequnlock_bh(seqlock_t *sl) | ||
| 235 | { | ||
| 236 | write_seqcount_end(&sl->seqcount); | ||
| 237 | spin_unlock_bh(&sl->lock); | ||
| 238 | } | ||
| 239 | |||
| 240 | static inline void write_seqlock_irq(seqlock_t *sl) | ||
| 241 | { | ||
| 242 | spin_lock_irq(&sl->lock); | ||
| 243 | write_seqcount_begin(&sl->seqcount); | ||
| 244 | } | ||
| 245 | |||
| 246 | static inline void write_sequnlock_irq(seqlock_t *sl) | ||
| 247 | { | ||
| 248 | write_seqcount_end(&sl->seqcount); | ||
| 249 | spin_unlock_irq(&sl->lock); | ||
| 250 | } | ||
| 251 | |||
| 252 | static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) | ||
| 253 | { | ||
| 254 | unsigned long flags; | ||
| 255 | |||
| 256 | spin_lock_irqsave(&sl->lock, flags); | ||
| 257 | write_seqcount_begin(&sl->seqcount); | ||
| 258 | return flags; | ||
| 259 | } | ||
| 260 | |||
| 261 | #define write_seqlock_irqsave(lock, flags) \ | ||
| 262 | do { flags = __write_seqlock_irqsave(lock); } while (0) | ||
| 263 | |||
| 264 | static inline void | ||
| 265 | write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) | ||
| 266 | { | ||
| 267 | write_seqcount_end(&sl->seqcount); | ||
| 268 | spin_unlock_irqrestore(&sl->lock, flags); | ||
| 269 | } | ||
| 281 | 270 | ||
| 282 | #endif /* __LINUX_SEQLOCK_H */ | 271 | #endif /* __LINUX_SEQLOCK_H */ |
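The reworked seqlock keeps the classic usage pattern; a minimal example against the API above:

    static DEFINE_SEQLOCK(foo_lock);
    static u64 foo_a, foo_b;

    static void foo_update(u64 a, u64 b)
    {
            write_seqlock(&foo_lock);       /* excludes other writers */
            foo_a = a;
            foo_b = b;
            write_sequnlock(&foo_lock);
    }

    static u64 foo_snapshot(void)
    {
            unsigned seq;
            u64 a, b;

            do {
                    seq = read_seqbegin(&foo_lock);
                    a = foo_a;
                    b = foo_b;
            } while (read_seqretry(&foo_lock, seq));  /* retry if a write overlapped */

            return a + b;
    }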
diff --git a/kernel/futex.c b/kernel/futex.c index 9618b6e9fb36..fbc07a29ec53 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -2472,8 +2472,6 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, | |||
| 2472 | if (!futex_cmpxchg_enabled) | 2472 | if (!futex_cmpxchg_enabled) |
| 2473 | return -ENOSYS; | 2473 | return -ENOSYS; |
| 2474 | 2474 | ||
| 2475 | WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); | ||
| 2476 | |||
| 2477 | rcu_read_lock(); | 2475 | rcu_read_lock(); |
| 2478 | 2476 | ||
| 2479 | ret = -ESRCH; | 2477 | ret = -ESRCH; |
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 83e368b005fc..a9642d528630 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c | |||
| @@ -142,8 +142,6 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, | |||
| 142 | if (!futex_cmpxchg_enabled) | 142 | if (!futex_cmpxchg_enabled) |
| 143 | return -ENOSYS; | 143 | return -ENOSYS; |
| 144 | 144 | ||
| 145 | WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); | ||
| 146 | |||
| 147 | rcu_read_lock(); | 145 | rcu_read_lock(); |
| 148 | 146 | ||
| 149 | ret = -ESRCH; | 147 | ret = -ESRCH; |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 7981e5b2350d..8a0efac4f99d 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
| @@ -3190,9 +3190,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3190 | #endif | 3190 | #endif |
| 3191 | if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { | 3191 | if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { |
| 3192 | debug_locks_off(); | 3192 | debug_locks_off(); |
| 3193 | printk("BUG: MAX_LOCK_DEPTH too low!\n"); | 3193 | printk("BUG: MAX_LOCK_DEPTH too low, depth: %i max: %lu!\n", |
| 3194 | curr->lockdep_depth, MAX_LOCK_DEPTH); | ||
| 3194 | printk("turning off the locking correctness validator.\n"); | 3195 | printk("turning off the locking correctness validator.\n"); |
| 3196 | |||
| 3197 | lockdep_print_held_locks(current); | ||
| 3198 | debug_show_all_locks(); | ||
| 3195 | dump_stack(); | 3199 | dump_stack(); |
| 3200 | |||
| 3196 | return 0; | 3201 | return 0; |
| 3197 | } | 3202 | } |
| 3198 | 3203 | ||
| @@ -3203,7 +3208,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3203 | } | 3208 | } |
| 3204 | 3209 | ||
| 3205 | static int | 3210 | static int |
| 3206 | print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, | 3211 | print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, |
| 3207 | unsigned long ip) | 3212 | unsigned long ip) |
| 3208 | { | 3213 | { |
| 3209 | if (!debug_locks_off()) | 3214 | if (!debug_locks_off()) |
| @@ -3246,7 +3251,7 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, | |||
| 3246 | return 0; | 3251 | return 0; |
| 3247 | 3252 | ||
| 3248 | if (curr->lockdep_depth <= 0) | 3253 | if (curr->lockdep_depth <= 0) |
| 3249 | return print_unlock_inbalance_bug(curr, lock, ip); | 3254 | return print_unlock_imbalance_bug(curr, lock, ip); |
| 3250 | 3255 | ||
| 3251 | return 1; | 3256 | return 1; |
| 3252 | } | 3257 | } |
| @@ -3317,7 +3322,7 @@ __lock_set_class(struct lockdep_map *lock, const char *name, | |||
| 3317 | goto found_it; | 3322 | goto found_it; |
| 3318 | prev_hlock = hlock; | 3323 | prev_hlock = hlock; |
| 3319 | } | 3324 | } |
| 3320 | return print_unlock_inbalance_bug(curr, lock, ip); | 3325 | return print_unlock_imbalance_bug(curr, lock, ip); |
| 3321 | 3326 | ||
| 3322 | found_it: | 3327 | found_it: |
| 3323 | lockdep_init_map(lock, name, key, 0); | 3328 | lockdep_init_map(lock, name, key, 0); |
| @@ -3384,7 +3389,7 @@ lock_release_non_nested(struct task_struct *curr, | |||
| 3384 | goto found_it; | 3389 | goto found_it; |
| 3385 | prev_hlock = hlock; | 3390 | prev_hlock = hlock; |
| 3386 | } | 3391 | } |
| 3387 | return print_unlock_inbalance_bug(curr, lock, ip); | 3392 | return print_unlock_imbalance_bug(curr, lock, ip); |
| 3388 | 3393 | ||
| 3389 | found_it: | 3394 | found_it: |
| 3390 | if (hlock->instance == lock) | 3395 | if (hlock->instance == lock) |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index b10a42bb0165..072bb066bb7d 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | * NTP timekeeping variables: | 23 | * NTP timekeeping variables: |
| 24 | */ | 24 | */ |
| 25 | 25 | ||
| 26 | DEFINE_SPINLOCK(ntp_lock); | 26 | DEFINE_RAW_SPINLOCK(ntp_lock); |
| 27 | 27 | ||
| 28 | 28 | ||
| 29 | /* USER_HZ period (usecs): */ | 29 | /* USER_HZ period (usecs): */ |
| @@ -348,7 +348,7 @@ void ntp_clear(void) | |||
| 348 | { | 348 | { |
| 349 | unsigned long flags; | 349 | unsigned long flags; |
| 350 | 350 | ||
| 351 | spin_lock_irqsave(&ntp_lock, flags); | 351 | raw_spin_lock_irqsave(&ntp_lock, flags); |
| 352 | 352 | ||
| 353 | time_adjust = 0; /* stop active adjtime() */ | 353 | time_adjust = 0; /* stop active adjtime() */ |
| 354 | time_status |= STA_UNSYNC; | 354 | time_status |= STA_UNSYNC; |
| @@ -362,7 +362,7 @@ void ntp_clear(void) | |||
| 362 | 362 | ||
| 363 | /* Clear PPS state variables */ | 363 | /* Clear PPS state variables */ |
| 364 | pps_clear(); | 364 | pps_clear(); |
| 365 | spin_unlock_irqrestore(&ntp_lock, flags); | 365 | raw_spin_unlock_irqrestore(&ntp_lock, flags); |
| 366 | 366 | ||
| 367 | } | 367 | } |
| 368 | 368 | ||
| @@ -372,9 +372,9 @@ u64 ntp_tick_length(void) | |||
| 372 | unsigned long flags; | 372 | unsigned long flags; |
| 373 | s64 ret; | 373 | s64 ret; |
| 374 | 374 | ||
| 375 | spin_lock_irqsave(&ntp_lock, flags); | 375 | raw_spin_lock_irqsave(&ntp_lock, flags); |
| 376 | ret = tick_length; | 376 | ret = tick_length; |
| 377 | spin_unlock_irqrestore(&ntp_lock, flags); | 377 | raw_spin_unlock_irqrestore(&ntp_lock, flags); |
| 378 | return ret; | 378 | return ret; |
| 379 | } | 379 | } |
| 380 | 380 | ||
| @@ -395,7 +395,7 @@ int second_overflow(unsigned long secs) | |||
| 395 | int leap = 0; | 395 | int leap = 0; |
| 396 | unsigned long flags; | 396 | unsigned long flags; |
| 397 | 397 | ||
| 398 | spin_lock_irqsave(&ntp_lock, flags); | 398 | raw_spin_lock_irqsave(&ntp_lock, flags); |
| 399 | 399 | ||
| 400 | /* | 400 | /* |
| 401 | * Leap second processing. If in leap-insert state at the end of the | 401 | * Leap second processing. If in leap-insert state at the end of the |
| @@ -479,7 +479,7 @@ int second_overflow(unsigned long secs) | |||
| 479 | time_adjust = 0; | 479 | time_adjust = 0; |
| 480 | 480 | ||
| 481 | out: | 481 | out: |
| 482 | spin_unlock_irqrestore(&ntp_lock, flags); | 482 | raw_spin_unlock_irqrestore(&ntp_lock, flags); |
| 483 | 483 | ||
| 484 | return leap; | 484 | return leap; |
| 485 | } | 485 | } |
| @@ -672,7 +672,7 @@ int do_adjtimex(struct timex *txc) | |||
| 672 | 672 | ||
| 673 | getnstimeofday(&ts); | 673 | getnstimeofday(&ts); |
| 674 | 674 | ||
| 675 | spin_lock_irq(&ntp_lock); | 675 | raw_spin_lock_irq(&ntp_lock); |
| 676 | 676 | ||
| 677 | if (txc->modes & ADJ_ADJTIME) { | 677 | if (txc->modes & ADJ_ADJTIME) { |
| 678 | long save_adjust = time_adjust; | 678 | long save_adjust = time_adjust; |
| @@ -714,7 +714,7 @@ int do_adjtimex(struct timex *txc) | |||
| 714 | /* fill PPS status fields */ | 714 | /* fill PPS status fields */ |
| 715 | pps_fill_timex(txc); | 715 | pps_fill_timex(txc); |
| 716 | 716 | ||
| 717 | spin_unlock_irq(&ntp_lock); | 717 | raw_spin_unlock_irq(&ntp_lock); |
| 718 | 718 | ||
| 719 | txc->time.tv_sec = ts.tv_sec; | 719 | txc->time.tv_sec = ts.tv_sec; |
| 720 | txc->time.tv_usec = ts.tv_nsec; | 720 | txc->time.tv_usec = ts.tv_nsec; |
| @@ -912,7 +912,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | |||
| 912 | 912 | ||
| 913 | pts_norm = pps_normalize_ts(*phase_ts); | 913 | pts_norm = pps_normalize_ts(*phase_ts); |
| 914 | 914 | ||
| 915 | spin_lock_irqsave(&ntp_lock, flags); | 915 | raw_spin_lock_irqsave(&ntp_lock, flags); |
| 916 | 916 | ||
| 917 | /* clear the error bits, they will be set again if needed */ | 917 | /* clear the error bits, they will be set again if needed */ |
| 918 | time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); | 918 | time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); |
| @@ -925,7 +925,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | |||
| 925 | * just start the frequency interval */ | 925 | * just start the frequency interval */ |
| 926 | if (unlikely(pps_fbase.tv_sec == 0)) { | 926 | if (unlikely(pps_fbase.tv_sec == 0)) { |
| 927 | pps_fbase = *raw_ts; | 927 | pps_fbase = *raw_ts; |
| 928 | spin_unlock_irqrestore(&ntp_lock, flags); | 928 | raw_spin_unlock_irqrestore(&ntp_lock, flags); |
| 929 | return; | 929 | return; |
| 930 | } | 930 | } |
| 931 | 931 | ||
| @@ -940,7 +940,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | |||
| 940 | time_status |= STA_PPSJITTER; | 940 | time_status |= STA_PPSJITTER; |
| 941 | /* restart the frequency calibration interval */ | 941 | /* restart the frequency calibration interval */ |
| 942 | pps_fbase = *raw_ts; | 942 | pps_fbase = *raw_ts; |
| 943 | spin_unlock_irqrestore(&ntp_lock, flags); | 943 | raw_spin_unlock_irqrestore(&ntp_lock, flags); |
| 944 | pr_err("hardpps: PPSJITTER: bad pulse\n"); | 944 | pr_err("hardpps: PPSJITTER: bad pulse\n"); |
| 945 | return; | 945 | return; |
| 946 | } | 946 | } |
| @@ -957,7 +957,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | |||
| 957 | 957 | ||
| 958 | hardpps_update_phase(pts_norm.nsec); | 958 | hardpps_update_phase(pts_norm.nsec); |
| 959 | 959 | ||
| 960 | spin_unlock_irqrestore(&ntp_lock, flags); | 960 | raw_spin_unlock_irqrestore(&ntp_lock, flags); |
| 961 | } | 961 | } |
| 962 | EXPORT_SYMBOL(hardpps); | 962 | EXPORT_SYMBOL(hardpps); |
| 963 | 963 | ||
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 27689422aa92..4a944676358e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -113,9 +113,9 @@ static int get_softlockup_thresh(void) | |||
| 113 | * resolution, and we don't need to waste time with a big divide when | 113 | * resolution, and we don't need to waste time with a big divide when |
| 114 | * 2^30ns == 1.074s. | 114 | * 2^30ns == 1.074s. |
| 115 | */ | 115 | */ |
| 116 | static unsigned long get_timestamp(int this_cpu) | 116 | static unsigned long get_timestamp(void) |
| 117 | { | 117 | { |
| 118 | return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ | 118 | return local_clock() >> 30LL; /* 2^30 ~= 10^9 */ |
| 119 | } | 119 | } |
| 120 | 120 | ||
| 121 | static void set_sample_period(void) | 121 | static void set_sample_period(void) |
| @@ -133,9 +133,7 @@ static void set_sample_period(void) | |||
| 133 | /* Commands for resetting the watchdog */ | 133 | /* Commands for resetting the watchdog */ |
| 134 | static void __touch_watchdog(void) | 134 | static void __touch_watchdog(void) |
| 135 | { | 135 | { |
| 136 | int this_cpu = smp_processor_id(); | 136 | __this_cpu_write(watchdog_touch_ts, get_timestamp()); |
| 137 | |||
| 138 | __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu)); | ||
| 139 | } | 137 | } |
| 140 | 138 | ||
| 141 | void touch_softlockup_watchdog(void) | 139 | void touch_softlockup_watchdog(void) |
| @@ -196,7 +194,7 @@ static int is_hardlockup(void) | |||
| 196 | 194 | ||
| 197 | static int is_softlockup(unsigned long touch_ts) | 195 | static int is_softlockup(unsigned long touch_ts) |
| 198 | { | 196 | { |
| 199 | unsigned long now = get_timestamp(smp_processor_id()); | 197 | unsigned long now = get_timestamp(); |
| 200 | 198 | ||
| 201 | /* Warn about unreasonable delays: */ | 199 | /* Warn about unreasonable delays: */ |
| 202 | if (time_after(now, touch_ts + get_softlockup_thresh())) | 200 | if (time_after(now, touch_ts + get_softlockup_thresh())) |
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 7aae0f2a5e0a..c3eb261a7df3 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c | |||
| @@ -47,10 +47,10 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose); | |||
| 47 | * Normal standalone locks, for the circular and irq-context | 47 | * Normal standalone locks, for the circular and irq-context |
| 48 | * dependency tests: | 48 | * dependency tests: |
| 49 | */ | 49 | */ |
| 50 | static DEFINE_SPINLOCK(lock_A); | 50 | static DEFINE_RAW_SPINLOCK(lock_A); |
| 51 | static DEFINE_SPINLOCK(lock_B); | 51 | static DEFINE_RAW_SPINLOCK(lock_B); |
| 52 | static DEFINE_SPINLOCK(lock_C); | 52 | static DEFINE_RAW_SPINLOCK(lock_C); |
| 53 | static DEFINE_SPINLOCK(lock_D); | 53 | static DEFINE_RAW_SPINLOCK(lock_D); |
| 54 | 54 | ||
| 55 | static DEFINE_RWLOCK(rwlock_A); | 55 | static DEFINE_RWLOCK(rwlock_A); |
| 56 | static DEFINE_RWLOCK(rwlock_B); | 56 | static DEFINE_RWLOCK(rwlock_B); |
| @@ -73,12 +73,12 @@ static DECLARE_RWSEM(rwsem_D); | |||
| 73 | * but X* and Y* are different classes. We do this so that | 73 | * but X* and Y* are different classes. We do this so that |
| 74 | * we do not trigger a real lockup: | 74 | * we do not trigger a real lockup: |
| 75 | */ | 75 | */ |
| 76 | static DEFINE_SPINLOCK(lock_X1); | 76 | static DEFINE_RAW_SPINLOCK(lock_X1); |
| 77 | static DEFINE_SPINLOCK(lock_X2); | 77 | static DEFINE_RAW_SPINLOCK(lock_X2); |
| 78 | static DEFINE_SPINLOCK(lock_Y1); | 78 | static DEFINE_RAW_SPINLOCK(lock_Y1); |
| 79 | static DEFINE_SPINLOCK(lock_Y2); | 79 | static DEFINE_RAW_SPINLOCK(lock_Y2); |
| 80 | static DEFINE_SPINLOCK(lock_Z1); | 80 | static DEFINE_RAW_SPINLOCK(lock_Z1); |
| 81 | static DEFINE_SPINLOCK(lock_Z2); | 81 | static DEFINE_RAW_SPINLOCK(lock_Z2); |
| 82 | 82 | ||
| 83 | static DEFINE_RWLOCK(rwlock_X1); | 83 | static DEFINE_RWLOCK(rwlock_X1); |
| 84 | static DEFINE_RWLOCK(rwlock_X2); | 84 | static DEFINE_RWLOCK(rwlock_X2); |
| @@ -107,10 +107,10 @@ static DECLARE_RWSEM(rwsem_Z2); | |||
| 107 | */ | 107 | */ |
| 108 | #define INIT_CLASS_FUNC(class) \ | 108 | #define INIT_CLASS_FUNC(class) \ |
| 109 | static noinline void \ | 109 | static noinline void \ |
| 110 | init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \ | 110 | init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \ |
| 111 | struct rw_semaphore *rwsem) \ | 111 | struct mutex *mutex, struct rw_semaphore *rwsem)\ |
| 112 | { \ | 112 | { \ |
| 113 | spin_lock_init(lock); \ | 113 | raw_spin_lock_init(lock); \ |
| 114 | rwlock_init(rwlock); \ | 114 | rwlock_init(rwlock); \ |
| 115 | mutex_init(mutex); \ | 115 | mutex_init(mutex); \ |
| 116 | init_rwsem(rwsem); \ | 116 | init_rwsem(rwsem); \ |
| @@ -168,10 +168,10 @@ static void init_shared_classes(void) | |||
| 168 | * Shortcuts for lock/unlock API variants, to keep | 168 | * Shortcuts for lock/unlock API variants, to keep |
| 169 | * the testcases compact: | 169 | * the testcases compact: |
| 170 | */ | 170 | */ |
| 171 | #define L(x) spin_lock(&lock_##x) | 171 | #define L(x) raw_spin_lock(&lock_##x) |
| 172 | #define U(x) spin_unlock(&lock_##x) | 172 | #define U(x) raw_spin_unlock(&lock_##x) |
| 173 | #define LU(x) L(x); U(x) | 173 | #define LU(x) L(x); U(x) |
| 174 | #define SI(x) spin_lock_init(&lock_##x) | 174 | #define SI(x) raw_spin_lock_init(&lock_##x) |
| 175 | 175 | ||
| 176 | #define WL(x) write_lock(&rwlock_##x) | 176 | #define WL(x) write_lock(&rwlock_##x) |
| 177 | #define WU(x) write_unlock(&rwlock_##x) | 177 | #define WU(x) write_unlock(&rwlock_##x) |
| @@ -911,7 +911,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) | |||
| 911 | 911 | ||
| 912 | #define I2(x) \ | 912 | #define I2(x) \ |
| 913 | do { \ | 913 | do { \ |
| 914 | spin_lock_init(&lock_##x); \ | 914 | raw_spin_lock_init(&lock_##x); \ |
| 915 | rwlock_init(&rwlock_##x); \ | 915 | rwlock_init(&rwlock_##x); \ |
| 916 | mutex_init(&mutex_##x); \ | 916 | mutex_init(&mutex_##x); \ |
| 917 | init_rwsem(&rwsem_##x); \ | 917 | init_rwsem(&rwsem_##x); \ |
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index 7e0d6a58fc83..7542afbb22b3 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c | |||
| @@ -73,20 +73,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) | |||
| 73 | goto dont_wake_writers; | 73 | goto dont_wake_writers; |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | /* if we are allowed to wake writers try to grant a single write lock | 76 | /* |
| 77 | * if there's a writer at the front of the queue | 77 | * as we support write lock stealing, we can't set sem->activity |
| 78 | * - we leave the 'waiting count' incremented to signify potential | 78 | * to -1 here to indicate the writer got the lock. Instead, wake |
| 79 | * contention | 79 | * it up and let it try to grab the lock again. |
| 80 | */ | 80 | */ |
| 81 | if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { | 81 | if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { |
| 82 | sem->activity = -1; | 82 | wake_up_process(waiter->task); |
| 83 | list_del(&waiter->list); | ||
| 84 | tsk = waiter->task; | ||
| 85 | /* Don't touch waiter after ->task has been NULLed */ | ||
| 86 | smp_mb(); | ||
| 87 | waiter->task = NULL; | ||
| 88 | wake_up_process(tsk); | ||
| 89 | put_task_struct(tsk); | ||
| 90 | goto out; | 83 | goto out; |
| 91 | } | 84 | } |
| 92 | 85 | ||
| @@ -121,18 +114,10 @@ static inline struct rw_semaphore * | |||
| 121 | __rwsem_wake_one_writer(struct rw_semaphore *sem) | 114 | __rwsem_wake_one_writer(struct rw_semaphore *sem) |
| 122 | { | 115 | { |
| 123 | struct rwsem_waiter *waiter; | 116 | struct rwsem_waiter *waiter; |
| 124 | struct task_struct *tsk; | ||
| 125 | |||
| 126 | sem->activity = -1; | ||
| 127 | 117 | ||
| 128 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | 118 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); |
| 129 | list_del(&waiter->list); | 119 | wake_up_process(waiter->task); |
| 130 | 120 | ||
| 131 | tsk = waiter->task; | ||
| 132 | smp_mb(); | ||
| 133 | waiter->task = NULL; | ||
| 134 | wake_up_process(tsk); | ||
| 135 | put_task_struct(tsk); | ||
| 136 | return sem; | 121 | return sem; |
| 137 | } | 122 | } |
| 138 | 123 | ||
| @@ -204,7 +189,6 @@ int __down_read_trylock(struct rw_semaphore *sem) | |||
| 204 | 189 | ||
| 205 | /* | 190 | /* |
| 206 | * get a write lock on the semaphore | 191 | * get a write lock on the semaphore |
| 207 | * - we increment the waiting count anyway to indicate an exclusive lock | ||
| 208 | */ | 192 | */ |
| 209 | void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) | 193 | void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) |
| 210 | { | 194 | { |
| @@ -214,37 +198,32 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) | |||
| 214 | 198 | ||
| 215 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | 199 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
| 216 | 200 | ||
| 217 | if (sem->activity == 0 && list_empty(&sem->wait_list)) { | ||
| 218 | /* granted */ | ||
| 219 | sem->activity = -1; | ||
| 220 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
| 221 | goto out; | ||
| 222 | } | ||
| 223 | |||
| 224 | tsk = current; | ||
| 225 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
| 226 | |||
| 227 | /* set up my own style of waitqueue */ | 201 | /* set up my own style of waitqueue */ |
| 202 | tsk = current; | ||
| 228 | waiter.task = tsk; | 203 | waiter.task = tsk; |
| 229 | waiter.flags = RWSEM_WAITING_FOR_WRITE; | 204 | waiter.flags = RWSEM_WAITING_FOR_WRITE; |
| 230 | get_task_struct(tsk); | ||
| 231 | |||
| 232 | list_add_tail(&waiter.list, &sem->wait_list); | 205 | list_add_tail(&waiter.list, &sem->wait_list); |
| 233 | 206 | ||
| 234 | /* we don't need to touch the semaphore struct anymore */ | 207 | /* wait for someone to release the lock */ |
| 235 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
| 236 | |||
| 237 | /* wait to be given the lock */ | ||
| 238 | for (;;) { | 208 | for (;;) { |
| 239 | if (!waiter.task) | 209 | /* |
| 210 | * This is the key to supporting write lock stealing: it allows the |
| 211 | * task already on a CPU to take the lock soon, rather than putting |
| 212 | * itself to sleep and waiting for the system to wake either it or |
| 213 | * someone else at the head of the wait list. |
| 214 | */ | ||
| 215 | if (sem->activity == 0) | ||
| 240 | break; | 216 | break; |
| 241 | schedule(); | ||
| 242 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | 217 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); |
| 218 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
| 219 | schedule(); | ||
| 220 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
| 243 | } | 221 | } |
| 222 | /* got the lock */ | ||
| 223 | sem->activity = -1; | ||
| 224 | list_del(&waiter.list); | ||
| 244 | 225 | ||
| 245 | tsk->state = TASK_RUNNING; | 226 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); |
| 246 | out: | ||
| 247 | ; | ||
| 248 | } | 227 | } |
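
With wake-to-retry in place, __down_write_nested() no longer waits to be handed the lock: it holds wait_lock around the test, re-checks sem->activity every time it wakes, and whichever writer reaches the test first wins. Here is a hedged pthread sketch of the same loop; pthread_cond_wait() drops and retakes the mutex, mirroring the unlock/schedule()/lock dance, and the names are again made up for illustration:

    #include <pthread.h>

    struct model_rwsem {
            int activity;                 /* 0 = free, -1 = write-locked */
            pthread_mutex_t wait_lock;
            pthread_cond_t wait;
    };

    static void model_down_write(struct model_rwsem *sem)
    {
            pthread_mutex_lock(&sem->wait_lock);
            /*
             * Re-check 'activity' on every wakeup: a writer already on
             * a CPU may pass this test first and steal the lock, just
             * as the patched loop re-checks sem->activity after
             * schedule().
             */
            while (sem->activity != 0)
                    pthread_cond_wait(&sem->wait, &sem->wait_lock);
            sem->activity = -1;           /* got the lock */
            pthread_mutex_unlock(&sem->wait_lock);
    }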
| 249 | 228 | ||
| 250 | void __sched __down_write(struct rw_semaphore *sem) | 229 | void __sched __down_write(struct rw_semaphore *sem) |
| @@ -262,8 +241,8 @@ int __down_write_trylock(struct rw_semaphore *sem) | |||
| 262 | 241 | ||
| 263 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | 242 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
| 264 | 243 | ||
| 265 | if (sem->activity == 0 && list_empty(&sem->wait_list)) { | 244 | if (sem->activity == 0) { |
| 266 | /* granted */ | 245 | /* got the lock */ |
| 267 | sem->activity = -1; | 246 | sem->activity = -1; |
| 268 | ret = 1; | 247 | ret = 1; |
| 269 | } | 248 | } |
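
__down_write_trylock() drops its list_empty() test for the same reason: a writer that observes sem->activity == 0 may now take the lock even while other tasks are queued, which is precisely the steal. Sketched in the same user-space model (this reuses struct model_rwsem from the sketch above, an assumption of the example):

    #include <pthread.h>

    /* Assumes struct model_rwsem as defined in the earlier sketch. */
    static int model_down_write_trylock(struct model_rwsem *sem)
    {
            int got = 0;

            pthread_mutex_lock(&sem->wait_lock);
            /* No list_empty() check: queued waiters no longer veto a steal. */
            if (sem->activity == 0) {
                    sem->activity = -1;
                    got = 1;
            }
            pthread_mutex_unlock(&sem->wait_lock);
            return got;
    }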
diff --git a/lib/rwsem.c b/lib/rwsem.c index 8337e1b9bb8d..ad5e0df16ab4 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | * | 2 | * |
| 3 | * Written by David Howells (dhowells@redhat.com). | 3 | * Written by David Howells (dhowells@redhat.com). |
| 4 | * Derived from arch/i386/kernel/semaphore.c | 4 | * Derived from arch/i386/kernel/semaphore.c |
| 5 | * | ||
| 6 | * Writer lock-stealing by Alex Shi <alex.shi@intel.com> | ||
| 5 | */ | 7 | */ |
| 6 | #include <linux/rwsem.h> | 8 | #include <linux/rwsem.h> |
| 7 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
| @@ -60,7 +62,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) | |||
| 60 | struct rwsem_waiter *waiter; | 62 | struct rwsem_waiter *waiter; |
| 61 | struct task_struct *tsk; | 63 | struct task_struct *tsk; |
| 62 | struct list_head *next; | 64 | struct list_head *next; |
| 63 | signed long oldcount, woken, loop, adjustment; | 65 | signed long woken, loop, adjustment; |
| 64 | 66 | ||
| 65 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | 67 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); |
| 66 | if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) | 68 | if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) |
| @@ -72,30 +74,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) | |||
| 72 | */ | 74 | */ |
| 73 | goto out; | 75 | goto out; |
| 74 | 76 | ||
| 75 | /* There's a writer at the front of the queue - try to grant it the | 77 | /* Wake up the waiting writer and let the task grab the sem: */ |
| 76 | * write lock. However, we only wake this writer if we can transition | 78 | wake_up_process(waiter->task); |
| 77 | * the active part of the count from 0 -> 1 | ||
| 78 | */ | ||
| 79 | adjustment = RWSEM_ACTIVE_WRITE_BIAS; | ||
| 80 | if (waiter->list.next == &sem->wait_list) | ||
| 81 | adjustment -= RWSEM_WAITING_BIAS; | ||
| 82 | |||
| 83 | try_again_write: | ||
| 84 | oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; | ||
| 85 | if (oldcount & RWSEM_ACTIVE_MASK) | ||
| 86 | /* Someone grabbed the sem already */ | ||
| 87 | goto undo_write; | ||
| 88 | |||
| 89 | /* We must be careful not to touch 'waiter' after we set ->task = NULL. | ||
| 90 | * It is an allocated on the waiter's stack and may become invalid at | ||
| 91 | * any time after that point (due to a wakeup from another source). | ||
| 92 | */ | ||
| 93 | list_del(&waiter->list); | ||
| 94 | tsk = waiter->task; | ||
| 95 | smp_mb(); | ||
| 96 | waiter->task = NULL; | ||
| 97 | wake_up_process(tsk); | ||
| 98 | put_task_struct(tsk); | ||
| 99 | goto out; | 79 | goto out; |
| 100 | 80 | ||
| 101 | readers_only: | 81 | readers_only: |
| @@ -157,12 +137,40 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) | |||
| 157 | 137 | ||
| 158 | out: | 138 | out: |
| 159 | return sem; | 139 | return sem; |
| 140 | } | ||
| 141 | |||
| 142 | /* Try to get the write sem; the caller holds sem->wait_lock: */ | ||
| 143 | static int try_get_writer_sem(struct rw_semaphore *sem, | ||
| 144 | struct rwsem_waiter *waiter) | ||
| 145 | { | ||
| 146 | struct rwsem_waiter *fwaiter; | ||
| 147 | long oldcount, adjustment; | ||
| 160 | 148 | ||
| 161 | /* undo the change to the active count, but check for a transition | 149 | /* only steal when the first waiter is writing */ |
| 162 | * 1->0 */ | 150 | fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); |
| 163 | undo_write: | 151 | if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE)) |
| 152 | return 0; | ||
| 153 | |||
| 154 | adjustment = RWSEM_ACTIVE_WRITE_BIAS; | ||
| 155 | /* Only one waiter in the queue: */ | ||
| 156 | if (fwaiter == waiter && waiter->list.next == &sem->wait_list) | ||
| 157 | adjustment -= RWSEM_WAITING_BIAS; | ||
| 158 | |||
| 159 | try_again_write: | ||
| 160 | oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; | ||
| 161 | if (!(oldcount & RWSEM_ACTIVE_MASK)) { | ||
| 162 | /* No active lock: */ | ||
| 163 | struct task_struct *tsk = waiter->task; | ||
| 164 | |||
| 165 | list_del(&waiter->list); | ||
| 166 | smp_mb(); | ||
| 167 | put_task_struct(tsk); | ||
| 168 | tsk->state = TASK_RUNNING; | ||
| 169 | return 1; | ||
| 170 | } | ||
| 171 | /* Someone grabbed the sem already: */ | ||
| 164 | if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) | 172 | if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) |
| 165 | goto out; | 173 | return 0; |
| 166 | goto try_again_write; | 174 | goto try_again_write; |
| 167 | } | 175 | } |
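
On the atomic-count side (lib/rwsem.c), try_get_writer_sem() inherits the arithmetic that used to live in the wake path: add RWSEM_ACTIVE_WRITE_BIAS (dropping RWSEM_WAITING_BIAS as well when the thief is the only queued waiter), and if the pre-add count had bits set in RWSEM_ACTIVE_MASK, back the change out. The sketch below redoes that dance with C11 atomics in user space; the bias constants mirror the usual 32-bit kernel values but are assumptions here, not a quote of asm/rwsem.h:

    #include <stdatomic.h>

    #define RWSEM_ACTIVE_MASK       0x0000ffffL
    #define RWSEM_ACTIVE_BIAS       0x00000001L
    #define RWSEM_WAITING_BIAS      (-0x00010000L)
    #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

    /* Models rwsem_atomic_update(): add 'delta' and return the new count. */
    static long model_atomic_update(long delta, _Atomic long *count)
    {
            return atomic_fetch_add(count, delta) + delta;
    }

    /*
     * Try to steal the write lock. 'last_waiter' says the stealing task
     * is the only queued waiter, so the waiting bias must go away too.
     */
    static int model_try_steal(_Atomic long *count, int last_waiter)
    {
            long adjustment = RWSEM_ACTIVE_WRITE_BIAS;
            long oldcount;

            if (last_waiter)
                    adjustment -= RWSEM_WAITING_BIAS;

            for (;;) {
                    oldcount = model_atomic_update(adjustment, count) - adjustment;
                    if (!(oldcount & RWSEM_ACTIVE_MASK))
                            return 1;     /* nobody held it: steal succeeded */
                    /* Undo, and give up once somebody is clearly active: */
                    if (model_atomic_update(-adjustment, count) & RWSEM_ACTIVE_MASK)
                            return 0;
            }
    }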
| 168 | 176 | ||
| @@ -210,6 +218,15 @@ rwsem_down_failed_common(struct rw_semaphore *sem, | |||
| 210 | for (;;) { | 218 | for (;;) { |
| 211 | if (!waiter.task) | 219 | if (!waiter.task) |
| 212 | break; | 220 | break; |
| 221 | |||
| 222 | raw_spin_lock_irq(&sem->wait_lock); | ||
| 223 | /* Try to get the write sem; this may steal it from the head writer: */ | ||
| 224 | if (flags == RWSEM_WAITING_FOR_WRITE) | ||
| 225 | if (try_get_writer_sem(sem, &waiter)) { | ||
| 226 | raw_spin_unlock_irq(&sem->wait_lock); | ||
| 227 | return sem; | ||
| 228 | } | ||
| 229 | raw_spin_unlock_irq(&sem->wait_lock); | ||
| 213 | schedule(); | 230 | schedule(); |
| 214 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | 231 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); |
| 215 | } | 232 | } |
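
One worked pass through the numbers, assuming the model_try_steal() sketch above: with a single queued writer and a free lock the count is RWSEM_WAITING_BIAS; the steal adds RWSEM_ACTIVE_WRITE_BIAS - RWSEM_WAITING_BIAS = RWSEM_ACTIVE_BIAS, and the count lands on RWSEM_ACTIVE_WRITE_BIAS, exactly the value of a write-locked rwsem whose queue is about to be emptied:

    #include <stdio.h>
    #include <stdatomic.h>

    int main(void)
    {
            /* One queued writer, lock currently free: */
            _Atomic long count = RWSEM_WAITING_BIAS;
            int stole = model_try_steal(&count, 1 /* last_waiter */);

            printf("stole=%d write-locked=%d\n",
                   stole, count == RWSEM_ACTIVE_WRITE_BIAS);
            return 0;
    }

With the constants above this prints "stole=1 write-locked=1".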
