Diffstat (limited to 'arch')
 -rw-r--r--  arch/x86/Kconfig                      |   8
 -rw-r--r--  arch/x86/boot/Makefile                |  23
 -rw-r--r--  arch/x86/boot/header.S                |  29
 -rw-r--r--  arch/x86/boot/pm.c                    |  44
 -rw-r--r--  arch/x86/boot/tools/build.c           |   9
 -rw-r--r--  arch/x86/include/asm/boot.h           |   4
 -rwxr-xr-x  arch/x86/include/asm/cpu_debug.h      | 193
 -rw-r--r--  arch/x86/include/asm/desc.h           |   3
 -rw-r--r--  arch/x86/include/asm/highmem.h        |   1
 -rw-r--r--  arch/x86/include/asm/kexec.h          |  13
 -rw-r--r--  arch/x86/include/asm/linkage.h        |  13
 -rw-r--r--  arch/x86/kernel/cpu/Makefile          |   2
 -rw-r--r--  arch/x86/kernel/cpu/common.c          |   6
 -rwxr-xr-x  arch/x86/kernel/cpu/cpu_debug.c       | 785
 -rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_64.c   |   4
 -rw-r--r--  arch/x86/kernel/entry_32.S            |  18
 -rw-r--r--  arch/x86/kernel/entry_64.S            |   4
 -rw-r--r--  arch/x86/kernel/machine_kexec_32.c    |  17
 -rw-r--r--  arch/x86/kernel/machine_kexec_64.c    |  99
 -rw-r--r--  arch/x86/kernel/quirks.c              |   3
 -rw-r--r--  arch/x86/kernel/relocate_kernel_32.S  |  24
 -rw-r--r--  arch/x86/kernel/relocate_kernel_64.S  | 189
 -rw-r--r--  arch/x86/kernel/visws_quirks.c        |   2
 -rw-r--r--  arch/x86/kernel/vmlinux_64.lds.S      |   7
 -rw-r--r--  arch/x86/lib/memcpy_64.S              | 143
 -rw-r--r--  arch/x86/mm/highmem_32.c              |  19
 -rw-r--r--  arch/x86/mm/iomap_32.c                |  13
 -rw-r--r--  arch/x86/mm/kmmio.c                   |   2
28 files changed, 1408 insertions, 269 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b0a638b4199a..1a3150570785 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -933,6 +933,12 @@ config X86_CPUID
 	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
 	  /dev/cpu/31/cpuid.
 
+config X86_CPU_DEBUG
+	tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support"
+	---help---
+	  If you select this option, various x86 CPU information will be
+	  provided through debugfs.
+
 choice
 	prompt "High Memory Support"
 	default HIGHMEM4G if !X86_NUMAQ
@@ -1433,7 +1439,7 @@ config CRASH_DUMP
 config KEXEC_JUMP
 	bool "kexec jump (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
-	depends on KEXEC && HIBERNATION && X86_32
+	depends on KEXEC && HIBERNATION
 	---help---
 	  Jump between original kernel and kexeced kernel and invoke
 	  code in physical address mode via KEXEC
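
The new option is tristate, so the CPU debug code can be built in or as a module; an illustrative .config fragment (nothing beyond the option added above is assumed):

	CONFIG_X86_CPU_DEBUG=m

Once enabled, the files appear under /sys/kernel/debug/x86/cpu/, created by cpu_debug.c later in this patch.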
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index c70eff69a1fb..57a29fecf6bb 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -6,26 +6,23 @@
 # for more details.
 #
 # Copyright (C) 1994 by Linus Torvalds
+# Changed by many, many contributors over the years.
 #
 
 # ROOT_DEV specifies the default root-device when making the image.
 # This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case
 # the default of FLOPPY is used by 'build'.
 
 ROOT_DEV := CURRENT
 
 # If you want to preset the SVGA mode, uncomment the next line and
 # set SVGA_MODE to whatever number you want.
 # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
 # The number is the same as you would ordinarily press at bootup.
 
 SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
 
-# If you want the RAM disk device, define this to be the size in blocks.
-
-#RAMDISK := -DRAMDISK=512
-
-targets		:= vmlinux.bin setup.bin setup.elf zImage bzImage
+targets		:= vmlinux.bin setup.bin setup.elf bzImage
 subdir-		:= compressed
 
 setup-y		+= a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
@@ -71,17 +68,13 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
 KBUILD_CFLAGS	+= $(call cc-option,-m32)
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 
-$(obj)/zImage: asflags-y  := $(SVGA_MODE) $(RAMDISK)
-$(obj)/bzImage: ccflags-y := -D__BIG_KERNEL__
-$(obj)/bzImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
-$(obj)/bzImage: BUILDFLAGS := -b
+$(obj)/bzImage: asflags-y := $(SVGA_MODE)
 
 quiet_cmd_image = BUILD   $@
-cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/setup.bin \
-	    $(obj)/vmlinux.bin $(ROOT_DEV) > $@
+cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
+	    $(ROOT_DEV) > $@
 
-$(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \
-			      $(obj)/vmlinux.bin $(obj)/tools/build FORCE
+$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
 	$(call if_changed,image)
 	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
 
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 7ccff4884a23..5d84d1c74e4c 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -24,12 +24,8 @@
 #include "boot.h"
 #include "offsets.h"
 
-SETUPSECTS	= 4			/* default nr of setup-sectors */
 BOOTSEG		= 0x07C0		/* original address of boot-sector */
-SYSSEG		= DEF_SYSSEG		/* system loaded at 0x10000 (65536) */
-SYSSIZE		= DEF_SYSSIZE		/* system size: # of 16-byte clicks */
-					/* to be loaded */
-ROOT_DEV	= 0			/* ROOT_DEV is now written by "build" */
+SYSSEG		= 0x1000		/* historical load address >> 4 */
 
 #ifndef SVGA_MODE
 #define SVGA_MODE ASK_VGA
@@ -97,12 +93,12 @@ bugger_off_msg:
 	.section ".header", "a"
 	.globl	hdr
 hdr:
-setup_sects:	.byte SETUPSECTS
+setup_sects:	.byte 0			/* Filled in by build.c */
 root_flags:	.word ROOT_RDONLY
-syssize:	.long SYSSIZE
-ram_size:	.word RAMDISK
+syssize:	.long 0			/* Filled in by build.c */
+ram_size:	.word 0			/* Obsolete */
 vid_mode:	.word SVGA_MODE
-root_dev:	.word ROOT_DEV
+root_dev:	.word 0			/* Filled in by build.c */
 boot_flag:	.word 0xAA55
 
 	# offset 512, entry point
@@ -123,14 +119,15 @@ _start:
 	# or else old loadlin-1.5 will fail)
 	.globl	realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
-start_sys_seg:	.word	SYSSEG
+start_sys_seg:	.word	SYSSEG		# obsolete and meaningless, but just
+					# in case something decided to "use" it
 	.word	kernel_version-512	# pointing to kernel version string
 					# above section of header is compatible
 					# with loadlin-1.5 (header v1.5). Don't
 					# change it.
 
-type_of_loader:	.byte	0		# = 0, old one (LILO, Loadlin,
-					#      Bootlin, SYSLX, bootsect...)
+type_of_loader:	.byte	0		# 0 means ancient bootloader, newer
+					# bootloaders know to change this.
 					# See Documentation/i386/boot.txt for
 					# assigned ids
 
@@ -142,11 +139,7 @@ CAN_USE_HEAP = 0x80 # If set, the loader also has set
 					# space behind setup.S can be used for
 					# heap purposes.
 					# Only the loader knows what is free
-#ifndef __BIG_KERNEL__
-			.byte	0
-#else
 			.byte	LOADED_HIGH
-#endif
 
 setup_move_size: .word  0x8000		# size to move, when setup is not
 					# loaded at 0x90000. We will move setup
@@ -157,11 +150,7 @@ setup_move_size: .word 0x8000 # size to move, when setup is not
 
 code32_start:				# here loaders can put a different
 					# start address for 32-bit code.
-#ifndef __BIG_KERNEL__
-		.long	0x1000		# 0x1000 = default for zImage
-#else
 		.long	0x100000	# 0x100000 = default for big kernel
-#endif
 
 ramdisk_image:	.long	0		# address of loaded ramdisk image
 					# Here the loader puts the 32-bit
diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
index 85a1cd8a8ff8..8062f8915250 100644
--- a/arch/x86/boot/pm.c
+++ b/arch/x86/boot/pm.c
@@ -33,47 +33,6 @@ static void realmode_switch_hook(void)
 }
 
 /*
- * A zImage kernel is loaded at 0x10000 but wants to run at 0x1000.
- * A bzImage kernel is loaded and runs at 0x100000.
- */
-static void move_kernel_around(void)
-{
-	/* Note: rely on the compile-time option here rather than
-	   the LOADED_HIGH flag.  The Qemu kernel loader unconditionally
-	   sets the loadflags to zero. */
-#ifndef __BIG_KERNEL__
-	u16 dst_seg, src_seg;
-	u32 syssize;
-
-	dst_seg =  0x1000 >> 4;
-	src_seg = 0x10000 >> 4;
-	syssize = boot_params.hdr.syssize; /* Size in 16-byte paragraphs */
-
-	while (syssize) {
-		int paras  = (syssize >= 0x1000) ? 0x1000 : syssize;
-		int dwords = paras << 2;
-
-		asm volatile("pushw %%es ; "
-			     "pushw %%ds ; "
-			     "movw %1,%%es ; "
-			     "movw %2,%%ds ; "
-			     "xorw %%di,%%di ; "
-			     "xorw %%si,%%si ; "
-			     "rep;movsl ; "
-			     "popw %%ds ; "
-			     "popw %%es"
-			     : "+c" (dwords)
-			     : "r" (dst_seg), "r" (src_seg)
-			     : "esi", "edi");
-
-		syssize -= paras;
-		dst_seg += paras;
-		src_seg += paras;
-	}
-#endif
-}
-
-/*
  * Disable all interrupts at the legacy PIC.
  */
 static void mask_all_interrupts(void)
@@ -147,9 +106,6 @@ void go_to_protected_mode(void)
 	/* Hook before leaving real mode, also disables interrupts */
 	realmode_switch_hook();
 
-	/* Move the kernel/setup to their final resting places */
-	move_kernel_around();
-
 	/* Enable the A20 gate */
 	if (enable_a20()) {
 		puts("A20 gate not responding, unable to boot...\n");
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 44dc1923c0e3..ee3a4ea923ac 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -130,7 +130,7 @@ static void die(const char * str, ...)
 
 static void usage(void)
 {
-	die("Usage: build [-b] setup system [rootdev] [> image]");
+	die("Usage: build setup system [rootdev] [> image]");
 }
 
 int main(int argc, char ** argv)
@@ -145,11 +145,6 @@ int main(int argc, char ** argv)
 	void *kernel;
 	u32 crc = 0xffffffffUL;
 
-	if (argc > 2 && !strcmp(argv[1], "-b"))
-	  {
-		is_big_kernel = 1;
-		argc--, argv++;
-	  }
 	if ((argc < 3) || (argc > 4))
 		usage();
 	if (argc > 3) {
@@ -216,8 +211,6 @@
 		die("Unable to mmap '%s': %m", argv[2]);
 	/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
 	sys_size = (sz + 15 + 4) / 16;
-	if (!is_big_kernel && sys_size > DEF_SYSSIZE)
-		die("System is too big. Try using bzImage or modules.");
 
 	/* Patch the setup code with the appropriate size parameters */
 	buf[0x1f1] = setup_sectors-1;
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 6526cf08b0e4..6ba23dd9fc92 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -1,10 +1,6 @@
 #ifndef _ASM_X86_BOOT_H
 #define _ASM_X86_BOOT_H
 
-/* Don't touch these, unless you really know what you're doing. */
-#define DEF_SYSSEG	0x1000
-#define DEF_SYSSIZE	0x7F00
-
 /* Internal svga startup constants */
 #define NORMAL_VGA	0xffff		/* 80x25 mode */
 #define EXTENDED_VGA	0xfffe		/* 80x50 mode */
diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h
new file mode 100755
index 000000000000..d24d64fcee04
--- /dev/null
+++ b/arch/x86/include/asm/cpu_debug.h
@@ -0,0 +1,193 @@
+#ifndef _ASM_X86_CPU_DEBUG_H
+#define _ASM_X86_CPU_DEBUG_H
+
+/*
+ * CPU x86 architecture debug
+ *
+ * Copyright(C) 2009 Jaswinder Singh Rajput
+ */
+
+/* Register flags */
+enum cpu_debug_bit {
+/* Model Specific Registers (MSRs) */
+	CPU_MC_BIT,			/* Machine Check	*/
+	CPU_MONITOR_BIT,		/* Monitor		*/
+	CPU_TIME_BIT,			/* Time			*/
+	CPU_PMC_BIT,			/* Performance Monitor	*/
+	CPU_PLATFORM_BIT,		/* Platform		*/
+	CPU_APIC_BIT,			/* APIC			*/
+	CPU_POWERON_BIT,		/* Power-on		*/
+	CPU_CONTROL_BIT,		/* Control		*/
+	CPU_FEATURES_BIT,		/* Features control	*/
+	CPU_LBRANCH_BIT,		/* Last Branch		*/
+	CPU_BIOS_BIT,			/* BIOS			*/
+	CPU_FREQ_BIT,			/* Frequency		*/
+	CPU_MTTR_BIT,			/* MTRR			*/
+	CPU_PERF_BIT,			/* Performance		*/
+	CPU_CACHE_BIT,			/* Cache		*/
+	CPU_SYSENTER_BIT,		/* Sysenter		*/
+	CPU_THERM_BIT,			/* Thermal		*/
+	CPU_MISC_BIT,			/* Miscellaneous	*/
+	CPU_DEBUG_BIT,			/* Debug		*/
+	CPU_PAT_BIT,			/* PAT			*/
+	CPU_VMX_BIT,			/* VMX			*/
+	CPU_CALL_BIT,			/* System Call		*/
+	CPU_BASE_BIT,			/* BASE Address		*/
+	CPU_SMM_BIT,			/* System mgmt mode	*/
+	CPU_SVM_BIT,			/* Secure Virtual Machine */
+	CPU_OSVM_BIT,			/* OS-Visible Workaround */
+/* Standard Registers */
+	CPU_TSS_BIT,			/* Task Stack Segment	*/
+	CPU_CR_BIT,			/* Control Registers	*/
+	CPU_DT_BIT,			/* Descriptor Table	*/
+/* End of Registers flags */
+	CPU_REG_ALL_BIT,		/* Select all Registers	*/
+};
+
+#define	CPU_REG_ALL		(~0)	/* Select all Registers	*/
+
+#define	CPU_MC			(1 << CPU_MC_BIT)
+#define	CPU_MONITOR		(1 << CPU_MONITOR_BIT)
+#define	CPU_TIME		(1 << CPU_TIME_BIT)
+#define	CPU_PMC			(1 << CPU_PMC_BIT)
+#define	CPU_PLATFORM		(1 << CPU_PLATFORM_BIT)
+#define	CPU_APIC		(1 << CPU_APIC_BIT)
+#define	CPU_POWERON		(1 << CPU_POWERON_BIT)
+#define	CPU_CONTROL		(1 << CPU_CONTROL_BIT)
+#define	CPU_FEATURES		(1 << CPU_FEATURES_BIT)
+#define	CPU_LBRANCH		(1 << CPU_LBRANCH_BIT)
+#define	CPU_BIOS		(1 << CPU_BIOS_BIT)
+#define	CPU_FREQ		(1 << CPU_FREQ_BIT)
+#define	CPU_MTRR		(1 << CPU_MTTR_BIT)
+#define	CPU_PERF		(1 << CPU_PERF_BIT)
+#define	CPU_CACHE		(1 << CPU_CACHE_BIT)
+#define	CPU_SYSENTER		(1 << CPU_SYSENTER_BIT)
+#define	CPU_THERM		(1 << CPU_THERM_BIT)
+#define	CPU_MISC		(1 << CPU_MISC_BIT)
+#define	CPU_DEBUG		(1 << CPU_DEBUG_BIT)
+#define	CPU_PAT			(1 << CPU_PAT_BIT)
+#define	CPU_VMX			(1 << CPU_VMX_BIT)
+#define	CPU_CALL		(1 << CPU_CALL_BIT)
+#define	CPU_BASE		(1 << CPU_BASE_BIT)
+#define	CPU_SMM			(1 << CPU_SMM_BIT)
+#define	CPU_SVM			(1 << CPU_SVM_BIT)
+#define	CPU_OSVM		(1 << CPU_OSVM_BIT)
+#define	CPU_TSS			(1 << CPU_TSS_BIT)
+#define	CPU_CR			(1 << CPU_CR_BIT)
+#define	CPU_DT			(1 << CPU_DT_BIT)
+
+/* Register file flags */
+enum cpu_file_bit {
+	CPU_INDEX_BIT,			/* index	*/
+	CPU_VALUE_BIT,			/* value	*/
+};
+
+#define	CPU_FILE_VALUE		(1 << CPU_VALUE_BIT)
+
+/*
+ * DisplayFamily_DisplayModel	Processor Families/Processor Number Series
+ * --------------------------	------------------------------------------
+ * 05_01, 05_02, 05_04		Pentium, Pentium with MMX
+ *
+ * 06_01			Pentium Pro
+ * 06_03, 06_05			Pentium II Xeon, Pentium II
+ * 06_07, 06_08, 06_0A, 06_0B	Pentium III Xeon, Pentium III
+ *
+ * 06_09, 06_0D			Pentium M
+ *
+ * 06_0E			Core Duo, Core Solo
+ *
+ * 06_0F			Xeon 3000, 3200, 5100, 5300, 7300 series,
+ *				Core 2 Quad, Core 2 Extreme, Core 2 Duo,
+ *				Pentium dual-core
+ * 06_17			Xeon 5200, 5400 series, Core 2 Quad Q9650
+ *
+ * 06_1C			Atom
+ *
+ * 0F_00, 0F_01, 0F_02		Xeon, Xeon MP, Pentium 4
+ * 0F_03, 0F_04			Xeon, Xeon MP, Pentium 4, Pentium D
+ *
+ * 0F_06			Xeon 7100, 5000 Series, Xeon MP,
+ *				Pentium 4, Pentium D
+ */
+
+/* Register processors bits */
+enum cpu_processor_bit {
+	CPU_NONE,
+/* Intel */
+	CPU_INTEL_PENTIUM_BIT,
+	CPU_INTEL_P6_BIT,
+	CPU_INTEL_PENTIUM_M_BIT,
+	CPU_INTEL_CORE_BIT,
+	CPU_INTEL_CORE2_BIT,
+	CPU_INTEL_ATOM_BIT,
+	CPU_INTEL_XEON_P4_BIT,
+	CPU_INTEL_XEON_MP_BIT,
+};
+
+#define	CPU_ALL			(~0)	/* Select all CPUs	*/
+
+#define	CPU_INTEL_PENTIUM	(1 << CPU_INTEL_PENTIUM_BIT)
+#define	CPU_INTEL_P6		(1 << CPU_INTEL_P6_BIT)
+#define	CPU_INTEL_PENTIUM_M	(1 << CPU_INTEL_PENTIUM_M_BIT)
+#define	CPU_INTEL_CORE		(1 << CPU_INTEL_CORE_BIT)
+#define	CPU_INTEL_CORE2		(1 << CPU_INTEL_CORE2_BIT)
+#define	CPU_INTEL_ATOM		(1 << CPU_INTEL_ATOM_BIT)
+#define	CPU_INTEL_XEON_P4	(1 << CPU_INTEL_XEON_P4_BIT)
+#define	CPU_INTEL_XEON_MP	(1 << CPU_INTEL_XEON_MP_BIT)
+
+#define	CPU_INTEL_PX		(CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M)
+#define	CPU_INTEL_COREX		(CPU_INTEL_CORE | CPU_INTEL_CORE2)
+#define	CPU_INTEL_XEON		(CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP)
+#define	CPU_CO_AT		(CPU_INTEL_CORE | CPU_INTEL_ATOM)
+#define	CPU_C2_AT		(CPU_INTEL_CORE2 | CPU_INTEL_ATOM)
+#define	CPU_CX_AT		(CPU_INTEL_COREX | CPU_INTEL_ATOM)
+#define	CPU_CX_XE		(CPU_INTEL_COREX | CPU_INTEL_XEON)
+#define	CPU_P6_XE		(CPU_INTEL_P6 | CPU_INTEL_XEON)
+#define	CPU_PM_CO_AT		(CPU_INTEL_PENTIUM_M | CPU_CO_AT)
+#define	CPU_C2_AT_XE		(CPU_C2_AT | CPU_INTEL_XEON)
+#define	CPU_CX_AT_XE		(CPU_CX_AT | CPU_INTEL_XEON)
+#define	CPU_P6_CX_AT		(CPU_INTEL_P6 | CPU_CX_AT)
+#define	CPU_P6_CX_XE		(CPU_P6_XE | CPU_INTEL_COREX)
+#define	CPU_P6_CX_AT_XE		(CPU_INTEL_P6 | CPU_CX_AT_XE)
+#define	CPU_PM_CX_AT_XE		(CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE)
+#define	CPU_PM_CX_AT		(CPU_INTEL_PENTIUM_M | CPU_CX_AT)
+#define	CPU_PM_CX_XE		(CPU_INTEL_PENTIUM_M | CPU_CX_XE)
+#define	CPU_PX_CX_AT		(CPU_INTEL_PX | CPU_CX_AT)
+#define	CPU_PX_CX_AT_XE		(CPU_INTEL_PX | CPU_CX_AT_XE)
+
+/* Select all Intel CPUs */
+#define	CPU_INTEL_ALL		(CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE)
+
+#define MAX_CPU_FILES		512
+
+struct cpu_private {
+	unsigned		cpu;
+	unsigned		type;
+	unsigned		reg;
+	unsigned		file;
+};
+
+struct cpu_debug_base {
+	char			*name;		/* Register name	*/
+	unsigned		flag;		/* Register flag	*/
+};
+
+struct cpu_cpuX_base {
+	struct dentry		*dentry;	/* Register dentry	*/
+	int			init;		/* Register index file	*/
+};
+
+struct cpu_file_base {
+	char			*name;		/* Register file name	*/
+	unsigned		flag;		/* Register file flag	*/
+};
+
+struct cpu_debug_range {
+	unsigned		min;		/* Register range min	*/
+	unsigned		max;		/* Register range max	*/
+	unsigned		flag;		/* Supported flags	*/
+	unsigned		model;		/* Supported models	*/
+};
+
+#endif /* _ASM_X86_CPU_DEBUG_H */
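
A minimal userspace sketch (not part of the patch) of how the OR-composed processor masks above are meant to be consumed; the bit positions mirror enum cpu_processor_bit:

#include <stdio.h>

/* bit positions mirror enum cpu_processor_bit above */
#define CPU_INTEL_PENTIUM_M	(1 << 3)
#define CPU_INTEL_CORE		(1 << 4)
#define CPU_INTEL_CORE2		(1 << 5)
#define CPU_INTEL_ATOM		(1 << 6)
#define CPU_PM_CX_AT		(CPU_INTEL_PENTIUM_M | CPU_INTEL_CORE | \
				 CPU_INTEL_CORE2 | CPU_INTEL_ATOM)

int main(void)
{
	unsigned modelflag = CPU_INTEL_CORE2;	/* set per CPU at init time */

	/* a cpu_debug_range entry whose .model is CPU_PM_CX_AT matches: */
	printf("%s\n", (CPU_PM_CX_AT & modelflag) ? "matches" : "no match");
	return 0;
}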
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index dc27705f5443..5623c50d67b2 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -91,7 +91,6 @@ static inline int desc_empty(const void *ptr)
 #define store_gdt(dtr)			native_store_gdt(dtr)
 #define store_idt(dtr)			native_store_idt(dtr)
 #define store_tr(tr)			(tr = native_store_tr())
-#define store_ldt(ldt)			asm("sldt %0":"=m" (ldt))
 
 #define load_TLS(t, cpu)		native_load_tls(t, cpu)
 #define set_ldt				native_set_ldt
@@ -112,6 +111,8 @@ static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
 }
 #endif	/* CONFIG_PARAVIRT */
 
+#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
+
 static inline void native_write_idt_entry(gate_desc *idt, int entry,
 					  const gate_desc *gate)
 {
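
Hoisting store_ldt() below the CONFIG_PARAVIRT block makes the macro available to paravirt builds as well; it previously lived among the native-only wrappers. Usage is unchanged. A hedged userspace sketch (sldt is a legal user-mode instruction on CPUs without UMIP; on newer hardware it may fault):

#include <stdio.h>

#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))

int main(void)
{
	unsigned long ldt = 0;	/* sldt writes only the low 16 bits */

	store_ldt(ldt);
	printf("LDTR selector: %#lx\n", ldt);
	return 0;
}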
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index bf9276bea660..014c2b85ae45 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -63,6 +63,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
 void *kmap_atomic(struct page *page, enum km_type type);
 void kunmap_atomic(void *kvaddr, enum km_type type);
 void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
+void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
 struct page *kmap_atomic_to_page(void *ptr);
 
 #ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 0ceb6d19ed30..317ff1703d0b 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -9,13 +9,13 @@
 # define PAGES_NR		4
 #else
 # define PA_CONTROL_PAGE	0
-# define PA_TABLE_PAGE		1
-# define PAGES_NR		2
+# define VA_CONTROL_PAGE	1
+# define PA_TABLE_PAGE		2
+# define PA_SWAP_PAGE		3
+# define PAGES_NR		4
 #endif
 
-#ifdef CONFIG_X86_32
 # define KEXEC_CONTROL_CODE_MAX_SIZE	2048
-#endif
 
 #ifndef __ASSEMBLY__
 
@@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page,
 			unsigned int has_pae,
 			unsigned int preserve_context);
 #else
-NORET_TYPE void
+unsigned long
 relocate_kernel(unsigned long indirection_page,
 		unsigned long page_list,
-		unsigned long start_address) ATTRIB_NORET;
+		unsigned long start_address,
+		unsigned int preserve_context);
 #endif
 
 #define ARCH_HAS_KIMAGE_ARCH
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index a0d70b46c27c..12d55e773eb6 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_LINKAGE_H
 #define _ASM_X86_LINKAGE_H
 
+#include <linux/stringify.h>
+
 #undef notrace
 #define notrace __attribute__((no_instrument_function))
 
@@ -53,14 +55,9 @@
 	.globl name; \
 	name:
 
-#ifdef CONFIG_X86_64
-#define __ALIGN .p2align 4,,15
-#define __ALIGN_STR ".p2align 4,,15"
-#endif
-
-#ifdef CONFIG_X86_ALIGNMENT_16
-#define __ALIGN .align 16,0x90
-#define __ALIGN_STR ".align 16,0x90"
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
+#define __ALIGN		.p2align 4, 0x90
+#define __ALIGN_STR	__stringify(__ALIGN)
 #endif
 
 #endif /* __ASSEMBLY__ */
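
The collapsed #if works because __stringify() from <linux/stringify.h> expands its argument before stringizing, so __ALIGN_STR always tracks __ALIGN. A standalone C illustration (macro bodies copied from the kernel header):

#include <stdio.h>

#define __stringify_1(x...)	#x
#define __stringify(x...)	__stringify_1(x)

#define __ALIGN	.p2align 4, 0x90

int main(void)
{
	/* prints ".p2align 4, 0x90"; a one-level #x would print "__ALIGN" */
	puts(__stringify(__ALIGN));
	return 0;
}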
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82db7f45e2de..d4356f8b7522 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -14,6 +14,8 @@ obj-y += vmware.o hypervisor.o
 obj-$(CONFIG_X86_32)	+= bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)	+= bugs_64.o
 
+obj-$(CONFIG_X86_CPU_DEBUG)		+= cpu_debug.o
+
 obj-$(CONFIG_CPU_SUP_INTEL)		+= intel.o
 obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
 obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 826d5c876278..f8869978bbb7 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1078,8 +1078,7 @@ void __cpuinit cpu_init(void)
 
 	atomic_inc(&init_mm.mm_count);
 	me->active_mm = &init_mm;
-	if (me->mm)
-		BUG();
+	BUG_ON(me->mm);
 	enter_lazy_tlb(&init_mm, me);
 
 	load_sp0(t, &current->thread);
@@ -1145,8 +1144,7 @@
 	 */
 	atomic_inc(&init_mm.mm_count);
 	curr->active_mm = &init_mm;
-	if (curr->mm)
-		BUG();
+	BUG_ON(curr->mm);
 	enter_lazy_tlb(&init_mm, curr);
 
 	load_sp0(t, thread);
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
new file mode 100755
index 000000000000..9abbcbd933cc
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -0,0 +1,785 @@
+/*
+ * CPU x86 architecture debug code
+ *
+ * Copyright(C) 2009 Jaswinder Singh Rajput
+ *
+ * For licensing details see kernel-base/COPYING
+ */
+
+#include <linux/interrupt.h>
+#include <linux/compiler.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/kprobes.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+
+#include <asm/cpu_debug.h>
+#include <asm/paravirt.h>
+#include <asm/system.h>
+#include <asm/traps.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
+
+static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
+static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
+static DEFINE_PER_CPU(unsigned, cpu_modelflag);
+static DEFINE_PER_CPU(int, cpu_priv_count);
+static DEFINE_PER_CPU(unsigned, cpu_model);
+
+static DEFINE_MUTEX(cpu_debug_lock);
+
+static struct dentry *cpu_debugfs_dir;
+
+static struct cpu_debug_base cpu_base[] = {
+	{ "mc",		CPU_MC		},	/* Machine Check	*/
+	{ "monitor",	CPU_MONITOR	},	/* Monitor		*/
+	{ "time",	CPU_TIME	},	/* Time			*/
+	{ "pmc",	CPU_PMC		},	/* Performance Monitor	*/
+	{ "platform",	CPU_PLATFORM	},	/* Platform		*/
+	{ "apic",	CPU_APIC	},	/* APIC			*/
+	{ "poweron",	CPU_POWERON	},	/* Power-on		*/
+	{ "control",	CPU_CONTROL	},	/* Control		*/
+	{ "features",	CPU_FEATURES	},	/* Features control	*/
+	{ "lastbranch",	CPU_LBRANCH	},	/* Last Branch		*/
+	{ "bios",	CPU_BIOS	},	/* BIOS			*/
+	{ "freq",	CPU_FREQ	},	/* Frequency		*/
+	{ "mtrr",	CPU_MTRR	},	/* MTRR			*/
+	{ "perf",	CPU_PERF	},	/* Performance		*/
+	{ "cache",	CPU_CACHE	},	/* Cache		*/
+	{ "sysenter",	CPU_SYSENTER	},	/* Sysenter		*/
+	{ "therm",	CPU_THERM	},	/* Thermal		*/
+	{ "misc",	CPU_MISC	},	/* Miscellaneous	*/
+	{ "debug",	CPU_DEBUG	},	/* Debug		*/
+	{ "pat",	CPU_PAT		},	/* PAT			*/
+	{ "vmx",	CPU_VMX		},	/* VMX			*/
+	{ "call",	CPU_CALL	},	/* System Call		*/
+	{ "base",	CPU_BASE	},	/* BASE Address		*/
+	{ "smm",	CPU_SMM		},	/* System mgmt mode	*/
+	{ "svm",	CPU_SVM		},	/* Secure Virtual Machine */
+	{ "osvm",	CPU_OSVM	},	/* OS-Visible Workaround */
+	{ "tss",	CPU_TSS		},	/* Task Stack Segment	*/
+	{ "cr",		CPU_CR		},	/* Control Registers	*/
+	{ "dt",		CPU_DT		},	/* Descriptor Table	*/
+	{ "registers",	CPU_REG_ALL	},	/* Select all Registers	*/
+};
+
+static struct cpu_file_base cpu_file[] = {
+	{ "index",	CPU_REG_ALL },	/* index	*/
+	{ "value",	CPU_REG_ALL },	/* value	*/
+};
+
+/* Intel Registers Range */
+static struct cpu_debug_range cpu_intel_range[] = {
+	{ 0x00000000, 0x00000001, CPU_MC,	CPU_INTEL_ALL },
+	{ 0x00000006, 0x00000007, CPU_MONITOR,	CPU_CX_AT_XE },
+	{ 0x00000010, 0x00000010, CPU_TIME,	CPU_INTEL_ALL },
+	{ 0x00000011, 0x00000013, CPU_PMC,	CPU_INTEL_PENTIUM },
+	{ 0x00000017, 0x00000017, CPU_PLATFORM,	CPU_PX_CX_AT_XE },
+	{ 0x0000001B, 0x0000001B, CPU_APIC,	CPU_P6_CX_AT_XE },
+
+	{ 0x0000002A, 0x0000002A, CPU_POWERON,	CPU_PX_CX_AT_XE },
+	{ 0x0000002B, 0x0000002B, CPU_POWERON,	CPU_INTEL_XEON },
+	{ 0x0000002C, 0x0000002C, CPU_FREQ,	CPU_INTEL_XEON },
+	{ 0x0000003A, 0x0000003A, CPU_CONTROL,	CPU_CX_AT_XE },
+
+	{ 0x00000040, 0x00000043, CPU_LBRANCH,	CPU_PM_CX_AT_XE },
+	{ 0x00000044, 0x00000047, CPU_LBRANCH,	CPU_PM_CO_AT },
+	{ 0x00000060, 0x00000063, CPU_LBRANCH,	CPU_C2_AT },
+	{ 0x00000064, 0x00000067, CPU_LBRANCH,	CPU_INTEL_ATOM },
+
+	{ 0x00000079, 0x00000079, CPU_BIOS,	CPU_P6_CX_AT_XE },
+	{ 0x00000088, 0x0000008A, CPU_CACHE,	CPU_INTEL_P6 },
+	{ 0x0000008B, 0x0000008B, CPU_BIOS,	CPU_P6_CX_AT_XE },
+	{ 0x0000009B, 0x0000009B, CPU_MONITOR,	CPU_INTEL_XEON },
+
+	{ 0x000000C1, 0x000000C2, CPU_PMC,	CPU_P6_CX_AT },
+	{ 0x000000CD, 0x000000CD, CPU_FREQ,	CPU_CX_AT },
+	{ 0x000000E7, 0x000000E8, CPU_PERF,	CPU_CX_AT },
+	{ 0x000000FE, 0x000000FE, CPU_MTRR,	CPU_P6_CX_XE },
+
+	{ 0x00000116, 0x00000116, CPU_CACHE,	CPU_INTEL_P6 },
+	{ 0x00000118, 0x00000118, CPU_CACHE,	CPU_INTEL_P6 },
+	{ 0x00000119, 0x00000119, CPU_CACHE,	CPU_INTEL_PX },
+	{ 0x0000011A, 0x0000011B, CPU_CACHE,	CPU_INTEL_P6 },
+	{ 0x0000011E, 0x0000011E, CPU_CACHE,	CPU_PX_CX_AT },
+
+	{ 0x00000174, 0x00000176, CPU_SYSENTER,	CPU_P6_CX_AT_XE },
+	{ 0x00000179, 0x0000017A, CPU_MC,	CPU_PX_CX_AT_XE },
+	{ 0x0000017B, 0x0000017B, CPU_MC,	CPU_P6_XE },
+	{ 0x00000186, 0x00000187, CPU_PMC,	CPU_P6_CX_AT },
+	{ 0x00000198, 0x00000199, CPU_PERF,	CPU_PM_CX_AT_XE },
+	{ 0x0000019A, 0x0000019A, CPU_TIME,	CPU_PM_CX_AT_XE },
+	{ 0x0000019B, 0x0000019D, CPU_THERM,	CPU_PM_CX_AT_XE },
+	{ 0x000001A0, 0x000001A0, CPU_MISC,	CPU_PM_CX_AT_XE },
+
+	{ 0x000001C9, 0x000001C9, CPU_LBRANCH,	CPU_PM_CX_AT },
+	{ 0x000001D7, 0x000001D8, CPU_LBRANCH,	CPU_INTEL_XEON },
+	{ 0x000001D9, 0x000001D9, CPU_DEBUG,	CPU_CX_AT_XE },
+	{ 0x000001DA, 0x000001DA, CPU_LBRANCH,	CPU_INTEL_XEON },
+	{ 0x000001DB, 0x000001DB, CPU_LBRANCH,	CPU_P6_XE },
+	{ 0x000001DC, 0x000001DC, CPU_LBRANCH,	CPU_INTEL_P6 },
+	{ 0x000001DD, 0x000001DE, CPU_LBRANCH,	CPU_PX_CX_AT_XE },
+	{ 0x000001E0, 0x000001E0, CPU_LBRANCH,	CPU_INTEL_P6 },
+
+	{ 0x00000200, 0x0000020F, CPU_MTRR,	CPU_P6_CX_XE },
+	{ 0x00000250, 0x00000250, CPU_MTRR,	CPU_P6_CX_XE },
+	{ 0x00000258, 0x00000259, CPU_MTRR,	CPU_P6_CX_XE },
+	{ 0x00000268, 0x0000026F, CPU_MTRR,	CPU_P6_CX_XE },
+	{ 0x00000277, 0x00000277, CPU_PAT,	CPU_C2_AT_XE },
+	{ 0x000002FF, 0x000002FF, CPU_MTRR,	CPU_P6_CX_XE },
+
+	{ 0x00000300, 0x00000308, CPU_PMC,	CPU_INTEL_XEON },
+	{ 0x00000309, 0x0000030B, CPU_PMC,	CPU_C2_AT_XE },
+	{ 0x0000030C, 0x00000311, CPU_PMC,	CPU_INTEL_XEON },
+	{ 0x00000345, 0x00000345, CPU_PMC,	CPU_C2_AT },
+	{ 0x00000360, 0x00000371, CPU_PMC,	CPU_INTEL_XEON },
+	{ 0x0000038D, 0x00000390, CPU_PMC,	CPU_C2_AT },
+	{ 0x000003A0, 0x000003BE, CPU_PMC,	CPU_INTEL_XEON },
+	{ 0x000003C0, 0x000003CD, CPU_PMC,	CPU_INTEL_XEON },
+	{ 0x000003E0, 0x000003E1, CPU_PMC,	CPU_INTEL_XEON },
+	{ 0x000003F0, 0x000003F0, CPU_PMC,	CPU_INTEL_XEON },
+	{ 0x000003F1, 0x000003F1, CPU_PMC,	CPU_C2_AT_XE },
+	{ 0x000003F2, 0x000003F2, CPU_PMC,	CPU_INTEL_XEON },
+
+	{ 0x00000400, 0x00000402, CPU_MC,	CPU_PM_CX_AT_XE },
+	{ 0x00000403, 0x00000403, CPU_MC,	CPU_INTEL_XEON },
+	{ 0x00000404, 0x00000406, CPU_MC,	CPU_PM_CX_AT_XE },
+	{ 0x00000407, 0x00000407, CPU_MC,	CPU_INTEL_XEON },
+	{ 0x00000408, 0x0000040A, CPU_MC,	CPU_PM_CX_AT_XE },
+	{ 0x0000040B, 0x0000040B, CPU_MC,	CPU_INTEL_XEON },
+	{ 0x0000040C, 0x0000040E, CPU_MC,	CPU_PM_CX_XE },
+	{ 0x0000040F, 0x0000040F, CPU_MC,	CPU_INTEL_XEON },
+	{ 0x00000410, 0x00000412, CPU_MC,	CPU_PM_CX_AT_XE },
+	{ 0x00000413, 0x00000417, CPU_MC,	CPU_CX_AT_XE },
+	{ 0x00000480, 0x0000048B, CPU_VMX,	CPU_CX_AT_XE },
+
+	{ 0x00000600, 0x00000600, CPU_DEBUG,	CPU_PM_CX_AT_XE },
+	{ 0x00000680, 0x0000068F, CPU_LBRANCH,	CPU_INTEL_XEON },
+	{ 0x000006C0, 0x000006CF, CPU_LBRANCH,	CPU_INTEL_XEON },
+
+	{ 0x000107CC, 0x000107D3, CPU_PMC,	CPU_INTEL_XEON_MP },
+
+	{ 0xC0000080, 0xC0000080, CPU_FEATURES,	CPU_INTEL_XEON },
+	{ 0xC0000081, 0xC0000082, CPU_CALL,	CPU_INTEL_XEON },
+	{ 0xC0000084, 0xC0000084, CPU_CALL,	CPU_INTEL_XEON },
+	{ 0xC0000100, 0xC0000102, CPU_BASE,	CPU_INTEL_XEON },
+};
+
+/* AMD Registers Range */
+static struct cpu_debug_range cpu_amd_range[] = {
+	{ 0x00000010, 0x00000010, CPU_TIME,	CPU_ALL, },
+	{ 0x0000001B, 0x0000001B, CPU_APIC,	CPU_ALL, },
+	{ 0x000000FE, 0x000000FE, CPU_MTRR,	CPU_ALL, },
+
+	{ 0x00000174, 0x00000176, CPU_SYSENTER,	CPU_ALL, },
+	{ 0x00000179, 0x0000017A, CPU_MC,	CPU_ALL, },
+	{ 0x0000017B, 0x0000017B, CPU_MC,	CPU_ALL, },
+	{ 0x000001D9, 0x000001D9, CPU_DEBUG,	CPU_ALL, },
+	{ 0x000001DB, 0x000001DE, CPU_LBRANCH,	CPU_ALL, },
+
+	{ 0x00000200, 0x0000020F, CPU_MTRR,	CPU_ALL, },
+	{ 0x00000250, 0x00000250, CPU_MTRR,	CPU_ALL, },
+	{ 0x00000258, 0x00000259, CPU_MTRR,	CPU_ALL, },
+	{ 0x00000268, 0x0000026F, CPU_MTRR,	CPU_ALL, },
+	{ 0x00000277, 0x00000277, CPU_PAT,	CPU_ALL, },
+	{ 0x000002FF, 0x000002FF, CPU_MTRR,	CPU_ALL, },
+
+	{ 0x00000400, 0x00000417, CPU_MC,	CPU_ALL, },
+
+	{ 0xC0000080, 0xC0000080, CPU_FEATURES,	CPU_ALL, },
+	{ 0xC0000081, 0xC0000084, CPU_CALL,	CPU_ALL, },
+	{ 0xC0000100, 0xC0000102, CPU_BASE,	CPU_ALL, },
+	{ 0xC0000103, 0xC0000103, CPU_TIME,	CPU_ALL, },
+
+	{ 0xC0000408, 0xC000040A, CPU_MC,	CPU_ALL, },
+
+	{ 0xc0010000, 0xc0010007, CPU_PMC,	CPU_ALL, },
+	{ 0xc0010010, 0xc0010010, CPU_MTRR,	CPU_ALL, },
+	{ 0xc0010016, 0xc001001A, CPU_MTRR,	CPU_ALL, },
+	{ 0xc001001D, 0xc001001D, CPU_MTRR,	CPU_ALL, },
+	{ 0xc0010030, 0xc0010035, CPU_BIOS,	CPU_ALL, },
+	{ 0xc0010056, 0xc0010056, CPU_SMM,	CPU_ALL, },
+	{ 0xc0010061, 0xc0010063, CPU_SMM,	CPU_ALL, },
+	{ 0xc0010074, 0xc0010074, CPU_MC,	CPU_ALL, },
+	{ 0xc0010111, 0xc0010113, CPU_SMM,	CPU_ALL, },
+	{ 0xc0010114, 0xc0010118, CPU_SVM,	CPU_ALL, },
+	{ 0xc0010119, 0xc001011A, CPU_SMM,	CPU_ALL, },
+	{ 0xc0010140, 0xc0010141, CPU_OSVM,	CPU_ALL, },
+	{ 0xc0010156, 0xc0010156, CPU_SMM,	CPU_ALL, },
+};
+
+
+static int get_cpu_modelflag(unsigned cpu)
+{
+	int flag;
+
+	switch (per_cpu(cpu_model, cpu)) {
+	/* Intel */
+	case 0x0501:
+	case 0x0502:
+	case 0x0504:
+		flag = CPU_INTEL_PENTIUM;
+		break;
+	case 0x0601:
+	case 0x0603:
+	case 0x0605:
+	case 0x0607:
+	case 0x0608:
+	case 0x060A:
+	case 0x060B:
+		flag = CPU_INTEL_P6;
+		break;
+	case 0x0609:
+	case 0x060D:
+		flag = CPU_INTEL_PENTIUM_M;
+		break;
+	case 0x060E:
+		flag = CPU_INTEL_CORE;
+		break;
+	case 0x060F:
+	case 0x0617:
+		flag = CPU_INTEL_CORE2;
+		break;
+	case 0x061C:
+		flag = CPU_INTEL_ATOM;
+		break;
+	case 0x0F00:
+	case 0x0F01:
+	case 0x0F02:
+	case 0x0F03:
+	case 0x0F04:
+		flag = CPU_INTEL_XEON_P4;
+		break;
+	case 0x0F06:
+		flag = CPU_INTEL_XEON_MP;
+		break;
+	default:
+		flag = CPU_NONE;
+		break;
+	}
+
+	return flag;
+}
+
+static int get_cpu_range_count(unsigned cpu)
+{
+	int index;
+
+	switch (per_cpu(cpu_model, cpu) >> 16) {
+	case X86_VENDOR_INTEL:
+		index = ARRAY_SIZE(cpu_intel_range);
+		break;
+	case X86_VENDOR_AMD:
+		index = ARRAY_SIZE(cpu_amd_range);
+		break;
+	default:
+		index = 0;
+		break;
+	}
+
+	return index;
+}
+
+static int is_typeflag_valid(unsigned cpu, unsigned flag)
+{
+	unsigned vendor, modelflag;
+	int i, index;
+
+	/* Standard Registers should be always valid */
+	if (flag >= CPU_TSS)
+		return 1;
+
+	modelflag = per_cpu(cpu_modelflag, cpu);
+	vendor = per_cpu(cpu_model, cpu) >> 16;
+	index = get_cpu_range_count(cpu);
+
+	for (i = 0; i < index; i++) {
+		switch (vendor) {
+		case X86_VENDOR_INTEL:
+			if ((cpu_intel_range[i].model & modelflag) &&
+			    (cpu_intel_range[i].flag & flag))
+				return 1;
+			break;
+		case X86_VENDOR_AMD:
+			if (cpu_amd_range[i].flag & flag)
+				return 1;
+			break;
+		}
+	}
+
+	/* Invalid */
+	return 0;
+}
+
+static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
+			      int index, unsigned flag)
+{
+	unsigned modelflag;
+
+	modelflag = per_cpu(cpu_modelflag, cpu);
+	*max = 0;
+	switch (per_cpu(cpu_model, cpu) >> 16) {
+	case X86_VENDOR_INTEL:
+		if ((cpu_intel_range[index].model & modelflag) &&
+		    (cpu_intel_range[index].flag & flag)) {
+			*min = cpu_intel_range[index].min;
+			*max = cpu_intel_range[index].max;
+		}
+		break;
+	case X86_VENDOR_AMD:
+		if (cpu_amd_range[index].flag & flag) {
+			*min = cpu_amd_range[index].min;
+			*max = cpu_amd_range[index].max;
+		}
+		break;
+	}
+
+	return *max;
+}
+
+/* This function can also be called with seq = NULL for printk */
+static void print_cpu_data(struct seq_file *seq, unsigned type,
+			   u32 low, u32 high)
+{
+	struct cpu_private *priv;
+	u64 val = high;
+
+	if (seq) {
+		priv = seq->private;
+		if (priv->file) {
+			val = (val << 32) | low;
+			seq_printf(seq, "0x%llx\n", val);
+		} else
+			seq_printf(seq, " %08x: %08x_%08x\n",
+				   type, high, low);
+	} else
+		printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low);
+}
+
+/* This function can also be called with seq = NULL for printk */
+static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
+{
+	unsigned msr, msr_min, msr_max;
+	struct cpu_private *priv;
+	u32 low, high;
+	int i, range;
+
+	if (seq) {
+		priv = seq->private;
+		if (priv->file) {
+			if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg,
+					       &low, &high))
+				print_cpu_data(seq, priv->reg, low, high);
+			return;
+		}
+	}
+
+	range = get_cpu_range_count(cpu);
+
+	for (i = 0; i < range; i++) {
+		if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
+			continue;
+
+		for (msr = msr_min; msr <= msr_max; msr++) {
+			if (rdmsr_safe_on_cpu(cpu, msr, &low, &high))
+				continue;
+			print_cpu_data(seq, msr, low, high);
+		}
+	}
+}
+
+static void print_tss(void *arg)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	struct seq_file *seq = arg;
+	unsigned int seg;
+
+	seq_printf(seq, " RAX\t: %016lx\n", regs->ax);
+	seq_printf(seq, " RBX\t: %016lx\n", regs->bx);
+	seq_printf(seq, " RCX\t: %016lx\n", regs->cx);
+	seq_printf(seq, " RDX\t: %016lx\n", regs->dx);
+
+	seq_printf(seq, " RSI\t: %016lx\n", regs->si);
+	seq_printf(seq, " RDI\t: %016lx\n", regs->di);
+	seq_printf(seq, " RBP\t: %016lx\n", regs->bp);
+	seq_printf(seq, " ESP\t: %016lx\n", regs->sp);
+
+#ifdef CONFIG_X86_64
+	seq_printf(seq, " R08\t: %016lx\n", regs->r8);
+	seq_printf(seq, " R09\t: %016lx\n", regs->r9);
+	seq_printf(seq, " R10\t: %016lx\n", regs->r10);
+	seq_printf(seq, " R11\t: %016lx\n", regs->r11);
+	seq_printf(seq, " R12\t: %016lx\n", regs->r12);
+	seq_printf(seq, " R13\t: %016lx\n", regs->r13);
+	seq_printf(seq, " R14\t: %016lx\n", regs->r14);
+	seq_printf(seq, " R15\t: %016lx\n", regs->r15);
+#endif
+
+	asm("movl %%cs,%0" : "=r" (seg));
+	seq_printf(seq, " CS\t: %04x\n", seg);
+	asm("movl %%ds,%0" : "=r" (seg));
+	seq_printf(seq, " DS\t: %04x\n", seg);
+	seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff);
+	asm("movl %%es,%0" : "=r" (seg));
+	seq_printf(seq, " ES\t: %04x\n", seg);
+	asm("movl %%fs,%0" : "=r" (seg));
+	seq_printf(seq, " FS\t: %04x\n", seg);
+	asm("movl %%gs,%0" : "=r" (seg));
+	seq_printf(seq, " GS\t: %04x\n", seg);
+
+	seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags);
+
+	seq_printf(seq, " EIP\t: %016lx\n", regs->ip);
+}
+
+static void print_cr(void *arg)
+{
+	struct seq_file *seq = arg;
+
+	seq_printf(seq, " cr0\t: %016lx\n", read_cr0());
+	seq_printf(seq, " cr2\t: %016lx\n", read_cr2());
+	seq_printf(seq, " cr3\t: %016lx\n", read_cr3());
+	seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe());
+#ifdef CONFIG_X86_64
+	seq_printf(seq, " cr8\t: %016lx\n", read_cr8());
+#endif
+}
+
+static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt)
+{
+	seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size));
+}
+
+static void print_dt(void *seq)
+{
+	struct desc_ptr dt;
+	unsigned long ldt;
+
+	/* IDT */
+	store_idt((struct desc_ptr *)&dt);
+	print_desc_ptr("IDT", seq, dt);
+
+	/* GDT */
+	store_gdt((struct desc_ptr *)&dt);
+	print_desc_ptr("GDT", seq, dt);
+
+	/* LDT */
+	store_ldt(ldt);
+	seq_printf(seq, " LDT\t: %016lx\n", ldt);
+
+	/* TR */
+	store_tr(ldt);
+	seq_printf(seq, " TR\t: %016lx\n", ldt);
+}
+
+static void print_dr(void *arg)
+{
+	struct seq_file *seq = arg;
+	unsigned long dr;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		/* Ignore db4, db5 */
+		if ((i == 4) || (i == 5))
+			continue;
+		get_debugreg(dr, i);
+		seq_printf(seq, " dr%d\t: %016lx\n", i, dr);
+	}
+
+	seq_printf(seq, "\n MSR\t:\n");
+}
+
+static void print_apic(void *arg)
+{
+	struct seq_file *seq = arg;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(seq, " LAPIC\t:\n");
+	seq_printf(seq, " ID\t\t: %08x\n",  apic_read(APIC_ID) >> 24);
+	seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR));
+	seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI));
+	seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI));
+	seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI));
+	seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR));
+	seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR));
+	seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV));
+	seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR));
+	seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR));
+	seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR));
+	seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2));
+	seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT));
+	seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR));
+	seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC));
+	seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0));
+	seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1));
+	seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR));
+	seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
+	seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
+	seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+	seq_printf(seq, "\n MSR\t:\n");
+}
+
+static int cpu_seq_show(struct seq_file *seq, void *v)
+{
+	struct cpu_private *priv = seq->private;
+
+	if (priv == NULL)
+		return -EINVAL;
+
+	switch (cpu_base[priv->type].flag) {
+	case CPU_TSS:
+		smp_call_function_single(priv->cpu, print_tss, seq, 1);
+		break;
+	case CPU_CR:
+		smp_call_function_single(priv->cpu, print_cr, seq, 1);
+		break;
+	case CPU_DT:
+		smp_call_function_single(priv->cpu, print_dt, seq, 1);
+		break;
+	case CPU_DEBUG:
+		if (priv->file == CPU_INDEX_BIT)
+			smp_call_function_single(priv->cpu, print_dr, seq, 1);
+		print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
+		break;
+	case CPU_APIC:
+		if (priv->file == CPU_INDEX_BIT)
+			smp_call_function_single(priv->cpu, print_apic, seq, 1);
+		print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
+		break;
+
+	default:
+		print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
+		break;
+	}
+	seq_printf(seq, "\n");
+
+	return 0;
+}
+
+static void *cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	if (*pos == 0) /* One time is enough ;-) */
+		return seq;
+
+	return NULL;
+}
+
+static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	(*pos)++;
+
+	return cpu_seq_start(seq, pos);
+}
+
+static void cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static const struct seq_operations cpu_seq_ops = {
+	.start		= cpu_seq_start,
+	.next		= cpu_seq_next,
+	.stop		= cpu_seq_stop,
+	.show		= cpu_seq_show,
+};
+
+static int cpu_seq_open(struct inode *inode, struct file *file)
+{
+	struct cpu_private *priv = inode->i_private;
+	struct seq_file *seq;
+	int err;
+
+	err = seq_open(file, &cpu_seq_ops);
+	if (!err) {
+		seq = file->private_data;
+		seq->private = priv;
+	}
+
+	return err;
+}
+
+static const struct file_operations cpu_fops = {
+	.open		= cpu_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
+			   unsigned file, struct dentry *dentry)
+{
+	struct cpu_private *priv = NULL;
+
+	/* Already initialized */
+	if (file == CPU_INDEX_BIT)
+		if (per_cpu(cpu_arr[type].init, cpu))
+			return 0;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (priv == NULL)
+		return -ENOMEM;
+
+	priv->cpu = cpu;
+	priv->type = type;
+	priv->reg = reg;
+	priv->file = file;
+	mutex_lock(&cpu_debug_lock);
+	per_cpu(priv_arr[type], cpu) = priv;
+	per_cpu(cpu_priv_count, cpu)++;
+	mutex_unlock(&cpu_debug_lock);
+
+	if (file)
+		debugfs_create_file(cpu_file[file].name, S_IRUGO,
+				    dentry, (void *)priv, &cpu_fops);
+	else {
+		debugfs_create_file(cpu_base[type].name, S_IRUGO,
+				    per_cpu(cpu_arr[type].dentry, cpu),
+				    (void *)priv, &cpu_fops);
+		mutex_lock(&cpu_debug_lock);
+		per_cpu(cpu_arr[type].init, cpu) = 1;
+		mutex_unlock(&cpu_debug_lock);
+	}
+
+	return 0;
+}
+
+static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg,
+			     struct dentry *dentry)
+{
+	unsigned file;
+	int err = 0;
+
+	for (file = 0; file < ARRAY_SIZE(cpu_file); file++) {
+		err = cpu_create_file(cpu, type, reg, file, dentry);
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+
+static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
+{
+	struct dentry *cpu_dentry = NULL;
+	unsigned reg, reg_min, reg_max;
+	int i, range, err = 0;
+	char reg_dir[12];
+	u32 low, high;
+
+	range = get_cpu_range_count(cpu);
+
+	for (i = 0; i < range; i++) {
+		if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
+				   cpu_base[type].flag))
+			continue;
+
+		for (reg = reg_min; reg <= reg_max; reg++) {
+			if (rdmsr_safe_on_cpu(cpu, reg, &low, &high))
+				continue;
+
+			sprintf(reg_dir, "0x%x", reg);
+			cpu_dentry = debugfs_create_dir(reg_dir, dentry);
+			err = cpu_init_regfiles(cpu, type, reg, cpu_dentry);
+			if (err)
+				return err;
+		}
+	}
+
+	return err;
+}
+
+static int cpu_init_allreg(unsigned cpu, struct dentry *dentry)
+{
+	struct dentry *cpu_dentry = NULL;
+	unsigned type;
+	int err = 0;
+
+	for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) {
+		if (!is_typeflag_valid(cpu, cpu_base[type].flag))
+			continue;
+		cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry);
+		per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry;
+
+		if (type < CPU_TSS_BIT)
+			err = cpu_init_msr(cpu, type, cpu_dentry);
+		else
+			err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT,
+					      cpu_dentry);
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+
+static int cpu_init_cpu(void)
+{
+	struct dentry *cpu_dentry = NULL;
+	struct cpuinfo_x86 *cpui;
+	char cpu_dir[12];
+	unsigned cpu;
+	int err = 0;
+
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
+		cpui = &cpu_data(cpu);
+		if (!cpu_has(cpui, X86_FEATURE_MSR))
+			continue;
+		per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) |
+					   (cpui->x86 << 8) |
+					   (cpui->x86_model));
+		per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu);
+
+		sprintf(cpu_dir, "cpu%d", cpu);
+		cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
+		err = cpu_init_allreg(cpu, cpu_dentry);
+
+		pr_info("cpu%d(%d) debug files %d\n",
+			cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu));
+		if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) {
+			pr_err("Register files count %d exceeds limit %d\n",
+			       per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES);
+			per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES;
+			err = -ENFILE;
+		}
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+
+static int __init cpu_debug_init(void)
+{
+	cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir);
+
+	return cpu_init_cpu();
+}
+
+static void __exit cpu_debug_exit(void)
+{
+	int i, cpu;
+
+	if (cpu_debugfs_dir)
+		debugfs_remove_recursive(cpu_debugfs_dir);
+
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+		for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++)
+			kfree(per_cpu(priv_arr[i], cpu));
+}
+
+module_init(cpu_debug_init);
+module_exit(cpu_debug_exit);
+
+MODULE_AUTHOR("Jaswinder Singh Rajput");
+MODULE_DESCRIPTION("CPU Debug module");
+MODULE_LICENSE("GPL");
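
For orientation, the directory layout this module builds (inferred from cpu_debug_init() and the helpers above; debugfs assumed mounted at /sys/kernel/debug): one cpuN directory per CPU, one subdirectory per supported register group from cpu_base[], and for MSR groups one 0x<reg> subdirectory per readable register, e.g.:

	/sys/kernel/debug/x86/cpu/cpu0/apic/0x1b/value

Register groups from CPU_TSS_BIT onward (tss, cr, dt) get a single dump file instead of per-register directories.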
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index bfbd5323a635..ca14604611ec 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -639,7 +639,7 @@ static void mce_init_timer(void)
 	if (!next_interval)
 		return;
 	setup_timer(t, mcheck_timer, smp_processor_id());
-	t->expires = round_jiffies_relative(jiffies + next_interval);
+	t->expires = round_jiffies(jiffies + next_interval);
 	add_timer(t);
 }
 
@@ -1110,7 +1110,7 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
 		break;
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
-		t->expires = round_jiffies_relative(jiffies + next_interval);
+		t->expires = round_jiffies(jiffies + next_interval);
 		add_timer_on(t, cpu);
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
 		break;
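
Context for the round_jiffies change: t->expires is an absolute jiffies value, and round_jiffies() expects exactly that, while round_jiffies_relative() expects a delta from now. A toy userspace model (rounding simplified to round-up, per-CPU skew ignored) of why the absolute variant is the right one here:

#include <stdio.h>

#define HZ 1000UL

/* round an ABSOLUTE tick count up to a whole second */
static unsigned long round_jiffies_model(unsigned long abs_ticks)
{
	return ((abs_ticks + HZ - 1) / HZ) * HZ;
}

/* the relative variant rounds a DELTA: round_jiffies(now + delta) - now */
static unsigned long round_jiffies_relative_model(unsigned long delta,
						  unsigned long now)
{
	return round_jiffies_model(now + delta) - now;
}

int main(void)
{
	unsigned long now = 10500, next_interval = 5 * HZ;
	unsigned long expires = now + next_interval; /* absolute, as in mce_init_timer() */

	/* lands on a whole-second boundary (16000): */
	printf("round_jiffies:          %lu\n", round_jiffies_model(expires));
	/* feeding the absolute value to the relative variant subtracts
	 * 'now' again and misses the boundary (15500): */
	printf("round_jiffies_relative: %lu\n",
	       round_jiffies_relative_model(expires, now));
	return 0;
}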
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 899e8938e79f..c929add475c9 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -442,8 +442,7 @@ sysenter_past_esp:
 
 	GET_THREAD_INFO(%ebp)
 
-	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-	testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 	jnz sysenter_audit
 sysenter_do_call:
 	cmpl $(nr_syscalls), %eax
@@ -454,7 +453,7 @@ sysenter_do_call:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
-	testw $_TIF_ALLWORK_MASK, %cx
+	testl $_TIF_ALLWORK_MASK, %ecx
 	jne sysexit_audit
 sysenter_exit:
 /* if something modifies registers it must also disable sysexit */
@@ -468,7 +467,7 @@ sysenter_exit:
 
 #ifdef CONFIG_AUDITSYSCALL
 sysenter_audit:
-	testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
 	jnz syscall_trace_entry
 	addl $4,%esp
 	CFI_ADJUST_CFA_OFFSET -4
@@ -485,7 +484,7 @@ sysenter_audit:
 	jmp sysenter_do_call
 
 sysexit_audit:
-	testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
 	jne syscall_exit_work
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)
@@ -498,7 +497,7 @@ sysexit_audit:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
-	testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
 	jne syscall_exit_work
 	movl PT_EAX(%esp),%eax	/* reload syscall return value */
 	jmp sysenter_exit
@@ -523,8 +522,7 @@ ENTRY(system_call)
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
 					# system call tracing in operation / emulation
-	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-	testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 	jnz syscall_trace_entry
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
@@ -538,7 +536,7 @@ syscall_exit:
 					# between sampling and the iret
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
-	testw $_TIF_ALLWORK_MASK, %cx	# current->work
+	testl $_TIF_ALLWORK_MASK, %ecx	# current->work
 	jne syscall_exit_work
 
 restore_all:
@@ -673,7 +671,7 @@ END(syscall_trace_entry)
 	# perform syscall exit tracing
 	ALIGN
 syscall_exit_work:
-	testb $_TIF_WORK_SYSCALL_EXIT, %cl
+	testl $_TIF_WORK_SYSCALL_EXIT, %ecx
 	jz work_pending
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)	# could let syscall_trace_leave() call
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7ba4621c0dfa..a331ec38af9e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -368,6 +368,7 @@ ENTRY(save_rest)
 END(save_rest)
 
 /* save complete stack frame */
+	.pushsection .kprobes.text, "ax"
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
@@ -396,6 +397,7 @@ ENTRY(save_paranoid)
 1:	ret
 	CFI_ENDPROC
 END(save_paranoid)
+	.popsection
 
 /*
  * A newly forked process directly context switches into this address.
@@ -416,7 +418,6 @@ ENTRY(ret_from_fork)
 
 	GET_THREAD_INFO(%rcx)
 
-	CFI_REMEMBER_STATE
 	RESTORE_REST
 
 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
@@ -428,7 +429,6 @@ ENTRY(ret_from_fork)
 	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
-	CFI_RESTORE_STATE
 	CFI_ENDPROC
 END(ret_from_fork)
 
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index f5fc8c781a62..e7368c1da01d 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -14,12 +14,12 @@
14#include <linux/ftrace.h> 14#include <linux/ftrace.h>
15#include <linux/suspend.h> 15#include <linux/suspend.h>
16#include <linux/gfp.h> 16#include <linux/gfp.h>
17#include <linux/io.h>
17 18
18#include <asm/pgtable.h> 19#include <asm/pgtable.h>
19#include <asm/pgalloc.h> 20#include <asm/pgalloc.h>
20#include <asm/tlbflush.h> 21#include <asm/tlbflush.h>
21#include <asm/mmu_context.h> 22#include <asm/mmu_context.h>
22#include <asm/io.h>
23#include <asm/apic.h> 23#include <asm/apic.h>
24#include <asm/cpufeature.h> 24#include <asm/cpufeature.h>
25#include <asm/desc.h> 25#include <asm/desc.h>
@@ -63,7 +63,7 @@ static void load_segments(void)
63 "\tmovl %%eax,%%fs\n" 63 "\tmovl %%eax,%%fs\n"
64 "\tmovl %%eax,%%gs\n" 64 "\tmovl %%eax,%%gs\n"
65 "\tmovl %%eax,%%ss\n" 65 "\tmovl %%eax,%%ss\n"
66 ::: "eax", "memory"); 66 : : : "eax", "memory");
67#undef STR 67#undef STR
68#undef __STR 68#undef __STR
69} 69}
@@ -205,7 +205,8 @@ void machine_kexec(struct kimage *image)
205 205
206 if (image->preserve_context) { 206 if (image->preserve_context) {
207#ifdef CONFIG_X86_IO_APIC 207#ifdef CONFIG_X86_IO_APIC
208 /* We need to put APICs in legacy mode so that we can 208 /*
209 * We need to put APICs in legacy mode so that we can
209 * get timer interrupts in second kernel. kexec/kdump 210 * get timer interrupts in second kernel. kexec/kdump
210 * paths already have calls to disable_IO_APIC() in 211 * paths already have calls to disable_IO_APIC() in
211 * one form or other. kexec jump path also needs 212 * one form or other. kexec jump path also needs
@@ -227,7 +228,8 @@ void machine_kexec(struct kimage *image)
227 page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) 228 page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
228 << PAGE_SHIFT); 229 << PAGE_SHIFT);
229 230
230 /* The segment registers are funny things, they have both a 231 /*
232 * The segment registers are funny things, they have both a
231 * visible and an invisible part. Whenever the visible part is 233 * visible and an invisible part. Whenever the visible part is
232 * set to a specific selector, the invisible part is loaded 234 * set to a specific selector, the invisible part is loaded
233 * from a table in memory. At no other time is the 235 * from a table in memory. At no other time is the
@@ -237,11 +239,12 @@ void machine_kexec(struct kimage *image)
237 * segments, before I zap the gdt with an invalid value. 239 * segments, before I zap the gdt with an invalid value.
238 */ 240 */
239 load_segments(); 241 load_segments();
240 /* The gdt & idt are now invalid. 242 /*
243 * The gdt & idt are now invalid.
241 * If you want to load them you must set up your own idt & gdt. 244 * If you want to load them you must set up your own idt & gdt.
242 */ 245 */
243 set_gdt(phys_to_virt(0),0); 246 set_gdt(phys_to_virt(0), 0);
244 set_idt(phys_to_virt(0),0); 247 set_idt(phys_to_virt(0), 0);
245 248
246 /* now call it */ 249 /* now call it */
247 image->start = relocate_kernel_ptr((unsigned long)image->head, 250 image->start = relocate_kernel_ptr((unsigned long)image->head,
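
[Editor's note] The set_gdt(phys_to_virt(0), 0) / set_idt(phys_to_virt(0), 0) calls above deliberately load a zero-limit descriptor table, so any later segment or vector access faults instead of silently using stale descriptors. A minimal sketch of what such a helper boils down to; the real kernel uses struct desc_ptr from <asm/desc.h>, and all names here are illustrative:

    #include <stdint.h>

    struct desc_ptr_sketch {
        uint16_t size;      /* table limit; 0 makes every access invalid */
        uint32_t address;   /* linear base address */
    } __attribute__((packed));

    static inline void set_gdt_sketch(uint32_t base, uint16_t limit)
    {
        struct desc_ptr_sketch gdt = { .size = limit, .address = base };

        /* lgdt only latches the pointer; segment registers keep their
         * hidden (cached) parts until the next explicit reload, which
         * is why load_segments() runs first in the code above */
        asm volatile("lgdt %0" : : "m" (gdt));
    }
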
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 6993d51b7fd8..89cea4d44679 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -12,11 +12,47 @@
12#include <linux/reboot.h> 12#include <linux/reboot.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h> 14#include <linux/ftrace.h>
15#include <linux/io.h>
16#include <linux/suspend.h>
15 17
16#include <asm/pgtable.h> 18#include <asm/pgtable.h>
17#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
18#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
19#include <asm/io.h> 21
22static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
23 unsigned long addr)
24{
25 pud_t *pud;
26 pmd_t *pmd;
27 struct page *page;
28 int result = -ENOMEM;
29
30 addr &= PMD_MASK;
31 pgd += pgd_index(addr);
32 if (!pgd_present(*pgd)) {
33 page = kimage_alloc_control_pages(image, 0);
34 if (!page)
35 goto out;
36 pud = (pud_t *)page_address(page);
37 memset(pud, 0, PAGE_SIZE);
38 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
39 }
40 pud = pud_offset(pgd, addr);
41 if (!pud_present(*pud)) {
42 page = kimage_alloc_control_pages(image, 0);
43 if (!page)
44 goto out;
45 pmd = (pmd_t *)page_address(page);
46 memset(pmd, 0, PAGE_SIZE);
47 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
48 }
49 pmd = pmd_offset(pud, addr);
50 if (!pmd_present(*pmd))
51 set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
52 result = 0;
53out:
54 return result;
55}
20 56
21static void init_level2_page(pmd_t *level2p, unsigned long addr) 57static void init_level2_page(pmd_t *level2p, unsigned long addr)
22{ 58{
@@ -83,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p,
83 } 119 }
84 level3p = (pud_t *)page_address(page); 120 level3p = (pud_t *)page_address(page);
85 result = init_level3_page(image, level3p, addr, last_addr); 121 result = init_level3_page(image, level3p, addr, last_addr);
86 if (result) { 122 if (result)
87 goto out; 123 goto out;
88 }
89 set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); 124 set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
90 addr += PGDIR_SIZE; 125 addr += PGDIR_SIZE;
91 } 126 }
@@ -156,6 +191,13 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
156 result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); 191 result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
157 if (result) 192 if (result)
158 return result; 193 return result;
194 /*
195 * image->start may be outside 0 ~ max_pfn, for example when
196 * jumping back to the original kernel from the kexeced kernel
197 */
198 result = init_one_level2_page(image, level4p, image->start);
199 if (result)
200 return result;
159 return init_transition_pgtable(image, level4p); 201 return init_transition_pgtable(image, level4p);
160} 202}
161 203
@@ -229,20 +271,45 @@ void machine_kexec(struct kimage *image)
229{ 271{
230 unsigned long page_list[PAGES_NR]; 272 unsigned long page_list[PAGES_NR];
231 void *control_page; 273 void *control_page;
274 int save_ftrace_enabled;
232 275
233 tracer_disable(); 276#ifdef CONFIG_KEXEC_JUMP
277 if (kexec_image->preserve_context)
278 save_processor_state();
279#endif
280
281 save_ftrace_enabled = __ftrace_enabled_save();
234 282
235 /* Interrupts aren't acceptable while we reboot */ 283 /* Interrupts aren't acceptable while we reboot */
236 local_irq_disable(); 284 local_irq_disable();
237 285
286 if (image->preserve_context) {
287#ifdef CONFIG_X86_IO_APIC
288 /*
289 * We need to put APICs in legacy mode so that we can
290 * get timer interrupts in the second kernel. kexec/kdump
291 * paths already have calls to disable_IO_APIC() in
292 * one form or other. The kexec jump path also needs
293 * one.
294 */
295 disable_IO_APIC();
296#endif
297 }
298
238 control_page = page_address(image->control_code_page) + PAGE_SIZE; 299 control_page = page_address(image->control_code_page) + PAGE_SIZE;
239 memcpy(control_page, relocate_kernel, PAGE_SIZE); 300 memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
240 301
241 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); 302 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
303 page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
242 page_list[PA_TABLE_PAGE] = 304 page_list[PA_TABLE_PAGE] =
243 (unsigned long)__pa(page_address(image->control_code_page)); 305 (unsigned long)__pa(page_address(image->control_code_page));
244 306
245 /* The segment registers are funny things, they have both a 307 if (image->type == KEXEC_TYPE_DEFAULT)
308 page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
309 << PAGE_SHIFT);
310
311 /*
312 * The segment registers are funny things, they have both a
246 * visible and an invisible part. Whenever the visible part is 313 * visible and an invisible part. Whenever the visible part is
247 * set to a specific selector, the invisible part is loaded 314 * set to a specific selector, the invisible part is loaded
248 * from a table in memory. At no other time is the 315 * from a table in memory. At no other time is the
@@ -252,15 +319,25 @@ void machine_kexec(struct kimage *image)
252 * segments, before I zap the gdt with an invalid value. 319 * segments, before I zap the gdt with an invalid value.
253 */ 320 */
254 load_segments(); 321 load_segments();
255 /* The gdt & idt are now invalid. 322 /*
323 * The gdt & idt are now invalid.
256 * If you want to load them you must set up your own idt & gdt. 324 * If you want to load them you must set up your own idt & gdt.
257 */ 325 */
258 set_gdt(phys_to_virt(0),0); 326 set_gdt(phys_to_virt(0), 0);
259 set_idt(phys_to_virt(0),0); 327 set_idt(phys_to_virt(0), 0);
260 328
261 /* now call it */ 329 /* now call it */
262 relocate_kernel((unsigned long)image->head, (unsigned long)page_list, 330 image->start = relocate_kernel((unsigned long)image->head,
263 image->start); 331 (unsigned long)page_list,
332 image->start,
333 image->preserve_context);
334
335#ifdef CONFIG_KEXEC_JUMP
336 if (kexec_image->preserve_context)
337 restore_processor_state();
338#endif
339
340 __ftrace_enabled_restore(save_ftrace_enabled);
264} 341}
265 342
266void arch_crash_save_vmcoreinfo(void) 343void arch_crash_save_vmcoreinfo(void)
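
[Editor's note] Taken together, the machine_kexec() changes above turn the one-way jump into a round trip: processor and ftrace state are saved before entry, relocate_kernel() now returns the peer kernel's re-entry point, and state is restored on the way back. A condensed sketch of that control flow, assuming the signatures shown in the patch (page-list setup and the CONFIG_KEXEC_JUMP guards are elided for brevity):

    void machine_kexec_sketch(struct kimage *image)
    {
        unsigned long page_list[PAGES_NR];
        int save_ftrace_enabled;

        if (image->preserve_context)
            save_processor_state();        /* we intend to come back */

        save_ftrace_enabled = __ftrace_enabled_save();
        local_irq_disable();

        /* ... copy relocate_kernel into the control page and fill
         *     page_list[] exactly as in the hunk above ... */

        /* returns the peer kernel's re-entry point when jumping back */
        image->start = relocate_kernel((unsigned long)image->head,
                                       (unsigned long)page_list,
                                       image->start,
                                       image->preserve_context);

        if (image->preserve_context)
            restore_processor_state();     /* back from the peer kernel */

        __ftrace_enabled_restore(save_ftrace_enabled);
    }

The extra init_one_level2_page() mapping earlier in this file exists for the same reason: image->start may point outside the identity-mapped 0..max_pfn range once the jump-back target is another kernel image.
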
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 309949e9e1c1..6a5a2970f4c5 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -74,8 +74,7 @@ static void ich_force_hpet_resume(void)
74 if (!force_hpet_address) 74 if (!force_hpet_address)
75 return; 75 return;
76 76
77 if (rcba_base == NULL) 77 BUG_ON(rcba_base == NULL);
78 BUG();
79 78
80 /* read the Function Disable register, dword mode only */ 79 /* read the Function Disable register, dword mode only */
81 val = readl(rcba_base + 0x3404); 80 val = readl(rcba_base + 0x3404);
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 2064d0aa8d28..41235531b11c 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -17,7 +17,8 @@
17 17
18#define PTR(x) (x << 2) 18#define PTR(x) (x << 2)
19 19
20/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE 20/*
21 * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
21 * ~ control_page + PAGE_SIZE are used as data storage and stack for 22 * ~ control_page + PAGE_SIZE are used as data storage and stack for
22 * jumping back 23 * jumping back
23 */ 24 */
@@ -76,8 +77,10 @@ relocate_kernel:
76 movl %eax, CP_PA_SWAP_PAGE(%edi) 77 movl %eax, CP_PA_SWAP_PAGE(%edi)
77 movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) 78 movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
78 79
79 /* get physical address of control page now */ 80 /*
80 /* this is impossible after page table switch */ 81 * get physical address of control page now
82 * this is impossible after page table switch
83 */
81 movl PTR(PA_CONTROL_PAGE)(%ebp), %edi 84 movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
82 85
83 /* switch to new set of page tables */ 86 /* switch to new set of page tables */
@@ -97,7 +100,8 @@ identity_mapped:
97 /* store the start address on the stack */ 100 /* store the start address on the stack */
98 pushl %edx 101 pushl %edx
99 102
100 /* Set cr0 to a known state: 103 /*
104 * Set cr0 to a known state:
101 * - Paging disabled 105 * - Paging disabled
102 * - Alignment check disabled 106 * - Alignment check disabled
103 * - Write protect disabled 107 * - Write protect disabled
@@ -113,7 +117,8 @@ identity_mapped:
113 /* clear cr4 if applicable */ 117 /* clear cr4 if applicable */
114 testl %ecx, %ecx 118 testl %ecx, %ecx
115 jz 1f 119 jz 1f
116 /* Set cr4 to a known state: 120 /*
121 * Set cr4 to a known state:
117 * Setting everything to zero seems safe. 122 * Setting everything to zero seems safe.
118 */ 123 */
119 xorl %eax, %eax 124 xorl %eax, %eax
@@ -132,15 +137,18 @@ identity_mapped:
132 call swap_pages 137 call swap_pages
133 addl $8, %esp 138 addl $8, %esp
134 139
135 /* To be certain of avoiding problems with self-modifying code 140 /*
141 * To be certain of avoiding problems with self-modifying code
136 * I need to execute a serializing instruction here. 142 * I need to execute a serializing instruction here.
137 * So I flush the TLB, it's handy, and not processor dependent. 143 * So I flush the TLB, it's handy, and not processor dependent.
138 */ 144 */
139 xorl %eax, %eax 145 xorl %eax, %eax
140 movl %eax, %cr3 146 movl %eax, %cr3
141 147
142 /* set all of the registers to known values */ 148 /*
143 /* leave %esp alone */ 149 * set all of the registers to known values
150 * leave %esp alone
151 */
144 152
145 testl %esi, %esi 153 testl %esi, %esi
146 jnz 1f 154 jnz 1f
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index d32cfb27a479..4de8f5b3d476 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -19,29 +19,77 @@
19#define PTR(x) (x << 3) 19#define PTR(x) (x << 3)
20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) 20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
21 21
22/*
23 * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
24 * ~ control_page + PAGE_SIZE are used as data storage and stack for
25 * jumping back
26 */
27#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
28
29/* Minimal CPU state */
30#define RSP DATA(0x0)
31#define CR0 DATA(0x8)
32#define CR3 DATA(0x10)
33#define CR4 DATA(0x18)
34
35/* other data */
36#define CP_PA_TABLE_PAGE DATA(0x20)
37#define CP_PA_SWAP_PAGE DATA(0x28)
38#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)
39
22 .text 40 .text
23 .align PAGE_SIZE 41 .align PAGE_SIZE
24 .code64 42 .code64
25 .globl relocate_kernel 43 .globl relocate_kernel
26relocate_kernel: 44relocate_kernel:
27 /* %rdi indirection_page 45 /*
46 * %rdi indirection_page
28 * %rsi page_list 47 * %rsi page_list
29 * %rdx start address 48 * %rdx start address
49 * %rcx preserve_context
30 */ 50 */
31 51
52 /* Save the CPU context, used for jumping back */
53 pushq %rbx
54 pushq %rbp
55 pushq %r12
56 pushq %r13
57 pushq %r14
58 pushq %r15
59 pushf
60
61 movq PTR(VA_CONTROL_PAGE)(%rsi), %r11
62 movq %rsp, RSP(%r11)
63 movq %cr0, %rax
64 movq %rax, CR0(%r11)
65 movq %cr3, %rax
66 movq %rax, CR3(%r11)
67 movq %cr4, %rax
68 movq %rax, CR4(%r11)
69
32 /* zero out flags, and disable interrupts */ 70 /* zero out flags, and disable interrupts */
33 pushq $0 71 pushq $0
34 popfq 72 popfq
35 73
36 /* get physical address of control page now */ 74 /*
37 /* this is impossible after page table switch */ 75 * get physical address of control page now
76 * this is impossible after page table switch
77 */
38 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 78 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
39 79
40 /* get physical address of page table now too */ 80 /* get physical address of page table now too */
41 movq PTR(PA_TABLE_PAGE)(%rsi), %rcx 81 movq PTR(PA_TABLE_PAGE)(%rsi), %r9
82
83 /* get physical address of swap page now */
84 movq PTR(PA_SWAP_PAGE)(%rsi), %r10
85
86 /* save some information for jumping back */
87 movq %r9, CP_PA_TABLE_PAGE(%r11)
88 movq %r10, CP_PA_SWAP_PAGE(%r11)
89 movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
42 90
43 /* Switch to the identity mapped page tables */ 91 /* Switch to the identity mapped page tables */
44 movq %rcx, %cr3 92 movq %r9, %cr3
45 93
46 /* setup a new stack at the end of the physical control page */ 94 /* setup a new stack at the end of the physical control page */
47 lea PAGE_SIZE(%r8), %rsp 95 lea PAGE_SIZE(%r8), %rsp
@@ -55,7 +103,8 @@ identity_mapped:
55 /* store the start address on the stack */ 103 /* store the start address on the stack */
56 pushq %rdx 104 pushq %rdx
57 105
58 /* Set cr0 to a known state: 106 /*
107 * Set cr0 to a known state:
59 * - Paging enabled 108 * - Paging enabled
60 * - Alignment check disabled 109 * - Alignment check disabled
61 * - Write protect disabled 110 * - Write protect disabled
@@ -68,7 +117,8 @@ identity_mapped:
68 orl $(X86_CR0_PG | X86_CR0_PE), %eax 117 orl $(X86_CR0_PG | X86_CR0_PE), %eax
69 movq %rax, %cr0 118 movq %rax, %cr0
70 119
71 /* Set cr4 to a known state: 120 /*
121 * Set cr4 to a known state:
72 * - physical address extension enabled 122 * - physical address extension enabled
73 */ 123 */
74 movq $X86_CR4_PAE, %rax 124 movq $X86_CR4_PAE, %rax
@@ -78,9 +128,87 @@ identity_mapped:
781: 1281:
79 129
80 /* Flush the TLB (needed?) */ 130 /* Flush the TLB (needed?) */
81 movq %rcx, %cr3 131 movq %r9, %cr3
132
133 movq %rcx, %r11
134 call swap_pages
135
136 /*
137 * To be certain of avoiding problems with self-modifying code
138 * I need to execute a serializing instruction here.
139 * So I flush the TLB by reloading %cr3 here, it's handy,
140 * and not processor dependent.
141 */
142 movq %cr3, %rax
143 movq %rax, %cr3
144
145 /*
146 * set all of the registers to known values
147 * leave %rsp alone
148 */
149
150 testq %r11, %r11
151 jnz 1f
152 xorq %rax, %rax
153 xorq %rbx, %rbx
154 xorq %rcx, %rcx
155 xorq %rdx, %rdx
156 xorq %rsi, %rsi
157 xorq %rdi, %rdi
158 xorq %rbp, %rbp
159 xorq %r8, %r8
160 xorq %r9, %r9
161 xorq %r10, %r10
162 xorq %r11, %r11
163 xorq %r12, %r12
164 xorq %r13, %r13
165 xorq %r14, %r14
166 xorq %r15, %r15
167
168 ret
169
1701:
171 popq %rdx
172 leaq PAGE_SIZE(%r10), %rsp
173 call *%rdx
174
175 /* get the re-entry point of the peer system */
176 movq 0(%rsp), %rbp
177 call 1f
1781:
179 popq %r8
180 subq $(1b - relocate_kernel), %r8
181 movq CP_PA_SWAP_PAGE(%r8), %r10
182 movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
183 movq CP_PA_TABLE_PAGE(%r8), %rax
184 movq %rax, %cr3
185 lea PAGE_SIZE(%r8), %rsp
186 call swap_pages
187 movq $virtual_mapped, %rax
188 pushq %rax
189 ret
190
191virtual_mapped:
192 movq RSP(%r8), %rsp
193 movq CR4(%r8), %rax
194 movq %rax, %cr4
195 movq CR3(%r8), %rax
196 movq CR0(%r8), %r8
197 movq %rax, %cr3
198 movq %r8, %cr0
199 movq %rbp, %rax
200
201 popf
202 popq %r15
203 popq %r14
204 popq %r13
205 popq %r12
206 popq %rbp
207 popq %rbx
208 ret
82 209
83 /* Do the copies */ 210 /* Do the copies */
211swap_pages:
84 movq %rdi, %rcx /* Put the page_list in %rcx */ 212 movq %rdi, %rcx /* Put the page_list in %rcx */
85 xorq %rdi, %rdi 213 xorq %rdi, %rdi
86 xorq %rsi, %rsi 214 xorq %rsi, %rsi
@@ -112,36 +240,27 @@ identity_mapped:
112 movq %rcx, %rsi /* For every source page do a copy */ 240 movq %rcx, %rsi /* For every source page do a copy */
113 andq $0xfffffffffffff000, %rsi 241 andq $0xfffffffffffff000, %rsi
114 242
243 movq %rdi, %rdx
244 movq %rsi, %rax
245
246 movq %r10, %rdi
115 movq $512, %rcx 247 movq $512, %rcx
116 rep ; movsq 248 rep ; movsq
117 jmp 0b
1183:
119
120 /* To be certain of avoiding problems with self-modifying code
121 * I need to execute a serializing instruction here.
122 * So I flush the TLB by reloading %cr3 here, it's handy,
123 * and not processor dependent.
124 */
125 movq %cr3, %rax
126 movq %rax, %cr3
127 249
128 /* set all of the registers to known values */ 250 movq %rax, %rdi
129 /* leave %rsp alone */ 251 movq %rdx, %rsi
252 movq $512, %rcx
253 rep ; movsq
130 254
131 xorq %rax, %rax 255 movq %rdx, %rdi
132 xorq %rbx, %rbx 256 movq %r10, %rsi
133 xorq %rcx, %rcx 257 movq $512, %rcx
134 xorq %rdx, %rdx 258 rep ; movsq
135 xorq %rsi, %rsi
136 xorq %rdi, %rdi
137 xorq %rbp, %rbp
138 xorq %r8, %r8
139 xorq %r9, %r9
140 xorq %r10, %r9
141 xorq %r11, %r11
142 xorq %r12, %r12
143 xorq %r13, %r13
144 xorq %r14, %r14
145 xorq %r15, %r15
146 259
260 lea PAGE_SIZE(%rax), %rsi
261 jmp 0b
2623:
147 ret 263 ret
264
265 .globl kexec_control_code_size
266.set kexec_control_code_size, . - relocate_kernel
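
[Editor's note] In preserve-context mode the swap_pages loop above no longer simply overwrites pages: each destination page is exchanged with its source through the dedicated swap page, using three "rep ; movsq" bursts of 512 quadwords, i.e. three 4 KiB copies. The same exchange rendered in C as a sketch (the page-size constant is illustrative):

    #include <string.h>

    #define KEXEC_PAGE_SIZE 4096   /* one x86 page */

    static void swap_one_page(void *dest, void *source, void *swap_page)
    {
        memcpy(swap_page, source, KEXEC_PAGE_SIZE); /* stash the source   */
        memcpy(source, dest, KEXEC_PAGE_SIZE);      /* dest -> source     */
        memcpy(dest, swap_page, KEXEC_PAGE_SIZE);   /* old source -> dest */
    }

Exchanging rather than clobbering is what makes the jump back possible: the first kernel's pages survive in the slots the second kernel's image came from, so running swap_pages a second time restores them.
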
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 191a876e9e87..31ffc24eec4d 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -578,7 +578,7 @@ static struct irq_chip piix4_virtual_irq_type = {
578static irqreturn_t piix4_master_intr(int irq, void *dev_id) 578static irqreturn_t piix4_master_intr(int irq, void *dev_id)
579{ 579{
580 int realirq; 580 int realirq;
581 irq_desc_t *desc; 581 struct irq_desc *desc;
582 unsigned long flags; 582 unsigned long flags;
583 583
584 spin_lock_irqsave(&i8259A_lock, flags); 584 spin_lock_irqsave(&i8259A_lock, flags);
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fbfced6f6800..5bf54e40c6ef 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
275ASSERT((per_cpu__irq_stack_union == 0), 275ASSERT((per_cpu__irq_stack_union == 0),
276 "irq_stack_union is not at start of per-cpu area"); 276 "irq_stack_union is not at start of per-cpu area");
277#endif 277#endif
278
279#ifdef CONFIG_KEXEC
280#include <asm/kexec.h>
281
282ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
283 "kexec control code size is too big")
284#endif
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index c22981fa2f3a..ad5441ed1b57 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,30 +1,38 @@
1/* Copyright 2002 Andi Kleen */ 1/* Copyright 2002 Andi Kleen */
2 2
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <asm/dwarf2.h> 4
5#include <asm/cpufeature.h> 5#include <asm/cpufeature.h>
6#include <asm/dwarf2.h>
6 7
7/* 8/*
8 * memcpy - Copy a memory block. 9 * memcpy - Copy a memory block.
9 * 10 *
10 * Input: 11 * Input:
11 * rdi destination 12 * rdi destination
12 * rsi source 13 * rsi source
13 * rdx count 14 * rdx count
14 * 15 *
15 * Output: 16 * Output:
16 * rax original destination 17 * rax original destination
17 */ 18 */
18 19
20/*
21 * memcpy_c() - fast string ops (REP MOVSQ) based variant.
22 *
23 * Calls to this get patched into the kernel image via the
24 * alternative instructions framework:
25 */
19 ALIGN 26 ALIGN
20memcpy_c: 27memcpy_c:
21 CFI_STARTPROC 28 CFI_STARTPROC
22 movq %rdi,%rax 29 movq %rdi, %rax
23 movl %edx,%ecx 30
24 shrl $3,%ecx 31 movl %edx, %ecx
25 andl $7,%edx 32 shrl $3, %ecx
33 andl $7, %edx
26 rep movsq 34 rep movsq
27 movl %edx,%ecx 35 movl %edx, %ecx
28 rep movsb 36 rep movsb
29 ret 37 ret
30 CFI_ENDPROC 38 CFI_ENDPROC
@@ -33,99 +41,110 @@ ENDPROC(memcpy_c)
33ENTRY(__memcpy) 41ENTRY(__memcpy)
34ENTRY(memcpy) 42ENTRY(memcpy)
35 CFI_STARTPROC 43 CFI_STARTPROC
36 pushq %rbx
37 CFI_ADJUST_CFA_OFFSET 8
38 CFI_REL_OFFSET rbx, 0
39 movq %rdi,%rax
40 44
41 movl %edx,%ecx 45 /*
42 shrl $6,%ecx 46 * Put the number of full 64-byte blocks into %ecx.
47 * Tail portion is handled at the end:
48 */
49 movq %rdi, %rax
50 movl %edx, %ecx
51 shrl $6, %ecx
43 jz .Lhandle_tail 52 jz .Lhandle_tail
44 53
45 .p2align 4 54 .p2align 4
46.Lloop_64: 55.Lloop_64:
56 /*
57 * We decrement the loop index here - and the zero-flag is
58 * checked at the end of the loop (instructions in between do
59 * not change the zero flag):
60 */
47 decl %ecx 61 decl %ecx
48 62
49 movq (%rsi),%r11 63 /*
50 movq 8(%rsi),%r8 64 * Move in blocks of 4x16 bytes:
65 */
66 movq 0*8(%rsi), %r11
67 movq 1*8(%rsi), %r8
68 movq %r11, 0*8(%rdi)
69 movq %r8, 1*8(%rdi)
51 70
52 movq %r11,(%rdi) 71 movq 2*8(%rsi), %r9
53 movq %r8,1*8(%rdi) 72 movq 3*8(%rsi), %r10
73 movq %r9, 2*8(%rdi)
74 movq %r10, 3*8(%rdi)
54 75
55 movq 2*8(%rsi),%r9 76 movq 4*8(%rsi), %r11
56 movq 3*8(%rsi),%r10 77 movq 5*8(%rsi), %r8
78 movq %r11, 4*8(%rdi)
79 movq %r8, 5*8(%rdi)
57 80
58 movq %r9,2*8(%rdi) 81 movq 6*8(%rsi), %r9
59 movq %r10,3*8(%rdi) 82 movq 7*8(%rsi), %r10
83 movq %r9, 6*8(%rdi)
84 movq %r10, 7*8(%rdi)
60 85
61 movq 4*8(%rsi),%r11 86 leaq 64(%rsi), %rsi
62 movq 5*8(%rsi),%r8 87 leaq 64(%rdi), %rdi
63 88
64 movq %r11,4*8(%rdi)
65 movq %r8,5*8(%rdi)
66
67 movq 6*8(%rsi),%r9
68 movq 7*8(%rsi),%r10
69
70 movq %r9,6*8(%rdi)
71 movq %r10,7*8(%rdi)
72
73 leaq 64(%rsi),%rsi
74 leaq 64(%rdi),%rdi
75 jnz .Lloop_64 89 jnz .Lloop_64
76 90
77.Lhandle_tail: 91.Lhandle_tail:
78 movl %edx,%ecx 92 movl %edx, %ecx
79 andl $63,%ecx 93 andl $63, %ecx
80 shrl $3,%ecx 94 shrl $3, %ecx
81 jz .Lhandle_7 95 jz .Lhandle_7
96
82 .p2align 4 97 .p2align 4
83.Lloop_8: 98.Lloop_8:
84 decl %ecx 99 decl %ecx
85 movq (%rsi),%r8 100 movq (%rsi), %r8
86 movq %r8,(%rdi) 101 movq %r8, (%rdi)
87 leaq 8(%rdi),%rdi 102 leaq 8(%rdi), %rdi
88 leaq 8(%rsi),%rsi 103 leaq 8(%rsi), %rsi
89 jnz .Lloop_8 104 jnz .Lloop_8
90 105
91.Lhandle_7: 106.Lhandle_7:
92 movl %edx,%ecx 107 movl %edx, %ecx
93 andl $7,%ecx 108 andl $7, %ecx
94 jz .Lende 109 jz .Lend
110
95 .p2align 4 111 .p2align 4
96.Lloop_1: 112.Lloop_1:
97 movb (%rsi),%r8b 113 movb (%rsi), %r8b
98 movb %r8b,(%rdi) 114 movb %r8b, (%rdi)
99 incq %rdi 115 incq %rdi
100 incq %rsi 116 incq %rsi
101 decl %ecx 117 decl %ecx
102 jnz .Lloop_1 118 jnz .Lloop_1
103 119
104.Lende: 120.Lend:
105 popq %rbx
106 CFI_ADJUST_CFA_OFFSET -8
107 CFI_RESTORE rbx
108 ret 121 ret
109.Lfinal:
110 CFI_ENDPROC 122 CFI_ENDPROC
111ENDPROC(memcpy) 123ENDPROC(memcpy)
112ENDPROC(__memcpy) 124ENDPROC(__memcpy)
113 125
114 /* Some CPUs run faster using the string copy instructions. 126 /*
115 It is also a lot simpler. Use this when possible */ 127 * Some CPUs run faster using the string copy instructions.
128 * It is also a lot simpler. Use this when possible:
129 */
116 130
117 .section .altinstr_replacement,"ax" 131 .section .altinstr_replacement, "ax"
1181: .byte 0xeb /* jmp <disp8> */ 1321: .byte 0xeb /* jmp <disp8> */
119 .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ 133 .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
1202: 1342:
121 .previous 135 .previous
122 .section .altinstructions,"a" 136
137 .section .altinstructions, "a"
123 .align 8 138 .align 8
124 .quad memcpy 139 .quad memcpy
125 .quad 1b 140 .quad 1b
126 .byte X86_FEATURE_REP_GOOD 141 .byte X86_FEATURE_REP_GOOD
127 /* Replace only beginning, memcpy is used to apply alternatives, so it 142
128 * is silly to overwrite itself with nops - reboot is only outcome... */ 143 /*
144 * Replace only beginning, memcpy is used to apply alternatives,
145 * so it is silly to overwrite itself with nops - reboot is the
146 * only outcome...
147 */
129 .byte 2b - 1b 148 .byte 2b - 1b
130 .byte 2b - 1b 149 .byte 2b - 1b
131 .previous 150 .previous
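
[Editor's note] The rewritten memcpy keeps a three-stage shape: whole 64-byte blocks in an unrolled loop, then 8-byte words, then trailing bytes; on CPUs with X86_FEATURE_REP_GOOD the alternatives section above patches in the memcpy_c rep;movsq variant instead. A hedged C model of the block/tail decomposition (unaligned 64-bit accesses are assumed benign, as they are for the x86 assembly):

    #include <stddef.h>
    #include <stdint.h>

    void *memcpy_sketch(void *dst, const void *src, size_t n)
    {
        uint8_t *d = dst;
        const uint8_t *s = src;
        size_t blocks = n >> 6;            /* shrl $6: full 64-byte blocks */

        while (blocks--) {
            for (int i = 0; i < 8; i++)    /* eight quadword moves / block */
                ((uint64_t *)(void *)d)[i] =
                        ((const uint64_t *)(const void *)s)[i];
            d += 64;
            s += 64;
        }

        /* andl $63 ; shrl $3: remaining whole quadwords */
        for (size_t words = (n & 63) >> 3; words--; d += 8, s += 8)
            *(uint64_t *)(void *)d = *(const uint64_t *)(const void *)s;

        /* andl $7: final byte tail */
        for (size_t bytes = n & 7; bytes--; )
            *d++ = *s++;

        return dst;    /* original destination, like %rax in the asm */
    }
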
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index d11745334a67..f256e73542d7 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -121,23 +121,30 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
121 pagefault_enable(); 121 pagefault_enable();
122} 122}
123 123
124/* This is the same as kmap_atomic() but can map memory that doesn't 124void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
125 * have a struct page associated with it.
126 */
127void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
128{ 125{
129 enum fixed_addresses idx; 126 enum fixed_addresses idx;
130 unsigned long vaddr; 127 unsigned long vaddr;
131 128
132 pagefault_disable(); 129 pagefault_disable();
133 130
134 idx = type + KM_TYPE_NR*smp_processor_id(); 131 debug_kmap_atomic_prot(type);
132
133 idx = type + KM_TYPE_NR * smp_processor_id();
135 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 134 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
136 set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); 135 set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
137 arch_flush_lazy_mmu_mode(); 136 arch_flush_lazy_mmu_mode();
138 137
139 return (void*) vaddr; 138 return (void*) vaddr;
140} 139}
140
141/* This is the same as kmap_atomic() but can map memory that doesn't
142 * have a struct page associated with it.
143 */
144void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
145{
146 return kmap_atomic_prot_pfn(pfn, type, kmap_prot);
147}
141EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ 148EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
142 149
143struct page *kmap_atomic_to_page(void *ptr) 150struct page *kmap_atomic_to_page(void *ptr)
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 04102d42ff42..592984e5496b 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -18,6 +18,7 @@
18 18
19#include <asm/iomap.h> 19#include <asm/iomap.h>
20#include <asm/pat.h> 20#include <asm/pat.h>
21#include <asm/highmem.h>
21#include <linux/module.h> 22#include <linux/module.h>
22 23
23int is_io_mapping_possible(resource_size_t base, unsigned long size) 24int is_io_mapping_possible(resource_size_t base, unsigned long size)
@@ -36,11 +37,6 @@ EXPORT_SYMBOL_GPL(is_io_mapping_possible);
36void * 37void *
37iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) 38iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
38{ 39{
39 enum fixed_addresses idx;
40 unsigned long vaddr;
41
42 pagefault_disable();
43
44 /* 40 /*
45 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. 41 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
46 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the 42 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
@@ -50,12 +46,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
50 if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) 46 if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
51 prot = PAGE_KERNEL_UC_MINUS; 47 prot = PAGE_KERNEL_UC_MINUS;
52 48
53 idx = type + KM_TYPE_NR*smp_processor_id(); 49 return kmap_atomic_prot_pfn(pfn, type, prot);
54 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
55 set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
56 arch_flush_lazy_mmu_mode();
57
58 return (void*) vaddr;
59} 50}
60EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); 51EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
61 52
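
[Editor's note] After this refactor, kmap_atomic_prot_pfn() is the single helper that installs a temporary fixmap PTE with caller-chosen protections; iomap_atomic_prot_pfn() shrinks to the non-PAT PAGE_KERNEL_WC fixup plus a tail call into it. A hedged usage sketch, with the km_type slot and protection chosen purely for illustration:

    #include <asm/highmem.h>

    static void poke_one_pfn(unsigned long pfn)
    {
        void *vaddr = kmap_atomic_prot_pfn(pfn, KM_USER0, PAGE_KERNEL);

        ((unsigned char *)vaddr)[0] = 0xff;   /* example access */

        kunmap_atomic(vaddr, KM_USER0);       /* releases the fixmap slot */
    }

Like kmap_atomic_pfn(), this works on raw page frames with no struct page, which is why the i915 GEM iomap path can share it.
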
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 6a518dd08a36..4f115e00486b 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); 310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
311 311
312 if (!ctx->active) { 312 if (!ctx->active) {
313 pr_warning("kmmio: spurious debug trap on CPU %d.\n", 313 pr_debug("kmmio: spurious debug trap on CPU %d.\n",
314 smp_processor_id()); 314 smp_processor_id());
315 goto out; 315 goto out;
316 } 316 }