aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/socket.h2
-rw-r--r--arch/arm/include/asm/socket.h3
-rw-r--r--arch/arm/mach-ixp4xx/common.c1
-rw-r--r--arch/avr32/include/asm/socket.h3
-rw-r--r--arch/cris/include/asm/socket.h3
-rw-r--r--arch/frv/include/asm/socket.h3
-rw-r--r--arch/h8300/include/asm/socket.h3
-rw-r--r--arch/ia64/hp/sim/simeth.c2
-rw-r--r--arch/ia64/include/asm/socket.h3
-rw-r--r--arch/m32r/include/asm/socket.h3
-rw-r--r--arch/m68k/include/asm/socket.h3
-rw-r--r--arch/mips/ar7/platform.c15
-rw-r--r--arch/mips/include/asm/socket.h2
-rw-r--r--arch/mips/kernel/scall64-n32.S2
-rw-r--r--arch/mips/kernel/scall64-o32.S4
-rw-r--r--arch/mn10300/include/asm/socket.h3
-rw-r--r--arch/parisc/include/asm/socket.h2
-rw-r--r--arch/powerpc/include/asm/qe.h1
-rw-r--r--arch/powerpc/include/asm/socket.h3
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe.c13
-rw-r--r--arch/s390/include/asm/socket.h3
-rw-r--r--arch/sh/include/asm/sh_eth.h3
-rw-r--r--arch/sparc/include/asm/socket.h3
-rw-r--r--arch/sparc/kernel/sys32.S2
-rw-r--r--arch/um/drivers/net_kern.c4
-rw-r--r--arch/x86/Kconfig.cpu19
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/Makefile_32.cpu2
-rw-r--r--arch/x86/boot/video-vesa.c7
-rw-r--r--arch/x86/boot/video-vga.c10
-rw-r--r--arch/x86/boot/video.c5
-rw-r--r--arch/x86/boot/video.h20
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c17
-rw-r--r--arch/x86/ia32/ia32entry.S2
-rw-r--r--arch/x86/ia32/sys_ia32.c14
-rw-r--r--arch/x86/include/asm/alternative.h7
-rw-r--r--arch/x86/include/asm/apic.h7
-rw-r--r--arch/x86/include/asm/apicdef.h3
-rw-r--r--arch/x86/include/asm/asm.h10
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/include/asm/current.h2
-rw-r--r--arch/x86/include/asm/desc.h13
-rw-r--r--arch/x86/include/asm/desc_defs.h6
-rw-r--r--arch/x86/include/asm/dwarf2.h18
-rw-r--r--arch/x86/include/asm/i387.h9
-rw-r--r--arch/x86/include/asm/io_apic.h13
-rw-r--r--arch/x86/include/asm/ioctls.h95
-rw-r--r--arch/x86/include/asm/ipcbuf.h29
-rw-r--r--arch/x86/include/asm/irqflags.h9
-rw-r--r--arch/x86/include/asm/lguest.h5
-rw-r--r--arch/x86/include/asm/mman.h14
-rw-r--r--arch/x86/include/asm/module.h15
-rw-r--r--arch/x86/include/asm/msgbuf.h40
-rw-r--r--arch/x86/include/asm/msr.h75
-rw-r--r--arch/x86/include/asm/param.h23
-rw-r--r--arch/x86/include/asm/paravirt.h746
-rw-r--r--arch/x86/include/asm/paravirt_types.h721
-rw-r--r--arch/x86/include/asm/percpu.h26
-rw-r--r--arch/x86/include/asm/pgtable.h16
-rw-r--r--arch/x86/include/asm/processor.h28
-rw-r--r--arch/x86/include/asm/scatterlist.h27
-rw-r--r--arch/x86/include/asm/shmbuf.h52
-rw-r--r--arch/x86/include/asm/socket.h61
-rw-r--r--arch/x86/include/asm/sockios.h14
-rw-r--r--arch/x86/include/asm/stackprotector.h10
-rw-r--r--arch/x86/include/asm/system.h29
-rw-r--r--arch/x86/include/asm/termbits.h199
-rw-r--r--arch/x86/include/asm/termios.h115
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/traps.h4
-rw-r--r--arch/x86/include/asm/types.h12
-rw-r--r--arch/x86/include/asm/ucontext.h8
-rw-r--r--arch/x86/kernel/acpi/boot.c105
-rw-r--r--arch/x86/kernel/alternative.c58
-rw-r--r--arch/x86/kernel/apic/apic.c110
-rw-r--r--arch/x86/kernel/apic/es7000_32.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c324
-rw-r--r--arch/x86/kernel/apic/ipi.c2
-rw-r--r--arch/x86/kernel/apic/probe_64.c6
-rw-r--r--arch/x86/kernel/apm_32.c31
-rw-r--r--arch/x86/kernel/cpu/amd.c117
-rw-r--r--arch/x86/kernel/cpu/bugs.c10
-rw-r--r--arch/x86/kernel/cpu/bugs_64.c2
-rw-r--r--arch/x86/kernel/cpu/common.c70
-rw-r--r--arch/x86/kernel/cpu/cyrix.c19
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c5
-rw-r--r--arch/x86/kernel/cpu/intel.c11
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c148
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c8
-rw-r--r--arch/x86/kernel/cpu/mtrr/amd.c97
-rw-r--r--arch/x86/kernel/cpu/mtrr/centaur.c168
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c390
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c94
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c304
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c135
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c455
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h19
-rw-r--r--arch/x86/kernel/cpu/mtrr/state.c68
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c45
-rw-r--r--arch/x86/kernel/cpu/proc.c4
-rw-r--r--arch/x86/kernel/cpu/vmware.c18
-rw-r--r--arch/x86/kernel/doublefault_32.c4
-rw-r--r--arch/x86/kernel/ds.c6
-rw-r--r--arch/x86/kernel/dumpstack.c1
-rw-r--r--arch/x86/kernel/e820.c2
-rw-r--r--arch/x86/kernel/head_32.S1
-rw-r--r--arch/x86/kernel/irq_32.c5
-rw-r--r--arch/x86/kernel/mpparse.c10
-rw-r--r--arch/x86/kernel/msr.c61
-rw-r--r--arch/x86/kernel/paravirt.c3
-rw-r--r--arch/x86/kernel/process_32.c30
-rw-r--r--arch/x86/kernel/process_64.c36
-rw-r--r--arch/x86/kernel/smpboot.c3
-rw-r--r--arch/x86/kernel/step.c9
-rw-r--r--arch/x86/kernel/tlb_uv.c4
-rw-r--r--arch/x86/kernel/traps.c54
-rw-r--r--arch/x86/kvm/x86.c7
-rw-r--r--arch/x86/lib/Makefile2
-rw-r--r--arch/x86/lib/msr-reg-export.c5
-rw-r--r--arch/x86/lib/msr-reg.S102
-rw-r--r--arch/x86/lib/msr.c49
-rw-r--r--arch/x86/mm/Makefile6
-rw-r--r--arch/x86/mm/fault.c51
-rw-r--r--arch/x86/mm/highmem_32.c2
-rw-r--r--arch/x86/mm/ioremap.c72
-rw-r--r--arch/x86/mm/pat.c2
-rw-r--r--arch/x86/mm/physaddr.c70
-rw-r--r--arch/x86/mm/physaddr.h10
-rw-r--r--arch/x86/mm/srat_32.c4
-rw-r--r--arch/x86/xen/Makefile2
-rw-r--r--arch/x86/xen/enlighten.c133
-rw-r--r--arch/x86/xen/smp.c1
-rw-r--r--arch/x86/xen/spinlock.c28
-rw-r--r--arch/xtensa/include/asm/socket.h3
-rw-r--r--arch/xtensa/platforms/iss/network.c2
135 files changed, 3066 insertions, 3196 deletions
diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h
index 3641ec1452f4..26773e3246e2 100644
--- a/arch/alpha/include/asm/socket.h
+++ b/arch/alpha/include/asm/socket.h
@@ -32,6 +32,8 @@
32#define SO_RCVTIMEO 0x1012 32#define SO_RCVTIMEO 0x1012
33#define SO_SNDTIMEO 0x1013 33#define SO_SNDTIMEO 0x1013
34#define SO_ACCEPTCONN 0x1014 34#define SO_ACCEPTCONN 0x1014
35#define SO_PROTOCOL 0x1028
36#define SO_DOMAIN 0x1029
35 37
36/* linux-specific, might as well be the same as on i386 */ 38/* linux-specific, might as well be the same as on i386 */
37#define SO_NO_CHECK 11 39#define SO_NO_CHECK 11
diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h
index 537de4e0ef50..92ac61d294fd 100644
--- a/arch/arm/include/asm/socket.h
+++ b/arch/arm/include/asm/socket.h
@@ -57,4 +57,7 @@
57#define SO_TIMESTAMPING 37 57#define SO_TIMESTAMPING 37
58#define SCM_TIMESTAMPING SO_TIMESTAMPING 58#define SCM_TIMESTAMPING SO_TIMESTAMPING
59 59
60#define SO_PROTOCOL 38
61#define SO_DOMAIN 39
62
60#endif /* _ASM_SOCKET_H */ 63#endif /* _ASM_SOCKET_H */
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 1e93dfee7543..5083f03e9b5e 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -416,6 +416,7 @@ static struct clocksource clocksource_ixp4xx = {
416}; 416};
417 417
418unsigned long ixp4xx_timer_freq = FREQ; 418unsigned long ixp4xx_timer_freq = FREQ;
419EXPORT_SYMBOL(ixp4xx_timer_freq);
419static int __init ixp4xx_clocksource_init(void) 420static int __init ixp4xx_clocksource_init(void)
420{ 421{
421 clocksource_ixp4xx.mult = 422 clocksource_ixp4xx.mult =
diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h
index 04c860619700..fe863f9794d5 100644
--- a/arch/avr32/include/asm/socket.h
+++ b/arch/avr32/include/asm/socket.h
@@ -57,4 +57,7 @@
57#define SO_TIMESTAMPING 37 57#define SO_TIMESTAMPING 37
58#define SCM_TIMESTAMPING SO_TIMESTAMPING 58#define SCM_TIMESTAMPING SO_TIMESTAMPING
59 59
60#define SO_PROTOCOL 38
61#define SO_DOMAIN 39
62
60#endif /* __ASM_AVR32_SOCKET_H */ 63#endif /* __ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h
index d5cf74005408..45ec49bdb7b1 100644
--- a/arch/cris/include/asm/socket.h
+++ b/arch/cris/include/asm/socket.h
@@ -59,6 +59,9 @@
59#define SO_TIMESTAMPING 37 59#define SO_TIMESTAMPING 37
60#define SCM_TIMESTAMPING SO_TIMESTAMPING 60#define SCM_TIMESTAMPING SO_TIMESTAMPING
61 61
62#define SO_PROTOCOL 38
63#define SO_DOMAIN 39
64
62#endif /* _ASM_SOCKET_H */ 65#endif /* _ASM_SOCKET_H */
63 66
64 67
diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h
index 57c3d4054e8b..2dea726095c2 100644
--- a/arch/frv/include/asm/socket.h
+++ b/arch/frv/include/asm/socket.h
@@ -57,5 +57,8 @@
57#define SO_TIMESTAMPING 37 57#define SO_TIMESTAMPING 37
58#define SCM_TIMESTAMPING SO_TIMESTAMPING 58#define SCM_TIMESTAMPING SO_TIMESTAMPING
59 59
60#define SO_PROTOCOL 38
61#define SO_DOMAIN 39
62
60#endif /* _ASM_SOCKET_H */ 63#endif /* _ASM_SOCKET_H */
61 64
diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h
index 602518a70a1a..1547f01c8e22 100644
--- a/arch/h8300/include/asm/socket.h
+++ b/arch/h8300/include/asm/socket.h
@@ -57,4 +57,7 @@
57#define SO_TIMESTAMPING 37 57#define SO_TIMESTAMPING 37
58#define SCM_TIMESTAMPING SO_TIMESTAMPING 58#define SCM_TIMESTAMPING SO_TIMESTAMPING
59 59
60#define SO_PROTOCOL 38
61#define SO_DOMAIN 39
62
60#endif /* _ASM_SOCKET_H */ 63#endif /* _ASM_SOCKET_H */
diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c
index e4d8fde68103..7e81966ce481 100644
--- a/arch/ia64/hp/sim/simeth.c
+++ b/arch/ia64/hp/sim/simeth.c
@@ -412,7 +412,7 @@ simeth_tx(struct sk_buff *skb, struct net_device *dev)
412 */ 412 */
413 413
414 dev_kfree_skb(skb); 414 dev_kfree_skb(skb);
415 return 0; 415 return NETDEV_TX_OK;
416} 416}
417 417
418static inline struct sk_buff * 418static inline struct sk_buff *
diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h
index 745421225ec6..0b0d5ff062e5 100644
--- a/arch/ia64/include/asm/socket.h
+++ b/arch/ia64/include/asm/socket.h
@@ -66,4 +66,7 @@
66#define SO_TIMESTAMPING 37 66#define SO_TIMESTAMPING 37
67#define SCM_TIMESTAMPING SO_TIMESTAMPING 67#define SCM_TIMESTAMPING SO_TIMESTAMPING
68 68
69#define SO_PROTOCOL 38
70#define SO_DOMAIN 39
71
69#endif /* _ASM_IA64_SOCKET_H */ 72#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h
index be7ed589af5c..3390a864f224 100644
--- a/arch/m32r/include/asm/socket.h
+++ b/arch/m32r/include/asm/socket.h
@@ -57,4 +57,7 @@
57#define SO_TIMESTAMPING 37 57#define SO_TIMESTAMPING 37
58#define SCM_TIMESTAMPING SO_TIMESTAMPING 58#define SCM_TIMESTAMPING SO_TIMESTAMPING
59 59
60#define SO_PROTOCOL 38
61#define SO_DOMAIN 39
62
60#endif /* _ASM_M32R_SOCKET_H */ 63#endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h
index ca87f938b03f..eee01cce921b 100644
--- a/arch/m68k/include/asm/socket.h
+++ b/arch/m68k/include/asm/socket.h
@@ -57,4 +57,7 @@
57#define SO_TIMESTAMPING 37 57#define SO_TIMESTAMPING 37
58#define SCM_TIMESTAMPING SO_TIMESTAMPING 58#define SCM_TIMESTAMPING SO_TIMESTAMPING
59 59
60#define SO_PROTOCOL 38
61#define SO_DOMAIN 39
62
60#endif /* _ASM_SOCKET_H */ 63#endif /* _ASM_SOCKET_H */
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index 2ecab6155932..cf50fa29b198 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -32,6 +32,8 @@
32#include <linux/leds.h> 32#include <linux/leds.h>
33#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/etherdevice.h> 34#include <linux/etherdevice.h>
35#include <linux/phy.h>
36#include <linux/phy_fixed.h>
35 37
36#include <asm/addrspace.h> 38#include <asm/addrspace.h>
37#include <asm/mach-ar7/ar7.h> 39#include <asm/mach-ar7/ar7.h>
@@ -208,6 +210,12 @@ static struct physmap_flash_data physmap_flash_data = {
208 .width = 2, 210 .width = 2,
209}; 211};
210 212
213static struct fixed_phy_status fixed_phy_status __initdata = {
214 .link = 1,
215 .speed = 100,
216 .duplex = 1,
217};
218
211static struct plat_cpmac_data cpmac_low_data = { 219static struct plat_cpmac_data cpmac_low_data = {
212 .reset_bit = 17, 220 .reset_bit = 17,
213 .power_bit = 20, 221 .power_bit = 20,
@@ -530,6 +538,9 @@ static int __init ar7_register_devices(void)
530 } 538 }
531 539
532 if (ar7_has_high_cpmac()) { 540 if (ar7_has_high_cpmac()) {
541 res = fixed_phy_add(PHY_POLL, cpmac_high.id, &fixed_phy_status);
542 if (res && res != -ENODEV)
543 return res;
533 cpmac_get_mac(1, cpmac_high_data.dev_addr); 544 cpmac_get_mac(1, cpmac_high_data.dev_addr);
534 res = platform_device_register(&cpmac_high); 545 res = platform_device_register(&cpmac_high);
535 if (res) 546 if (res)
@@ -538,6 +549,10 @@ static int __init ar7_register_devices(void)
538 cpmac_low_data.phy_mask = 0xffffffff; 549 cpmac_low_data.phy_mask = 0xffffffff;
539 } 550 }
540 551
552 res = fixed_phy_add(PHY_POLL, cpmac_low.id, &fixed_phy_status);
553 if (res && res != -ENODEV)
554 return res;
555
541 cpmac_get_mac(0, cpmac_low_data.dev_addr); 556 cpmac_get_mac(0, cpmac_low_data.dev_addr);
542 res = platform_device_register(&cpmac_low); 557 res = platform_device_register(&cpmac_low);
543 if (res) 558 if (res)
diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h
index 2abca1780169..ae05accd9fe4 100644
--- a/arch/mips/include/asm/socket.h
+++ b/arch/mips/include/asm/socket.h
@@ -42,6 +42,8 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */
42#define SO_SNDTIMEO 0x1005 /* send timeout */ 42#define SO_SNDTIMEO 0x1005 /* send timeout */
43#define SO_RCVTIMEO 0x1006 /* receive timeout */ 43#define SO_RCVTIMEO 0x1006 /* receive timeout */
44#define SO_ACCEPTCONN 0x1009 44#define SO_ACCEPTCONN 0x1009
45#define SO_PROTOCOL 0x1028 /* protocol type */
46#define SO_DOMAIN 0x1029 /* domain/socket family */
45 47
46/* linux-specific, might as well be the same as on i386 */ 48/* linux-specific, might as well be the same as on i386 */
47#define SO_NO_CHECK 11 49#define SO_NO_CHECK 11
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index e855b118a079..1a6ae124635b 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -164,7 +164,7 @@ EXPORT(sysn32_call_table)
164 PTR sys_connect 164 PTR sys_connect
165 PTR sys_accept 165 PTR sys_accept
166 PTR sys_sendto 166 PTR sys_sendto
167 PTR sys_recvfrom 167 PTR compat_sys_recvfrom
168 PTR compat_sys_sendmsg /* 6045 */ 168 PTR compat_sys_sendmsg /* 6045 */
169 PTR compat_sys_recvmsg 169 PTR compat_sys_recvmsg
170 PTR sys_shutdown 170 PTR sys_shutdown
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 0c49f1a660be..cd31087a651f 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -378,8 +378,8 @@ sys_call_table:
378 PTR sys_getsockname 378 PTR sys_getsockname
379 PTR sys_getsockopt 379 PTR sys_getsockopt
380 PTR sys_listen 380 PTR sys_listen
381 PTR sys_recv /* 4175 */ 381 PTR compat_sys_recv /* 4175 */
382 PTR sys_recvfrom 382 PTR compat_sys_recvfrom
383 PTR compat_sys_recvmsg 383 PTR compat_sys_recvmsg
384 PTR sys_send 384 PTR sys_send
385 PTR compat_sys_sendmsg 385 PTR compat_sys_sendmsg
diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h
index fb5daf438ec9..4df75af29d76 100644
--- a/arch/mn10300/include/asm/socket.h
+++ b/arch/mn10300/include/asm/socket.h
@@ -57,4 +57,7 @@
57#define SO_TIMESTAMPING 37 57#define SO_TIMESTAMPING 37
58#define SCM_TIMESTAMPING SO_TIMESTAMPING 58#define SCM_TIMESTAMPING SO_TIMESTAMPING
59 59
60#define SO_PROTOCOL 38
61#define SO_DOMAIN 39
62
60#endif /* _ASM_SOCKET_H */ 63#endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h
index 885472bf7b78..960b1e5d8e16 100644
--- a/arch/parisc/include/asm/socket.h
+++ b/arch/parisc/include/asm/socket.h
@@ -24,6 +24,8 @@
24#define SO_RCVTIMEO 0x1006 24#define SO_RCVTIMEO 0x1006
25#define SO_ERROR 0x1007 25#define SO_ERROR 0x1007
26#define SO_TYPE 0x1008 26#define SO_TYPE 0x1008
27#define SO_PROTOCOL 0x1028
28#define SO_DOMAIN 0x1029
27#define SO_PEERNAME 0x2000 29#define SO_PEERNAME 0x2000
28 30
29#define SO_NO_CHECK 0x400b 31#define SO_NO_CHECK 0x400b
diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h
index 157c5ca581c8..f388f0ab193f 100644
--- a/arch/powerpc/include/asm/qe.h
+++ b/arch/powerpc/include/asm/qe.h
@@ -154,6 +154,7 @@ int qe_get_snum(void);
154void qe_put_snum(u8 snum); 154void qe_put_snum(u8 snum);
155unsigned int qe_get_num_of_risc(void); 155unsigned int qe_get_num_of_risc(void);
156unsigned int qe_get_num_of_snums(void); 156unsigned int qe_get_num_of_snums(void);
157int qe_alive_during_sleep(void);
157 158
158/* we actually use cpm_muram implementation, define this for convenience */ 159/* we actually use cpm_muram implementation, define this for convenience */
159#define qe_muram_init cpm_muram_init 160#define qe_muram_init cpm_muram_init
diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h
index 1e5cfad0e3f7..3ab8b3e6feb0 100644
--- a/arch/powerpc/include/asm/socket.h
+++ b/arch/powerpc/include/asm/socket.h
@@ -64,4 +64,7 @@
64#define SO_TIMESTAMPING 37 64#define SO_TIMESTAMPING 37
65#define SCM_TIMESTAMPING SO_TIMESTAMPING 65#define SCM_TIMESTAMPING SO_TIMESTAMPING
66 66
67#define SO_PROTOCOL 38
68#define SO_DOMAIN 39
69
67#endif /* _ASM_POWERPC_SOCKET_H */ 70#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c
index 237e3654f48c..464271bea6c9 100644
--- a/arch/powerpc/sysdev/qe_lib/qe.c
+++ b/arch/powerpc/sysdev/qe_lib/qe.c
@@ -65,6 +65,19 @@ static unsigned int qe_num_of_snum;
65 65
66static phys_addr_t qebase = -1; 66static phys_addr_t qebase = -1;
67 67
68int qe_alive_during_sleep(void)
69{
70 static int ret = -1;
71
72 if (ret != -1)
73 return ret;
74
75 ret = !of_find_compatible_node(NULL, NULL, "fsl,mpc8569-pmc");
76
77 return ret;
78}
79EXPORT_SYMBOL(qe_alive_during_sleep);
80
68phys_addr_t get_qe_base(void) 81phys_addr_t get_qe_base(void)
69{ 82{
70 struct device_node *qe; 83 struct device_node *qe;
diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h
index 02330c50241b..e42df89a0b85 100644
--- a/arch/s390/include/asm/socket.h
+++ b/arch/s390/include/asm/socket.h
@@ -65,4 +65,7 @@
65#define SO_TIMESTAMPING 37 65#define SO_TIMESTAMPING 37
66#define SCM_TIMESTAMPING SO_TIMESTAMPING 66#define SCM_TIMESTAMPING SO_TIMESTAMPING
67 67
68#define SO_PROTOCOL 38
69#define SO_DOMAIN 39
70
68#endif /* _ASM_SOCKET_H */ 71#endif /* _ASM_SOCKET_H */
diff --git a/arch/sh/include/asm/sh_eth.h b/arch/sh/include/asm/sh_eth.h
index bb832584f3c1..acf99700deed 100644
--- a/arch/sh/include/asm/sh_eth.h
+++ b/arch/sh/include/asm/sh_eth.h
@@ -6,6 +6,9 @@ enum {EDMAC_LITTLE_ENDIAN, EDMAC_BIG_ENDIAN};
6struct sh_eth_plat_data { 6struct sh_eth_plat_data {
7 int phy; 7 int phy;
8 int edmac_endian; 8 int edmac_endian;
9
10 unsigned no_ether_link:1;
11 unsigned ether_link_active_low:1;
9}; 12};
10 13
11#endif 14#endif
diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h
index 982a12f959f4..3a5ae3d12088 100644
--- a/arch/sparc/include/asm/socket.h
+++ b/arch/sparc/include/asm/socket.h
@@ -29,6 +29,9 @@
29#define SO_RCVBUFFORCE 0x100b 29#define SO_RCVBUFFORCE 0x100b
30#define SO_ERROR 0x1007 30#define SO_ERROR 0x1007
31#define SO_TYPE 0x1008 31#define SO_TYPE 0x1008
32#define SO_PROTOCOL 0x1028
33#define SO_DOMAIN 0x1029
34
32 35
33/* Linux specific, keep the same. */ 36/* Linux specific, keep the same. */
34#define SO_NO_CHECK 0x000b 37#define SO_NO_CHECK 0x000b
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index aed94869ad6a..e7061138c98a 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -121,7 +121,7 @@ SIGN2(sys32_syslog, sys_syslog, %o0, %o2)
121SIGN1(sys32_umask, sys_umask, %o0) 121SIGN1(sys32_umask, sys_umask, %o0)
122SIGN3(sys32_tgkill, sys_tgkill, %o0, %o1, %o2) 122SIGN3(sys32_tgkill, sys_tgkill, %o0, %o1, %o2)
123SIGN1(sys32_sendto, sys_sendto, %o0) 123SIGN1(sys32_sendto, sys_sendto, %o0)
124SIGN1(sys32_recvfrom, sys_recvfrom, %o0) 124SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0)
125SIGN3(sys32_socket, sys_socket, %o0, %o1, %o2) 125SIGN3(sys32_socket, sys_socket, %o0, %o1, %o2)
126SIGN2(sys32_connect, sys_connect, %o0, %o2) 126SIGN2(sys32_connect, sys_connect, %o0, %o2)
127SIGN2(sys32_bind, sys_bind, %o0, %o2) 127SIGN2(sys32_bind, sys_bind, %o0, %o2)
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 3b44b47c7e1d..f114813ae258 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -245,7 +245,7 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
245 245
246 dev_kfree_skb(skb); 246 dev_kfree_skb(skb);
247 247
248 return 0; 248 return NETDEV_TX_OK;
249} 249}
250 250
251static void uml_net_set_multicast_list(struct net_device *dev) 251static void uml_net_set_multicast_list(struct net_device *dev)
@@ -285,7 +285,7 @@ static void uml_net_get_drvinfo(struct net_device *dev,
285 strcpy(info->version, "42"); 285 strcpy(info->version, "42");
286} 286}
287 287
288static struct ethtool_ops uml_net_ethtool_ops = { 288static const struct ethtool_ops uml_net_ethtool_ops = {
289 .get_drvinfo = uml_net_get_drvinfo, 289 .get_drvinfo = uml_net_get_drvinfo,
290 .get_link = ethtool_op_get_link, 290 .get_link = ethtool_op_get_link,
291}; 291};
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8130334329c0..527519b8a9f9 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -262,6 +262,15 @@ config MCORE2
262 family in /proc/cpuinfo. Newer ones have 6 and older ones 15 262 family in /proc/cpuinfo. Newer ones have 6 and older ones 15
263 (not a typo) 263 (not a typo)
264 264
265config MATOM
266 bool "Intel Atom"
267 ---help---
268
269 Select this for the Intel Atom platform. Intel Atom CPUs have an
270 in-order pipelining architecture and thus can benefit from
271 accordingly optimized code. Use a recent GCC with specific Atom
272 support in order to fully benefit from selecting this option.
273
265config GENERIC_CPU 274config GENERIC_CPU
266 bool "Generic-x86-64" 275 bool "Generic-x86-64"
267 depends on X86_64 276 depends on X86_64
@@ -295,7 +304,7 @@ config X86_CPU
295config X86_L1_CACHE_BYTES 304config X86_L1_CACHE_BYTES
296 int 305 int
297 default "128" if MPSC 306 default "128" if MPSC
298 default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32 307 default "64" if GENERIC_CPU || MK8 || MCORE2 || MATOM || X86_32
299 308
300config X86_INTERNODE_CACHE_BYTES 309config X86_INTERNODE_CACHE_BYTES
301 int 310 int
@@ -310,7 +319,7 @@ config X86_L1_CACHE_SHIFT
310 default "7" if MPENTIUM4 || MPSC 319 default "7" if MPENTIUM4 || MPSC
311 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 320 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
312 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX 321 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
313 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU 322 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
314 323
315config X86_XADD 324config X86_XADD
316 def_bool y 325 def_bool y
@@ -359,7 +368,7 @@ config X86_INTEL_USERCOPY
359 368
360config X86_USE_PPRO_CHECKSUM 369config X86_USE_PPRO_CHECKSUM
361 def_bool y 370 def_bool y
362 depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 371 depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
363 372
364config X86_USE_3DNOW 373config X86_USE_3DNOW
365 def_bool y 374 def_bool y
@@ -387,7 +396,7 @@ config X86_P6_NOP
387 396
388config X86_TSC 397config X86_TSC
389 def_bool y 398 def_bool y
390 depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 399 depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64
391 400
392config X86_CMPXCHG64 401config X86_CMPXCHG64
393 def_bool y 402 def_bool y
@@ -397,7 +406,7 @@ config X86_CMPXCHG64
397# generates cmov. 406# generates cmov.
398config X86_CMOV 407config X86_CMOV
399 def_bool y 408 def_bool y
400 depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64) 409 depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM)
401 410
402config X86_MINIMUM_CPU_FAMILY 411config X86_MINIMUM_CPU_FAMILY
403 int 412 int
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1b68659c41b4..7983c420eaf2 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -32,8 +32,8 @@ ifeq ($(CONFIG_X86_32),y)
32 32
33 # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use 33 # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
34 # a lot more stack due to the lack of sharing of stacklots: 34 # a lot more stack due to the lack of sharing of stacklots:
35 KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \ 35 KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0400, \
36 echo $(call cc-option,-fno-unit-at-a-time); fi ;) 36 $(call cc-option,-fno-unit-at-a-time))
37 37
38 # CPU-specific tuning. Anything which can be shared with UML should go here. 38 # CPU-specific tuning. Anything which can be shared with UML should go here.
39 include $(srctree)/arch/x86/Makefile_32.cpu 39 include $(srctree)/arch/x86/Makefile_32.cpu
@@ -55,6 +55,8 @@ else
55 55
56 cflags-$(CONFIG_MCORE2) += \ 56 cflags-$(CONFIG_MCORE2) += \
57 $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) 57 $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
58 cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
59 $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
58 cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) 60 cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
59 KBUILD_CFLAGS += $(cflags-y) 61 KBUILD_CFLAGS += $(cflags-y)
60 62
@@ -72,7 +74,7 @@ endif
72 74
73ifdef CONFIG_CC_STACKPROTECTOR 75ifdef CONFIG_CC_STACKPROTECTOR
74 cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh 76 cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
75 ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y) 77 ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y)
76 stackp-y := -fstack-protector 78 stackp-y := -fstack-protector
77 stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all 79 stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all
78 KBUILD_CFLAGS += $(stackp-y) 80 KBUILD_CFLAGS += $(stackp-y)
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index 80177ec052f0..30e9a264f69d 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -33,6 +33,8 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-f
33cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) 33cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
34cflags-$(CONFIG_MVIAC7) += -march=i686 34cflags-$(CONFIG_MVIAC7) += -march=i686
35cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) 35cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
36cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
37 $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
36 38
37# AMD Elan support 39# AMD Elan support
38cflags-$(CONFIG_X86_ELAN) += -march=i486 40cflags-$(CONFIG_X86_ELAN) += -march=i486
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 275dd177f198..11e8c6eb80a1 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -31,7 +31,6 @@ static inline void vesa_store_mode_params_graphics(void) {}
31 31
32static int vesa_probe(void) 32static int vesa_probe(void)
33{ 33{
34#if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID)
35 struct biosregs ireg, oreg; 34 struct biosregs ireg, oreg;
36 u16 mode; 35 u16 mode;
37 addr_t mode_ptr; 36 addr_t mode_ptr;
@@ -49,8 +48,7 @@ static int vesa_probe(void)
49 vginfo.signature != VESA_MAGIC || 48 vginfo.signature != VESA_MAGIC ||
50 vginfo.version < 0x0102) 49 vginfo.version < 0x0102)
51 return 0; /* Not present */ 50 return 0; /* Not present */
52#endif /* CONFIG_VIDEO_VESA || CONFIG_FIRMWARE_EDID */ 51
53#ifdef CONFIG_VIDEO_VESA
54 set_fs(vginfo.video_mode_ptr.seg); 52 set_fs(vginfo.video_mode_ptr.seg);
55 mode_ptr = vginfo.video_mode_ptr.off; 53 mode_ptr = vginfo.video_mode_ptr.off;
56 54
@@ -102,9 +100,6 @@ static int vesa_probe(void)
102 } 100 }
103 101
104 return nmodes; 102 return nmodes;
105#else
106 return 0;
107#endif /* CONFIG_VIDEO_VESA */
108} 103}
109 104
110static int vesa_set_mode(struct mode_info *mode) 105static int vesa_set_mode(struct mode_info *mode)
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 8f8d827e254d..819caa1f2008 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -47,14 +47,6 @@ static u8 vga_set_basic_mode(void)
47 47
48 initregs(&ireg); 48 initregs(&ireg);
49 49
50#ifdef CONFIG_VIDEO_400_HACK
51 if (adapter >= ADAPTER_VGA) {
52 ireg.ax = 0x1202;
53 ireg.bx = 0x0030;
54 intcall(0x10, &ireg, NULL);
55 }
56#endif
57
58 ax = 0x0f00; 50 ax = 0x0f00;
59 intcall(0x10, &ireg, &oreg); 51 intcall(0x10, &ireg, &oreg);
60 mode = oreg.al; 52 mode = oreg.al;
@@ -62,11 +54,9 @@ static u8 vga_set_basic_mode(void)
62 set_fs(0); 54 set_fs(0);
63 rows = rdfs8(0x484); /* rows minus one */ 55 rows = rdfs8(0x484); /* rows minus one */
64 56
65#ifndef CONFIG_VIDEO_400_HACK
66 if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) && 57 if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) &&
67 (rows == 0 || rows == 24)) 58 (rows == 0 || rows == 24))
68 return mode; 59 return mode;
69#endif
70 60
71 if (mode != 3 && mode != 7) 61 if (mode != 3 && mode != 7)
72 mode = 3; 62 mode = 3;
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index bad728b76fc2..d42da3802499 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -221,7 +221,6 @@ static unsigned int mode_menu(void)
221 } 221 }
222} 222}
223 223
224#ifdef CONFIG_VIDEO_RETAIN
225/* Save screen content to the heap */ 224/* Save screen content to the heap */
226static struct saved_screen { 225static struct saved_screen {
227 int x, y; 226 int x, y;
@@ -299,10 +298,6 @@ static void restore_screen(void)
299 ireg.dl = saved.curx; 298 ireg.dl = saved.curx;
300 intcall(0x10, &ireg, NULL); 299 intcall(0x10, &ireg, NULL);
301} 300}
302#else
303#define save_screen() ((void)0)
304#define restore_screen() ((void)0)
305#endif
306 301
307void set_video(void) 302void set_video(void)
308{ 303{
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index 5bb174a997fc..ff339c5db311 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -17,19 +17,8 @@
17 17
18#include <linux/types.h> 18#include <linux/types.h>
19 19
20/* Enable autodetection of SVGA adapters and modes. */ 20/*
21#undef CONFIG_VIDEO_SVGA 21 * This code uses an extended set of video mode numbers. These include:
22
23/* Enable autodetection of VESA modes */
24#define CONFIG_VIDEO_VESA
25
26/* Retain screen contents when switching modes */
27#define CONFIG_VIDEO_RETAIN
28
29/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour */
30#undef CONFIG_VIDEO_400_HACK
31
32/* This code uses an extended set of video mode numbers. These include:
33 * Aliases for standard modes 22 * Aliases for standard modes
34 * NORMAL_VGA (-1) 23 * NORMAL_VGA (-1)
35 * EXTENDED_VGA (-2) 24 * EXTENDED_VGA (-2)
@@ -67,13 +56,8 @@
67/* The "recalculate timings" flag */ 56/* The "recalculate timings" flag */
68#define VIDEO_RECALC 0x8000 57#define VIDEO_RECALC 0x8000
69 58
70/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
71#ifdef CONFIG_VIDEO_RETAIN
72void store_screen(void); 59void store_screen(void);
73#define DO_STORE() store_screen() 60#define DO_STORE() store_screen()
74#else
75#define DO_STORE() ((void)0)
76#endif /* CONFIG_VIDEO_RETAIN */
77 61
78/* 62/*
79 * Mode table structures 63 * Mode table structures
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index d3ec8d588d4b..585edebe12cf 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -59,13 +59,6 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
59asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, 59asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
60 const u8 *in, unsigned int len, u8 *iv); 60 const u8 *in, unsigned int len, u8 *iv);
61 61
62static inline int kernel_fpu_using(void)
63{
64 if (in_interrupt() && !(read_cr0() & X86_CR0_TS))
65 return 1;
66 return 0;
67}
68
69static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) 62static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
70{ 63{
71 unsigned long addr = (unsigned long)raw_ctx; 64 unsigned long addr = (unsigned long)raw_ctx;
@@ -89,7 +82,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
89 return -EINVAL; 82 return -EINVAL;
90 } 83 }
91 84
92 if (kernel_fpu_using()) 85 if (irq_fpu_usable())
93 err = crypto_aes_expand_key(ctx, in_key, key_len); 86 err = crypto_aes_expand_key(ctx, in_key, key_len);
94 else { 87 else {
95 kernel_fpu_begin(); 88 kernel_fpu_begin();
@@ -110,7 +103,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
110{ 103{
111 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); 104 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
112 105
113 if (kernel_fpu_using()) 106 if (irq_fpu_usable())
114 crypto_aes_encrypt_x86(ctx, dst, src); 107 crypto_aes_encrypt_x86(ctx, dst, src);
115 else { 108 else {
116 kernel_fpu_begin(); 109 kernel_fpu_begin();
@@ -123,7 +116,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
123{ 116{
124 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); 117 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
125 118
126 if (kernel_fpu_using()) 119 if (irq_fpu_usable())
127 crypto_aes_decrypt_x86(ctx, dst, src); 120 crypto_aes_decrypt_x86(ctx, dst, src);
128 else { 121 else {
129 kernel_fpu_begin(); 122 kernel_fpu_begin();
@@ -349,7 +342,7 @@ static int ablk_encrypt(struct ablkcipher_request *req)
349 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); 342 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
350 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); 343 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
351 344
352 if (kernel_fpu_using()) { 345 if (irq_fpu_usable()) {
353 struct ablkcipher_request *cryptd_req = 346 struct ablkcipher_request *cryptd_req =
354 ablkcipher_request_ctx(req); 347 ablkcipher_request_ctx(req);
355 memcpy(cryptd_req, req, sizeof(*req)); 348 memcpy(cryptd_req, req, sizeof(*req));
@@ -370,7 +363,7 @@ static int ablk_decrypt(struct ablkcipher_request *req)
370 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); 363 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
371 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); 364 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
372 365
373 if (kernel_fpu_using()) { 366 if (irq_fpu_usable()) {
374 struct ablkcipher_request *cryptd_req = 367 struct ablkcipher_request *cryptd_req =
375 ablkcipher_request_ctx(req); 368 ablkcipher_request_ctx(req);
376 memcpy(cryptd_req, req, sizeof(*req)); 369 memcpy(cryptd_req, req, sizeof(*req));
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index e590261ba059..ba331bfd1112 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -537,7 +537,7 @@ ia32_sys_call_table:
537 .quad sys_mkdir 537 .quad sys_mkdir
538 .quad sys_rmdir /* 40 */ 538 .quad sys_rmdir /* 40 */
539 .quad sys_dup 539 .quad sys_dup
540 .quad sys32_pipe 540 .quad sys_pipe
541 .quad compat_sys_times 541 .quad compat_sys_times
542 .quad quiet_ni_syscall /* old prof syscall holder */ 542 .quad quiet_ni_syscall /* old prof syscall holder */
543 .quad sys_brk /* 45 */ 543 .quad sys_brk /* 45 */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 085a8c35f149..9f5527198825 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -189,20 +189,6 @@ asmlinkage long sys32_mprotect(unsigned long start, size_t len,
189 return sys_mprotect(start, len, prot); 189 return sys_mprotect(start, len, prot);
190} 190}
191 191
192asmlinkage long sys32_pipe(int __user *fd)
193{
194 int retval;
195 int fds[2];
196
197 retval = do_pipe_flags(fds, 0);
198 if (retval)
199 goto out;
200 if (copy_to_user(fd, fds, sizeof(fds)))
201 retval = -EFAULT;
202out:
203 return retval;
204}
205
206asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act, 192asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
207 struct sigaction32 __user *oact, 193 struct sigaction32 __user *oact,
208 unsigned int sigsetsize) 194 unsigned int sigsetsize)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 1a37bcdc8606..c240efc74e00 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -73,8 +73,6 @@ static inline void alternatives_smp_module_del(struct module *mod) {}
73static inline void alternatives_smp_switch(int smp) {} 73static inline void alternatives_smp_switch(int smp) {}
74#endif /* CONFIG_SMP */ 74#endif /* CONFIG_SMP */
75 75
76const unsigned char *const *find_nop_table(void);
77
78/* alternative assembly primitive: */ 76/* alternative assembly primitive: */
79#define ALTERNATIVE(oldinstr, newinstr, feature) \ 77#define ALTERNATIVE(oldinstr, newinstr, feature) \
80 \ 78 \
@@ -144,8 +142,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
144#define __parainstructions_end NULL 142#define __parainstructions_end NULL
145#endif 143#endif
146 144
147extern void add_nops(void *insns, unsigned int len);
148
149/* 145/*
150 * Clear and restore the kernel write-protection flag on the local CPU. 146 * Clear and restore the kernel write-protection flag on the local CPU.
151 * Allows the kernel to edit read-only pages. 147 * Allows the kernel to edit read-only pages.
@@ -161,10 +157,7 @@ extern void add_nops(void *insns, unsigned int len);
161 * Intel's errata. 157 * Intel's errata.
162 * On the local CPU you need to be protected again NMI or MCE handlers seeing an 158 * On the local CPU you need to be protected again NMI or MCE handlers seeing an
163 * inconsistent instruction while you patch. 159 * inconsistent instruction while you patch.
164 * The _early version expects the memory to already be RW.
165 */ 160 */
166
167extern void *text_poke(void *addr, const void *opcode, size_t len); 161extern void *text_poke(void *addr, const void *opcode, size_t len);
168extern void *text_poke_early(void *addr, const void *opcode, size_t len);
169 162
170#endif /* _ASM_X86_ALTERNATIVE_H */ 163#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index bb7d47925847..586b7adb8e53 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -183,6 +183,10 @@ static inline int x2apic_enabled(void)
183} 183}
184 184
185#define x2apic_supported() (cpu_has_x2apic) 185#define x2apic_supported() (cpu_has_x2apic)
186static inline void x2apic_force_phys(void)
187{
188 x2apic_phys = 1;
189}
186#else 190#else
187static inline void check_x2apic(void) 191static inline void check_x2apic(void)
188{ 192{
@@ -194,6 +198,9 @@ static inline int x2apic_enabled(void)
194{ 198{
195 return 0; 199 return 0;
196} 200}
201static inline void x2apic_force_phys(void)
202{
203}
197 204
198#define x2apic_preenabled 0 205#define x2apic_preenabled 0
199#define x2apic_supported() 0 206#define x2apic_supported() 0
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 7ddb36ab933b..7386bfa4f4bc 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -8,7 +8,8 @@
8 * Ingo Molnar <mingo@redhat.com>, 1999, 2000 8 * Ingo Molnar <mingo@redhat.com>, 1999, 2000
9 */ 9 */
10 10
11#define APIC_DEFAULT_PHYS_BASE 0xfee00000 11#define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000
12#define APIC_DEFAULT_PHYS_BASE 0xfee00000
12 13
13#define APIC_ID 0x20 14#define APIC_ID 0x20
14 15
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 56be78f582f0..b3ed1e1460ff 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -3,7 +3,7 @@
3 3
4#ifdef __ASSEMBLY__ 4#ifdef __ASSEMBLY__
5# define __ASM_FORM(x) x 5# define __ASM_FORM(x) x
6# define __ASM_EX_SEC .section __ex_table 6# define __ASM_EX_SEC .section __ex_table, "a"
7#else 7#else
8# define __ASM_FORM(x) " " #x " " 8# define __ASM_FORM(x) " " #x " "
9# define __ASM_EX_SEC " .section __ex_table,\"a\"\n" 9# define __ASM_EX_SEC " .section __ex_table,\"a\"\n"
@@ -38,10 +38,18 @@
38#define _ASM_DI __ASM_REG(di) 38#define _ASM_DI __ASM_REG(di)
39 39
40/* Exception table entry */ 40/* Exception table entry */
41#ifdef __ASSEMBLY__
42# define _ASM_EXTABLE(from,to) \
43 __ASM_EX_SEC ; \
44 _ASM_ALIGN ; \
45 _ASM_PTR from , to ; \
46 .previous
47#else
41# define _ASM_EXTABLE(from,to) \ 48# define _ASM_EXTABLE(from,to) \
42 __ASM_EX_SEC \ 49 __ASM_EX_SEC \
43 _ASM_ALIGN "\n" \ 50 _ASM_ALIGN "\n" \
44 _ASM_PTR #from "," #to "\n" \ 51 _ASM_PTR #from "," #to "\n" \
45 " .previous\n" 52 " .previous\n"
53#endif
46 54
47#endif /* _ASM_X86_ASM_H */ 55#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 4a28d22d4793..847fee6493a2 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -95,6 +95,7 @@
95#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ 95#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */
96#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ 96#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
97#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ 97#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */
98#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */
98 99
99/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 100/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
100#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ 101#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index c68c361697e1..4d447b732d82 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task);
11 11
12static __always_inline struct task_struct *get_current(void) 12static __always_inline struct task_struct *get_current(void)
13{ 13{
14 return percpu_read(current_task); 14 return percpu_read_stable(current_task);
15} 15}
16 16
17#define current get_current() 17#define current get_current()
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index c993e9e0fed4..e8de2f6f5ca5 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -291,11 +291,24 @@ static inline unsigned long get_desc_base(const struct desc_struct *desc)
291 return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); 291 return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24);
292} 292}
293 293
294static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
295{
296 desc->base0 = base & 0xffff;
297 desc->base1 = (base >> 16) & 0xff;
298 desc->base2 = (base >> 24) & 0xff;
299}
300
294static inline unsigned long get_desc_limit(const struct desc_struct *desc) 301static inline unsigned long get_desc_limit(const struct desc_struct *desc)
295{ 302{
296 return desc->limit0 | (desc->limit << 16); 303 return desc->limit0 | (desc->limit << 16);
297} 304}
298 305
306static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
307{
308 desc->limit0 = limit & 0xffff;
309 desc->limit = (limit >> 16) & 0xf;
310}
311
299static inline void _set_gate(int gate, unsigned type, void *addr, 312static inline void _set_gate(int gate, unsigned type, void *addr,
300 unsigned dpl, unsigned ist, unsigned seg) 313 unsigned dpl, unsigned ist, unsigned seg)
301{ 314{
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index a6adefa28b94..9d6684849fd9 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -34,6 +34,12 @@ struct desc_struct {
34 }; 34 };
35} __attribute__((packed)); 35} __attribute__((packed));
36 36
37#define GDT_ENTRY_INIT(flags, base, limit) { { { \
38 .a = ((limit) & 0xffff) | (((base) & 0xffff) << 16), \
39 .b = (((base) & 0xff0000) >> 16) | (((flags) & 0xf0ff) << 8) | \
40 ((limit) & 0xf0000) | ((base) & 0xff000000), \
41 } } }
42
37enum { 43enum {
38 GATE_INTERRUPT = 0xE, 44 GATE_INTERRUPT = 0xE,
39 GATE_TRAP = 0xF, 45 GATE_TRAP = 0xF,
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 3afc5e87cfdd..ae6253ab9029 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -87,9 +87,25 @@
87 CFI_RESTORE \reg 87 CFI_RESTORE \reg
88 .endm 88 .endm
89#else /*!CONFIG_X86_64*/ 89#else /*!CONFIG_X86_64*/
90 .macro pushl_cfi reg
91 pushl \reg
92 CFI_ADJUST_CFA_OFFSET 4
93 .endm
90 94
91 /* 32bit defenitions are missed yet */ 95 .macro popl_cfi reg
96 popl \reg
97 CFI_ADJUST_CFA_OFFSET -4
98 .endm
92 99
100 .macro movl_cfi reg offset=0
101 movl %\reg, \offset(%esp)
102 CFI_REL_OFFSET \reg, \offset
103 .endm
104
105 .macro movl_cfi_restore offset reg
106 movl \offset(%esp), %\reg
107 CFI_RESTORE \reg
108 .endm
93#endif /*!CONFIG_X86_64*/ 109#endif /*!CONFIG_X86_64*/
94#endif /*__ASSEMBLY__*/ 110#endif /*__ASSEMBLY__*/
95 111
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 175adf58dd4f..0b20bbb758f2 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -26,6 +26,7 @@ extern void fpu_init(void);
26extern void mxcsr_feature_mask_init(void); 26extern void mxcsr_feature_mask_init(void);
27extern int init_fpu(struct task_struct *child); 27extern int init_fpu(struct task_struct *child);
28extern asmlinkage void math_state_restore(void); 28extern asmlinkage void math_state_restore(void);
29extern void __math_state_restore(void);
29extern void init_thread_xstate(void); 30extern void init_thread_xstate(void);
30extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); 31extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
31 32
@@ -301,6 +302,14 @@ static inline void kernel_fpu_end(void)
301 preempt_enable(); 302 preempt_enable();
302} 303}
303 304
305static inline bool irq_fpu_usable(void)
306{
307 struct pt_regs *regs;
308
309 return !in_interrupt() || !(regs = get_irq_regs()) || \
310 user_mode(regs) || (read_cr0() & X86_CR0_TS);
311}
312
304/* 313/*
305 * Some instructions like VIA's padlock instructions generate a spurious 314 * Some instructions like VIA's padlock instructions generate a spurious
306 * DNA fault but don't modify SSE registers. And these instructions 315 * DNA fault but don't modify SSE registers. And these instructions
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 330ee807f89e..85232d32fcb8 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -150,11 +150,10 @@ extern int timer_through_8259;
150#define io_apic_assign_pci_irqs \ 150#define io_apic_assign_pci_irqs \
151 (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) 151 (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
152 152
153#ifdef CONFIG_ACPI 153extern u8 io_apic_unique_id(u8 id);
154extern int io_apic_get_unique_id(int ioapic, int apic_id); 154extern int io_apic_get_unique_id(int ioapic, int apic_id);
155extern int io_apic_get_version(int ioapic); 155extern int io_apic_get_version(int ioapic);
156extern int io_apic_get_redir_entries(int ioapic); 156extern int io_apic_get_redir_entries(int ioapic);
157#endif /* CONFIG_ACPI */
158 157
159struct io_apic_irq_attr; 158struct io_apic_irq_attr;
160extern int io_apic_set_pci_routing(struct device *dev, int irq, 159extern int io_apic_set_pci_routing(struct device *dev, int irq,
@@ -177,6 +176,16 @@ extern int setup_ioapic_entry(int apic, int irq,
177 int polarity, int vector, int pin); 176 int polarity, int vector, int pin);
178extern void ioapic_write_entry(int apic, int pin, 177extern void ioapic_write_entry(int apic, int pin,
179 struct IO_APIC_route_entry e); 178 struct IO_APIC_route_entry e);
179
180struct mp_ioapic_gsi{
181 int gsi_base;
182 int gsi_end;
183};
184extern struct mp_ioapic_gsi mp_gsi_routing[];
185int mp_find_ioapic(int gsi);
186int mp_find_ioapic_pin(int ioapic, int gsi);
187void __init mp_register_ioapic(int id, u32 address, u32 gsi_base);
188
180#else /* !CONFIG_X86_IO_APIC */ 189#else /* !CONFIG_X86_IO_APIC */
181#define io_apic_assign_pci_irqs 0 190#define io_apic_assign_pci_irqs 0
182static const int timer_through_8259 = 0; 191static const int timer_through_8259 = 0;
diff --git a/arch/x86/include/asm/ioctls.h b/arch/x86/include/asm/ioctls.h
index 0d5b23b7b06e..ec34c760665e 100644
--- a/arch/x86/include/asm/ioctls.h
+++ b/arch/x86/include/asm/ioctls.h
@@ -1,94 +1 @@
1#ifndef _ASM_X86_IOCTLS_H #include <asm-generic/ioctls.h>
2#define _ASM_X86_IOCTLS_H
3
4#include <asm/ioctl.h>
5
6/* 0x54 is just a magic number to make these relatively unique ('T') */
7
8#define TCGETS 0x5401
9#define TCSETS 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */
10#define TCSETSW 0x5403
11#define TCSETSF 0x5404
12#define TCGETA 0x5405
13#define TCSETA 0x5406
14#define TCSETAW 0x5407
15#define TCSETAF 0x5408
16#define TCSBRK 0x5409
17#define TCXONC 0x540A
18#define TCFLSH 0x540B
19#define TIOCEXCL 0x540C
20#define TIOCNXCL 0x540D
21#define TIOCSCTTY 0x540E
22#define TIOCGPGRP 0x540F
23#define TIOCSPGRP 0x5410
24#define TIOCOUTQ 0x5411
25#define TIOCSTI 0x5412
26#define TIOCGWINSZ 0x5413
27#define TIOCSWINSZ 0x5414
28#define TIOCMGET 0x5415
29#define TIOCMBIS 0x5416
30#define TIOCMBIC 0x5417
31#define TIOCMSET 0x5418
32#define TIOCGSOFTCAR 0x5419
33#define TIOCSSOFTCAR 0x541A
34#define FIONREAD 0x541B
35#define TIOCINQ FIONREAD
36#define TIOCLINUX 0x541C
37#define TIOCCONS 0x541D
38#define TIOCGSERIAL 0x541E
39#define TIOCSSERIAL 0x541F
40#define TIOCPKT 0x5420
41#define FIONBIO 0x5421
42#define TIOCNOTTY 0x5422
43#define TIOCSETD 0x5423
44#define TIOCGETD 0x5424
45#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */
46/* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */
47#define TIOCSBRK 0x5427 /* BSD compatibility */
48#define TIOCCBRK 0x5428 /* BSD compatibility */
49#define TIOCGSID 0x5429 /* Return the session ID of FD */
50#define TCGETS2 _IOR('T', 0x2A, struct termios2)
51#define TCSETS2 _IOW('T', 0x2B, struct termios2)
52#define TCSETSW2 _IOW('T', 0x2C, struct termios2)
53#define TCSETSF2 _IOW('T', 0x2D, struct termios2)
54#define TIOCGRS485 0x542E
55#define TIOCSRS485 0x542F
56#define TIOCGPTN _IOR('T', 0x30, unsigned int)
57 /* Get Pty Number (of pty-mux device) */
58#define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */
59#define TCGETX 0x5432 /* SYS5 TCGETX compatibility */
60#define TCSETX 0x5433
61#define TCSETXF 0x5434
62#define TCSETXW 0x5435
63
64#define FIONCLEX 0x5450
65#define FIOCLEX 0x5451
66#define FIOASYNC 0x5452
67#define TIOCSERCONFIG 0x5453
68#define TIOCSERGWILD 0x5454
69#define TIOCSERSWILD 0x5455
70#define TIOCGLCKTRMIOS 0x5456
71#define TIOCSLCKTRMIOS 0x5457
72#define TIOCSERGSTRUCT 0x5458 /* For debugging only */
73#define TIOCSERGETLSR 0x5459 /* Get line status register */
74#define TIOCSERGETMULTI 0x545A /* Get multiport config */
75#define TIOCSERSETMULTI 0x545B /* Set multiport config */
76
77#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */
78#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */
79#define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */
80#define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */
81#define FIOQSIZE 0x5460
82
83/* Used for packet mode */
84#define TIOCPKT_DATA 0
85#define TIOCPKT_FLUSHREAD 1
86#define TIOCPKT_FLUSHWRITE 2
87#define TIOCPKT_STOP 4
88#define TIOCPKT_START 8
89#define TIOCPKT_NOSTOP 16
90#define TIOCPKT_DOSTOP 32
91
92#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */
93
94#endif /* _ASM_X86_IOCTLS_H */
diff --git a/arch/x86/include/asm/ipcbuf.h b/arch/x86/include/asm/ipcbuf.h
index ee678fd51594..84c7e51cb6d0 100644
--- a/arch/x86/include/asm/ipcbuf.h
+++ b/arch/x86/include/asm/ipcbuf.h
@@ -1,28 +1 @@
1#ifndef _ASM_X86_IPCBUF_H #include <asm-generic/ipcbuf.h>
2#define _ASM_X86_IPCBUF_H
3
4/*
5 * The ipc64_perm structure for x86 architecture.
6 * Note extra padding because this structure is passed back and forth
7 * between kernel and user space.
8 *
9 * Pad space is left for:
10 * - 32-bit mode_t and seq
11 * - 2 miscellaneous 32-bit values
12 */
13
14struct ipc64_perm {
15 __kernel_key_t key;
16 __kernel_uid32_t uid;
17 __kernel_gid32_t gid;
18 __kernel_uid32_t cuid;
19 __kernel_gid32_t cgid;
20 __kernel_mode_t mode;
21 unsigned short __pad1;
22 unsigned short seq;
23 unsigned short __pad2;
24 unsigned long __unused1;
25 unsigned long __unused2;
26};
27
28#endif /* _ASM_X86_IPCBUF_H */
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c6ccbe7e81ad..9e2b952f810a 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -13,14 +13,13 @@ static inline unsigned long native_save_fl(void)
13 unsigned long flags; 13 unsigned long flags;
14 14
15 /* 15 /*
16 * Note: this needs to be "=r" not "=rm", because we have the 16 * "=rm" is safe here, because "pop" adjusts the stack before
17 * stack offset from what gcc expects at the time the "pop" is 17 * it evaluates its effective address -- this is part of the
18 * executed, and so a memory reference with respect to the stack 18 * documented behavior of the "pop" instruction.
19 * would end up using the wrong address.
20 */ 19 */
21 asm volatile("# __raw_save_flags\n\t" 20 asm volatile("# __raw_save_flags\n\t"
22 "pushf ; pop %0" 21 "pushf ; pop %0"
23 : "=r" (flags) 22 : "=rm" (flags)
24 : /* no input */ 23 : /* no input */
25 : "memory"); 24 : "memory");
26 25
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h
index 5136dad57cbb..0d97deba1e35 100644
--- a/arch/x86/include/asm/lguest.h
+++ b/arch/x86/include/asm/lguest.h
@@ -90,8 +90,9 @@ static inline void lguest_set_ts(void)
90} 90}
91 91
92/* Full 4G segment descriptors, suitable for CS and DS. */ 92/* Full 4G segment descriptors, suitable for CS and DS. */
93#define FULL_EXEC_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9b00} } }) 93#define FULL_EXEC_SEGMENT \
94#define FULL_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9300} } }) 94 ((struct desc_struct)GDT_ENTRY_INIT(0xc09b, 0, 0xfffff))
95#define FULL_SEGMENT ((struct desc_struct)GDT_ENTRY_INIT(0xc093, 0, 0xfffff))
95 96
96#endif /* __ASSEMBLY__ */ 97#endif /* __ASSEMBLY__ */
97 98
diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h
index 751af2550ed9..593e51d4643f 100644
--- a/arch/x86/include/asm/mman.h
+++ b/arch/x86/include/asm/mman.h
@@ -1,20 +1,8 @@
1#ifndef _ASM_X86_MMAN_H 1#ifndef _ASM_X86_MMAN_H
2#define _ASM_X86_MMAN_H 2#define _ASM_X86_MMAN_H
3 3
4#include <asm-generic/mman-common.h>
5
6#define MAP_32BIT 0x40 /* only give out 32bit addresses */ 4#define MAP_32BIT 0x40 /* only give out 32bit addresses */
7 5
8#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ 6#include <asm-generic/mman.h>
9#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
10#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
11#define MAP_LOCKED 0x2000 /* pages are locked */
12#define MAP_NORESERVE 0x4000 /* don't check for reservations */
13#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
14#define MAP_NONBLOCK 0x10000 /* do not block on IO */
15#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
16
17#define MCL_CURRENT 1 /* lock all current mappings */
18#define MCL_FUTURE 2 /* lock all future mappings */
19 7
20#endif /* _ASM_X86_MMAN_H */ 8#endif /* _ASM_X86_MMAN_H */
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 47d62743c4d5..3e2ce58a31a3 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -1,18 +1,7 @@
1#ifndef _ASM_X86_MODULE_H 1#ifndef _ASM_X86_MODULE_H
2#define _ASM_X86_MODULE_H 2#define _ASM_X86_MODULE_H
3 3
4/* x86_32/64 are simple */ 4#include <asm-generic/module.h>
5struct mod_arch_specific {};
6
7#ifdef CONFIG_X86_32
8# define Elf_Shdr Elf32_Shdr
9# define Elf_Sym Elf32_Sym
10# define Elf_Ehdr Elf32_Ehdr
11#else
12# define Elf_Shdr Elf64_Shdr
13# define Elf_Sym Elf64_Sym
14# define Elf_Ehdr Elf64_Ehdr
15#endif
16 5
17#ifdef CONFIG_X86_64 6#ifdef CONFIG_X86_64
18/* X86_64 does not define MODULE_PROC_FAMILY */ 7/* X86_64 does not define MODULE_PROC_FAMILY */
@@ -28,6 +17,8 @@ struct mod_arch_specific {};
28#define MODULE_PROC_FAMILY "586MMX " 17#define MODULE_PROC_FAMILY "586MMX "
29#elif defined CONFIG_MCORE2 18#elif defined CONFIG_MCORE2
30#define MODULE_PROC_FAMILY "CORE2 " 19#define MODULE_PROC_FAMILY "CORE2 "
20#elif defined CONFIG_MATOM
21#define MODULE_PROC_FAMILY "ATOM "
31#elif defined CONFIG_M686 22#elif defined CONFIG_M686
32#define MODULE_PROC_FAMILY "686 " 23#define MODULE_PROC_FAMILY "686 "
33#elif defined CONFIG_MPENTIUMII 24#elif defined CONFIG_MPENTIUMII
diff --git a/arch/x86/include/asm/msgbuf.h b/arch/x86/include/asm/msgbuf.h
index 7e4e9481f51c..809134c644a6 100644
--- a/arch/x86/include/asm/msgbuf.h
+++ b/arch/x86/include/asm/msgbuf.h
@@ -1,39 +1 @@
1#ifndef _ASM_X86_MSGBUF_H #include <asm-generic/msgbuf.h>
2#define _ASM_X86_MSGBUF_H
3
4/*
5 * The msqid64_ds structure for i386 architecture.
6 * Note extra padding because this structure is passed back and forth
7 * between kernel and user space.
8 *
9 * Pad space on i386 is left for:
10 * - 64-bit time_t to solve y2038 problem
11 * - 2 miscellaneous 32-bit values
12 *
13 * Pad space on x8664 is left for:
14 * - 2 miscellaneous 64-bit values
15 */
16struct msqid64_ds {
17 struct ipc64_perm msg_perm;
18 __kernel_time_t msg_stime; /* last msgsnd time */
19#ifdef __i386__
20 unsigned long __unused1;
21#endif
22 __kernel_time_t msg_rtime; /* last msgrcv time */
23#ifdef __i386__
24 unsigned long __unused2;
25#endif
26 __kernel_time_t msg_ctime; /* last change time */
27#ifdef __i386__
28 unsigned long __unused3;
29#endif
30 unsigned long msg_cbytes; /* current number of bytes on queue */
31 unsigned long msg_qnum; /* number of messages in queue */
32 unsigned long msg_qbytes; /* max number of bytes on queue */
33 __kernel_pid_t msg_lspid; /* pid of last msgsnd */
34 __kernel_pid_t msg_lrpid; /* last receive pid */
35 unsigned long __unused4;
36 unsigned long __unused5;
37};
38
39#endif /* _ASM_X86_MSGBUF_H */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 48ad9d29484a..7e2b6ba962ff 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -3,10 +3,16 @@
3 3
4#include <asm/msr-index.h> 4#include <asm/msr-index.h>
5 5
6#ifdef __KERNEL__
7#ifndef __ASSEMBLY__ 6#ifndef __ASSEMBLY__
8 7
9#include <linux/types.h> 8#include <linux/types.h>
9#include <linux/ioctl.h>
10
11#define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8])
12#define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8])
13
14#ifdef __KERNEL__
15
10#include <asm/asm.h> 16#include <asm/asm.h>
11#include <asm/errno.h> 17#include <asm/errno.h>
12#include <asm/cpumask.h> 18#include <asm/cpumask.h>
@@ -67,23 +73,7 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
67 ".previous\n\t" 73 ".previous\n\t"
68 _ASM_EXTABLE(2b, 3b) 74 _ASM_EXTABLE(2b, 3b)
69 : [err] "=r" (*err), EAX_EDX_RET(val, low, high) 75 : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
70 : "c" (msr), [fault] "i" (-EFAULT)); 76 : "c" (msr), [fault] "i" (-EIO));
71 return EAX_EDX_VAL(val, low, high);
72}
73
74static inline unsigned long long native_read_msr_amd_safe(unsigned int msr,
75 int *err)
76{
77 DECLARE_ARGS(val, low, high);
78
79 asm volatile("2: rdmsr ; xor %0,%0\n"
80 "1:\n\t"
81 ".section .fixup,\"ax\"\n\t"
82 "3: mov %3,%0 ; jmp 1b\n\t"
83 ".previous\n\t"
84 _ASM_EXTABLE(2b, 3b)
85 : "=r" (*err), EAX_EDX_RET(val, low, high)
86 : "c" (msr), "D" (0x9c5a203a), "i" (-EFAULT));
87 return EAX_EDX_VAL(val, low, high); 77 return EAX_EDX_VAL(val, low, high);
88} 78}
89 79
@@ -106,13 +96,16 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
106 _ASM_EXTABLE(2b, 3b) 96 _ASM_EXTABLE(2b, 3b)
107 : [err] "=a" (err) 97 : [err] "=a" (err)
108 : "c" (msr), "0" (low), "d" (high), 98 : "c" (msr), "0" (low), "d" (high),
109 [fault] "i" (-EFAULT) 99 [fault] "i" (-EIO)
110 : "memory"); 100 : "memory");
111 return err; 101 return err;
112} 102}
113 103
114extern unsigned long long native_read_tsc(void); 104extern unsigned long long native_read_tsc(void);
115 105
106extern int native_rdmsr_safe_regs(u32 regs[8]);
107extern int native_wrmsr_safe_regs(u32 regs[8]);
108
116static __always_inline unsigned long long __native_read_tsc(void) 109static __always_inline unsigned long long __native_read_tsc(void)
117{ 110{
118 DECLARE_ARGS(val, low, high); 111 DECLARE_ARGS(val, low, high);
@@ -181,14 +174,44 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
181 *p = native_read_msr_safe(msr, &err); 174 *p = native_read_msr_safe(msr, &err);
182 return err; 175 return err;
183} 176}
177
184static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) 178static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
185{ 179{
180 u32 gprs[8] = { 0 };
186 int err; 181 int err;
187 182
188 *p = native_read_msr_amd_safe(msr, &err); 183 gprs[1] = msr;
184 gprs[7] = 0x9c5a203a;
185
186 err = native_rdmsr_safe_regs(gprs);
187
188 *p = gprs[0] | ((u64)gprs[2] << 32);
189
189 return err; 190 return err;
190} 191}
191 192
193static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
194{
195 u32 gprs[8] = { 0 };
196
197 gprs[0] = (u32)val;
198 gprs[1] = msr;
199 gprs[2] = val >> 32;
200 gprs[7] = 0x9c5a203a;
201
202 return native_wrmsr_safe_regs(gprs);
203}
204
205static inline int rdmsr_safe_regs(u32 regs[8])
206{
207 return native_rdmsr_safe_regs(regs);
208}
209
210static inline int wrmsr_safe_regs(u32 regs[8])
211{
212 return native_wrmsr_safe_regs(regs);
213}
214
192#define rdtscl(low) \ 215#define rdtscl(low) \
193 ((low) = (u32)__native_read_tsc()) 216 ((low) = (u32)__native_read_tsc())
194 217
@@ -228,6 +251,8 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs);
228void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); 251void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs);
229int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); 252int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
230int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); 253int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
254int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]);
255int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]);
231#else /* CONFIG_SMP */ 256#else /* CONFIG_SMP */
232static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) 257static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
233{ 258{
@@ -258,7 +283,15 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
258{ 283{
259 return wrmsr_safe(msr_no, l, h); 284 return wrmsr_safe(msr_no, l, h);
260} 285}
286static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
287{
288 return rdmsr_safe_regs(regs);
289}
290static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
291{
292 return wrmsr_safe_regs(regs);
293}
261#endif /* CONFIG_SMP */ 294#endif /* CONFIG_SMP */
262#endif /* __ASSEMBLY__ */
263#endif /* __KERNEL__ */ 295#endif /* __KERNEL__ */
296#endif /* __ASSEMBLY__ */
264#endif /* _ASM_X86_MSR_H */ 297#endif /* _ASM_X86_MSR_H */
diff --git a/arch/x86/include/asm/param.h b/arch/x86/include/asm/param.h
index 6f0d0422f4ca..965d45427975 100644
--- a/arch/x86/include/asm/param.h
+++ b/arch/x86/include/asm/param.h
@@ -1,22 +1 @@
1#ifndef _ASM_X86_PARAM_H #include <asm-generic/param.h>
2#define _ASM_X86_PARAM_H
3
4#ifdef __KERNEL__
5# define HZ CONFIG_HZ /* Internal kernel timer frequency */
6# define USER_HZ 100 /* some user interfaces are */
7# define CLOCKS_PER_SEC (USER_HZ) /* in "ticks" like times() */
8#endif
9
10#ifndef HZ
11#define HZ 100
12#endif
13
14#define EXEC_PAGESIZE 4096
15
16#ifndef NOGROUP
17#define NOGROUP (-1)
18#endif
19
20#define MAXHOSTNAMELEN 64 /* max length of hostname */
21
22#endif /* _ASM_X86_PARAM_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 4fb37c8a0832..40d6586af25b 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -7,689 +7,11 @@
7#include <asm/pgtable_types.h> 7#include <asm/pgtable_types.h>
8#include <asm/asm.h> 8#include <asm/asm.h>
9 9
10/* Bitmask of what can be clobbered: usually at least eax. */ 10#include <asm/paravirt_types.h>
11#define CLBR_NONE 0
12#define CLBR_EAX (1 << 0)
13#define CLBR_ECX (1 << 1)
14#define CLBR_EDX (1 << 2)
15#define CLBR_EDI (1 << 3)
16
17#ifdef CONFIG_X86_32
18/* CLBR_ANY should match all regs platform has. For i386, that's just it */
19#define CLBR_ANY ((1 << 4) - 1)
20
21#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX)
22#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX)
23#define CLBR_SCRATCH (0)
24#else
25#define CLBR_RAX CLBR_EAX
26#define CLBR_RCX CLBR_ECX
27#define CLBR_RDX CLBR_EDX
28#define CLBR_RDI CLBR_EDI
29#define CLBR_RSI (1 << 4)
30#define CLBR_R8 (1 << 5)
31#define CLBR_R9 (1 << 6)
32#define CLBR_R10 (1 << 7)
33#define CLBR_R11 (1 << 8)
34
35#define CLBR_ANY ((1 << 9) - 1)
36
37#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \
38 CLBR_RCX | CLBR_R8 | CLBR_R9)
39#define CLBR_RET_REG (CLBR_RAX)
40#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11)
41
42#include <asm/desc_defs.h>
43#endif /* X86_64 */
44
45#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG)
46 11
47#ifndef __ASSEMBLY__ 12#ifndef __ASSEMBLY__
48#include <linux/types.h> 13#include <linux/types.h>
49#include <linux/cpumask.h> 14#include <linux/cpumask.h>
50#include <asm/kmap_types.h>
51#include <asm/desc_defs.h>
52
53struct page;
54struct thread_struct;
55struct desc_ptr;
56struct tss_struct;
57struct mm_struct;
58struct desc_struct;
59struct task_struct;
60
61/*
62 * Wrapper type for pointers to code which uses the non-standard
63 * calling convention. See PV_CALL_SAVE_REGS_THUNK below.
64 */
65struct paravirt_callee_save {
66 void *func;
67};
68
69/* general info */
70struct pv_info {
71 unsigned int kernel_rpl;
72 int shared_kernel_pmd;
73 int paravirt_enabled;
74 const char *name;
75};
76
77struct pv_init_ops {
78 /*
79 * Patch may replace one of the defined code sequences with
80 * arbitrary code, subject to the same register constraints.
81 * This generally means the code is not free to clobber any
82 * registers other than EAX. The patch function should return
83 * the number of bytes of code generated, as we nop pad the
84 * rest in generic code.
85 */
86 unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
87 unsigned long addr, unsigned len);
88
89 /* Basic arch-specific setup */
90 void (*arch_setup)(void);
91 char *(*memory_setup)(void);
92 void (*post_allocator_init)(void);
93
94 /* Print a banner to identify the environment */
95 void (*banner)(void);
96};
97
98
99struct pv_lazy_ops {
100 /* Set deferred update mode, used for batching operations. */
101 void (*enter)(void);
102 void (*leave)(void);
103};
104
105struct pv_time_ops {
106 void (*time_init)(void);
107
108 /* Set and set time of day */
109 unsigned long (*get_wallclock)(void);
110 int (*set_wallclock)(unsigned long);
111
112 unsigned long long (*sched_clock)(void);
113 unsigned long (*get_tsc_khz)(void);
114};
115
116struct pv_cpu_ops {
117 /* hooks for various privileged instructions */
118 unsigned long (*get_debugreg)(int regno);
119 void (*set_debugreg)(int regno, unsigned long value);
120
121 void (*clts)(void);
122
123 unsigned long (*read_cr0)(void);
124 void (*write_cr0)(unsigned long);
125
126 unsigned long (*read_cr4_safe)(void);
127 unsigned long (*read_cr4)(void);
128 void (*write_cr4)(unsigned long);
129
130#ifdef CONFIG_X86_64
131 unsigned long (*read_cr8)(void);
132 void (*write_cr8)(unsigned long);
133#endif
134
135 /* Segment descriptor handling */
136 void (*load_tr_desc)(void);
137 void (*load_gdt)(const struct desc_ptr *);
138 void (*load_idt)(const struct desc_ptr *);
139 void (*store_gdt)(struct desc_ptr *);
140 void (*store_idt)(struct desc_ptr *);
141 void (*set_ldt)(const void *desc, unsigned entries);
142 unsigned long (*store_tr)(void);
143 void (*load_tls)(struct thread_struct *t, unsigned int cpu);
144#ifdef CONFIG_X86_64
145 void (*load_gs_index)(unsigned int idx);
146#endif
147 void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
148 const void *desc);
149 void (*write_gdt_entry)(struct desc_struct *,
150 int entrynum, const void *desc, int size);
151 void (*write_idt_entry)(gate_desc *,
152 int entrynum, const gate_desc *gate);
153 void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
154 void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
155
156 void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
157
158 void (*set_iopl_mask)(unsigned mask);
159
160 void (*wbinvd)(void);
161 void (*io_delay)(void);
162
163 /* cpuid emulation, mostly so that caps bits can be disabled */
164 void (*cpuid)(unsigned int *eax, unsigned int *ebx,
165 unsigned int *ecx, unsigned int *edx);
166
167 /* MSR, PMC and TSR operations.
168 err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
169 u64 (*read_msr_amd)(unsigned int msr, int *err);
170 u64 (*read_msr)(unsigned int msr, int *err);
171 int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
172
173 u64 (*read_tsc)(void);
174 u64 (*read_pmc)(int counter);
175 unsigned long long (*read_tscp)(unsigned int *aux);
176
177 /*
178 * Atomically enable interrupts and return to userspace. This
179 * is only ever used to return to 32-bit processes; in a
180 * 64-bit kernel, it's used for 32-on-64 compat processes, but
181 * never native 64-bit processes. (Jump, not call.)
182 */
183 void (*irq_enable_sysexit)(void);
184
185 /*
186 * Switch to usermode gs and return to 64-bit usermode using
187 * sysret. Only used in 64-bit kernels to return to 64-bit
188 * processes. Usermode register state, including %rsp, must
189 * already be restored.
190 */
191 void (*usergs_sysret64)(void);
192
193 /*
194 * Switch to usermode gs and return to 32-bit usermode using
195 * sysret. Used to return to 32-on-64 compat processes.
196 * Other usermode register state, including %esp, must already
197 * be restored.
198 */
199 void (*usergs_sysret32)(void);
200
201 /* Normal iret. Jump to this with the standard iret stack
202 frame set up. */
203 void (*iret)(void);
204
205 void (*swapgs)(void);
206
207 void (*start_context_switch)(struct task_struct *prev);
208 void (*end_context_switch)(struct task_struct *next);
209};
210
211struct pv_irq_ops {
212 void (*init_IRQ)(void);
213
214 /*
215 * Get/set interrupt state. save_fl and restore_fl are only
216 * expected to use X86_EFLAGS_IF; all other bits
217 * returned from save_fl are undefined, and may be ignored by
218 * restore_fl.
219 *
220 * NOTE: These functions callers expect the callee to preserve
221 * more registers than the standard C calling convention.
222 */
223 struct paravirt_callee_save save_fl;
224 struct paravirt_callee_save restore_fl;
225 struct paravirt_callee_save irq_disable;
226 struct paravirt_callee_save irq_enable;
227
228 void (*safe_halt)(void);
229 void (*halt)(void);
230
231#ifdef CONFIG_X86_64
232 void (*adjust_exception_frame)(void);
233#endif
234};
235
236struct pv_apic_ops {
237#ifdef CONFIG_X86_LOCAL_APIC
238 void (*setup_boot_clock)(void);
239 void (*setup_secondary_clock)(void);
240
241 void (*startup_ipi_hook)(int phys_apicid,
242 unsigned long start_eip,
243 unsigned long start_esp);
244#endif
245};
246
247struct pv_mmu_ops {
248 /*
249 * Called before/after init_mm pagetable setup. setup_start
250 * may reset %cr3, and may pre-install parts of the pagetable;
251 * pagetable setup is expected to preserve any existing
252 * mapping.
253 */
254 void (*pagetable_setup_start)(pgd_t *pgd_base);
255 void (*pagetable_setup_done)(pgd_t *pgd_base);
256
257 unsigned long (*read_cr2)(void);
258 void (*write_cr2)(unsigned long);
259
260 unsigned long (*read_cr3)(void);
261 void (*write_cr3)(unsigned long);
262
263 /*
264 * Hooks for intercepting the creation/use/destruction of an
265 * mm_struct.
266 */
267 void (*activate_mm)(struct mm_struct *prev,
268 struct mm_struct *next);
269 void (*dup_mmap)(struct mm_struct *oldmm,
270 struct mm_struct *mm);
271 void (*exit_mmap)(struct mm_struct *mm);
272
273
274 /* TLB operations */
275 void (*flush_tlb_user)(void);
276 void (*flush_tlb_kernel)(void);
277 void (*flush_tlb_single)(unsigned long addr);
278 void (*flush_tlb_others)(const struct cpumask *cpus,
279 struct mm_struct *mm,
280 unsigned long va);
281
282 /* Hooks for allocating and freeing a pagetable top-level */
283 int (*pgd_alloc)(struct mm_struct *mm);
284 void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd);
285
286 /*
287 * Hooks for allocating/releasing pagetable pages when they're
288 * attached to a pagetable
289 */
290 void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
291 void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
292 void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count);
293 void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
294 void (*release_pte)(unsigned long pfn);
295 void (*release_pmd)(unsigned long pfn);
296 void (*release_pud)(unsigned long pfn);
297
298 /* Pagetable manipulation functions */
299 void (*set_pte)(pte_t *ptep, pte_t pteval);
300 void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
301 pte_t *ptep, pte_t pteval);
302 void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
303 void (*pte_update)(struct mm_struct *mm, unsigned long addr,
304 pte_t *ptep);
305 void (*pte_update_defer)(struct mm_struct *mm,
306 unsigned long addr, pte_t *ptep);
307
308 pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
309 pte_t *ptep);
310 void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
311 pte_t *ptep, pte_t pte);
312
313 struct paravirt_callee_save pte_val;
314 struct paravirt_callee_save make_pte;
315
316 struct paravirt_callee_save pgd_val;
317 struct paravirt_callee_save make_pgd;
318
319#if PAGETABLE_LEVELS >= 3
320#ifdef CONFIG_X86_PAE
321 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
322 void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
323 pte_t *ptep);
324 void (*pmd_clear)(pmd_t *pmdp);
325
326#endif /* CONFIG_X86_PAE */
327
328 void (*set_pud)(pud_t *pudp, pud_t pudval);
329
330 struct paravirt_callee_save pmd_val;
331 struct paravirt_callee_save make_pmd;
332
333#if PAGETABLE_LEVELS == 4
334 struct paravirt_callee_save pud_val;
335 struct paravirt_callee_save make_pud;
336
337 void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
338#endif /* PAGETABLE_LEVELS == 4 */
339#endif /* PAGETABLE_LEVELS >= 3 */
340
341#ifdef CONFIG_HIGHPTE
342 void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
343#endif
344
345 struct pv_lazy_ops lazy_mode;
346
347 /* dom0 ops */
348
349 /* Sometimes the physical address is a pfn, and sometimes its
350 an mfn. We can tell which is which from the index. */
351 void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
352 phys_addr_t phys, pgprot_t flags);
353};
354
355struct raw_spinlock;
356struct pv_lock_ops {
357 int (*spin_is_locked)(struct raw_spinlock *lock);
358 int (*spin_is_contended)(struct raw_spinlock *lock);
359 void (*spin_lock)(struct raw_spinlock *lock);
360 void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
361 int (*spin_trylock)(struct raw_spinlock *lock);
362 void (*spin_unlock)(struct raw_spinlock *lock);
363};
364
365/* This contains all the paravirt structures: we get a convenient
366 * number for each function using the offset which we use to indicate
367 * what to patch. */
368struct paravirt_patch_template {
369 struct pv_init_ops pv_init_ops;
370 struct pv_time_ops pv_time_ops;
371 struct pv_cpu_ops pv_cpu_ops;
372 struct pv_irq_ops pv_irq_ops;
373 struct pv_apic_ops pv_apic_ops;
374 struct pv_mmu_ops pv_mmu_ops;
375 struct pv_lock_ops pv_lock_ops;
376};
377
378extern struct pv_info pv_info;
379extern struct pv_init_ops pv_init_ops;
380extern struct pv_time_ops pv_time_ops;
381extern struct pv_cpu_ops pv_cpu_ops;
382extern struct pv_irq_ops pv_irq_ops;
383extern struct pv_apic_ops pv_apic_ops;
384extern struct pv_mmu_ops pv_mmu_ops;
385extern struct pv_lock_ops pv_lock_ops;
386
387#define PARAVIRT_PATCH(x) \
388 (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
389
390#define paravirt_type(op) \
391 [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
392 [paravirt_opptr] "i" (&(op))
393#define paravirt_clobber(clobber) \
394 [paravirt_clobber] "i" (clobber)
395
396/*
397 * Generate some code, and mark it as patchable by the
398 * apply_paravirt() alternate instruction patcher.
399 */
400#define _paravirt_alt(insn_string, type, clobber) \
401 "771:\n\t" insn_string "\n" "772:\n" \
402 ".pushsection .parainstructions,\"a\"\n" \
403 _ASM_ALIGN "\n" \
404 _ASM_PTR " 771b\n" \
405 " .byte " type "\n" \
406 " .byte 772b-771b\n" \
407 " .short " clobber "\n" \
408 ".popsection\n"
409
410/* Generate patchable code, with the default asm parameters. */
411#define paravirt_alt(insn_string) \
412 _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
413
414/* Simple instruction patching code. */
415#define DEF_NATIVE(ops, name, code) \
416 extern const char start_##ops##_##name[], end_##ops##_##name[]; \
417 asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
418
419unsigned paravirt_patch_nop(void);
420unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
421unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
422unsigned paravirt_patch_ignore(unsigned len);
423unsigned paravirt_patch_call(void *insnbuf,
424 const void *target, u16 tgt_clobbers,
425 unsigned long addr, u16 site_clobbers,
426 unsigned len);
427unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
428 unsigned long addr, unsigned len);
429unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
430 unsigned long addr, unsigned len);
431
432unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
433 const char *start, const char *end);
434
435unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
436 unsigned long addr, unsigned len);
437
438int paravirt_disable_iospace(void);
439
440/*
441 * This generates an indirect call based on the operation type number.
442 * The type number, computed in PARAVIRT_PATCH, is derived from the
443 * offset into the paravirt_patch_template structure, and can therefore be
444 * freely converted back into a structure offset.
445 */
446#define PARAVIRT_CALL "call *%c[paravirt_opptr];"
447
448/*
449 * These macros are intended to wrap calls through one of the paravirt
450 * ops structs, so that they can be later identified and patched at
451 * runtime.
452 *
453 * Normally, a call to a pv_op function is a simple indirect call:
454 * (pv_op_struct.operations)(args...).
455 *
456 * Unfortunately, this is a relatively slow operation for modern CPUs,
457 * because it cannot necessarily determine what the destination
458 * address is. In this case, the address is a runtime constant, so at
459 * the very least we can patch the call to e a simple direct call, or
460 * ideally, patch an inline implementation into the callsite. (Direct
461 * calls are essentially free, because the call and return addresses
462 * are completely predictable.)
463 *
464 * For i386, these macros rely on the standard gcc "regparm(3)" calling
465 * convention, in which the first three arguments are placed in %eax,
466 * %edx, %ecx (in that order), and the remaining arguments are placed
467 * on the stack. All caller-save registers (eax,edx,ecx) are expected
468 * to be modified (either clobbered or used for return values).
469 * X86_64, on the other hand, already specifies a register-based calling
470 * conventions, returning at %rax, with parameteres going on %rdi, %rsi,
471 * %rdx, and %rcx. Note that for this reason, x86_64 does not need any
472 * special handling for dealing with 4 arguments, unlike i386.
473 * However, x86_64 also have to clobber all caller saved registers, which
474 * unfortunately, are quite a bit (r8 - r11)
475 *
476 * The call instruction itself is marked by placing its start address
477 * and size into the .parainstructions section, so that
478 * apply_paravirt() in arch/i386/kernel/alternative.c can do the
479 * appropriate patching under the control of the backend pv_init_ops
480 * implementation.
481 *
482 * Unfortunately there's no way to get gcc to generate the args setup
483 * for the call, and then allow the call itself to be generated by an
484 * inline asm. Because of this, we must do the complete arg setup and
485 * return value handling from within these macros. This is fairly
486 * cumbersome.
487 *
488 * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
489 * It could be extended to more arguments, but there would be little
490 * to be gained from that. For each number of arguments, there are
491 * the two VCALL and CALL variants for void and non-void functions.
492 *
493 * When there is a return value, the invoker of the macro must specify
494 * the return type. The macro then uses sizeof() on that type to
495 * determine whether its a 32 or 64 bit value, and places the return
496 * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
497 * 64-bit). For x86_64 machines, it just returns at %rax regardless of
498 * the return value size.
499 *
500 * 64-bit arguments are passed as a pair of adjacent 32-bit arguments
501 * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments
502 * in low,high order
503 *
504 * Small structures are passed and returned in registers. The macro
505 * calling convention can't directly deal with this, so the wrapper
506 * functions must do this.
507 *
508 * These PVOP_* macros are only defined within this header. This
509 * means that all uses must be wrapped in inline functions. This also
510 * makes sure the incoming and outgoing types are always correct.
511 */
512#ifdef CONFIG_X86_32
513#define PVOP_VCALL_ARGS \
514 unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx
515#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
516
517#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
518#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x))
519#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x))
520
521#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \
522 "=c" (__ecx)
523#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS
524
525#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx)
526#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
527
528#define EXTRA_CLOBBERS
529#define VEXTRA_CLOBBERS
530#else /* CONFIG_X86_64 */
531#define PVOP_VCALL_ARGS \
532 unsigned long __edi = __edi, __esi = __esi, \
533 __edx = __edx, __ecx = __ecx
534#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax
535
536#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
537#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
538#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x))
539#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x))
540
541#define PVOP_VCALL_CLOBBERS "=D" (__edi), \
542 "=S" (__esi), "=d" (__edx), \
543 "=c" (__ecx)
544#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
545
546#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
547#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
548
549#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11"
550#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11"
551#endif /* CONFIG_X86_32 */
552
553#ifdef CONFIG_PARAVIRT_DEBUG
554#define PVOP_TEST_NULL(op) BUG_ON(op == NULL)
555#else
556#define PVOP_TEST_NULL(op) ((void)op)
557#endif
558
559#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \
560 pre, post, ...) \
561 ({ \
562 rettype __ret; \
563 PVOP_CALL_ARGS; \
564 PVOP_TEST_NULL(op); \
565 /* This is 32-bit specific, but is okay in 64-bit */ \
566 /* since this condition will never hold */ \
567 if (sizeof(rettype) > sizeof(unsigned long)) { \
568 asm volatile(pre \
569 paravirt_alt(PARAVIRT_CALL) \
570 post \
571 : call_clbr \
572 : paravirt_type(op), \
573 paravirt_clobber(clbr), \
574 ##__VA_ARGS__ \
575 : "memory", "cc" extra_clbr); \
576 __ret = (rettype)((((u64)__edx) << 32) | __eax); \
577 } else { \
578 asm volatile(pre \
579 paravirt_alt(PARAVIRT_CALL) \
580 post \
581 : call_clbr \
582 : paravirt_type(op), \
583 paravirt_clobber(clbr), \
584 ##__VA_ARGS__ \
585 : "memory", "cc" extra_clbr); \
586 __ret = (rettype)__eax; \
587 } \
588 __ret; \
589 })
590
591#define __PVOP_CALL(rettype, op, pre, post, ...) \
592 ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \
593 EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
594
595#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \
596 ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
597 PVOP_CALLEE_CLOBBERS, , \
598 pre, post, ##__VA_ARGS__)
599
600
601#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \
602 ({ \
603 PVOP_VCALL_ARGS; \
604 PVOP_TEST_NULL(op); \
605 asm volatile(pre \
606 paravirt_alt(PARAVIRT_CALL) \
607 post \
608 : call_clbr \
609 : paravirt_type(op), \
610 paravirt_clobber(clbr), \
611 ##__VA_ARGS__ \
612 : "memory", "cc" extra_clbr); \
613 })
614
615#define __PVOP_VCALL(op, pre, post, ...) \
616 ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \
617 VEXTRA_CLOBBERS, \
618 pre, post, ##__VA_ARGS__)
619
620#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \
621 ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
622 PVOP_VCALLEE_CLOBBERS, , \
623 pre, post, ##__VA_ARGS__)
624
625
626
627#define PVOP_CALL0(rettype, op) \
628 __PVOP_CALL(rettype, op, "", "")
629#define PVOP_VCALL0(op) \
630 __PVOP_VCALL(op, "", "")
631
632#define PVOP_CALLEE0(rettype, op) \
633 __PVOP_CALLEESAVE(rettype, op, "", "")
634#define PVOP_VCALLEE0(op) \
635 __PVOP_VCALLEESAVE(op, "", "")
636
637
638#define PVOP_CALL1(rettype, op, arg1) \
639 __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
640#define PVOP_VCALL1(op, arg1) \
641 __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
642
643#define PVOP_CALLEE1(rettype, op, arg1) \
644 __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
645#define PVOP_VCALLEE1(op, arg1) \
646 __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
647
648
649#define PVOP_CALL2(rettype, op, arg1, arg2) \
650 __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
651 PVOP_CALL_ARG2(arg2))
652#define PVOP_VCALL2(op, arg1, arg2) \
653 __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
654 PVOP_CALL_ARG2(arg2))
655
656#define PVOP_CALLEE2(rettype, op, arg1, arg2) \
657 __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
658 PVOP_CALL_ARG2(arg2))
659#define PVOP_VCALLEE2(op, arg1, arg2) \
660 __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \
661 PVOP_CALL_ARG2(arg2))
662
663
664#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \
665 __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
666 PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
667#define PVOP_VCALL3(op, arg1, arg2, arg3) \
668 __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
669 PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
670
671/* This is the only difference in x86_64. We can make it much simpler */
672#ifdef CONFIG_X86_32
673#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
674 __PVOP_CALL(rettype, op, \
675 "push %[_arg4];", "lea 4(%%esp),%%esp;", \
676 PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
677 PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
678#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
679 __PVOP_VCALL(op, \
680 "push %[_arg4];", "lea 4(%%esp),%%esp;", \
681 "0" ((u32)(arg1)), "1" ((u32)(arg2)), \
682 "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
683#else
684#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
685 __PVOP_CALL(rettype, op, "", "", \
686 PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
687 PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
688#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
689 __PVOP_VCALL(op, "", "", \
690 PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
691 PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
692#endif
693 15
694static inline int paravirt_enabled(void) 16static inline int paravirt_enabled(void)
695{ 17{
@@ -820,15 +142,22 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err)
820{ 142{
821 return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); 143 return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
822} 144}
823static inline u64 paravirt_read_msr_amd(unsigned msr, int *err) 145
146static inline int paravirt_rdmsr_regs(u32 *regs)
824{ 147{
825 return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err); 148 return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs);
826} 149}
150
827static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) 151static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
828{ 152{
829 return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); 153 return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
830} 154}
831 155
156static inline int paravirt_wrmsr_regs(u32 *regs)
157{
158 return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs);
159}
160
832/* These should all do BUG_ON(_err), but our headers are too tangled. */ 161/* These should all do BUG_ON(_err), but our headers are too tangled. */
833#define rdmsr(msr, val1, val2) \ 162#define rdmsr(msr, val1, val2) \
834do { \ 163do { \
@@ -862,6 +191,9 @@ do { \
862 _err; \ 191 _err; \
863}) 192})
864 193
194#define rdmsr_safe_regs(regs) paravirt_rdmsr_regs(regs)
195#define wrmsr_safe_regs(regs) paravirt_wrmsr_regs(regs)
196
865static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) 197static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
866{ 198{
867 int err; 199 int err;
@@ -871,12 +203,31 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
871} 203}
872static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) 204static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
873{ 205{
206 u32 gprs[8] = { 0 };
874 int err; 207 int err;
875 208
876 *p = paravirt_read_msr_amd(msr, &err); 209 gprs[1] = msr;
210 gprs[7] = 0x9c5a203a;
211
212 err = paravirt_rdmsr_regs(gprs);
213
214 *p = gprs[0] | ((u64)gprs[2] << 32);
215
877 return err; 216 return err;
878} 217}
879 218
219static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
220{
221 u32 gprs[8] = { 0 };
222
223 gprs[0] = (u32)val;
224 gprs[1] = msr;
225 gprs[2] = val >> 32;
226 gprs[7] = 0x9c5a203a;
227
228 return paravirt_wrmsr_regs(gprs);
229}
230
880static inline u64 paravirt_read_tsc(void) 231static inline u64 paravirt_read_tsc(void)
881{ 232{
882 return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); 233 return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
@@ -1393,20 +744,6 @@ static inline void pmd_clear(pmd_t *pmdp)
1393} 744}
1394#endif /* CONFIG_X86_PAE */ 745#endif /* CONFIG_X86_PAE */
1395 746
1396/* Lazy mode for batching updates / context switch */
1397enum paravirt_lazy_mode {
1398 PARAVIRT_LAZY_NONE,
1399 PARAVIRT_LAZY_MMU,
1400 PARAVIRT_LAZY_CPU,
1401};
1402
1403enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
1404void paravirt_start_context_switch(struct task_struct *prev);
1405void paravirt_end_context_switch(struct task_struct *next);
1406
1407void paravirt_enter_lazy_mmu(void);
1408void paravirt_leave_lazy_mmu(void);
1409
1410#define __HAVE_ARCH_START_CONTEXT_SWITCH 747#define __HAVE_ARCH_START_CONTEXT_SWITCH
1411static inline void arch_start_context_switch(struct task_struct *prev) 748static inline void arch_start_context_switch(struct task_struct *prev)
1412{ 749{
@@ -1437,12 +774,6 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
1437 pv_mmu_ops.set_fixmap(idx, phys, flags); 774 pv_mmu_ops.set_fixmap(idx, phys, flags);
1438} 775}
1439 776
1440void _paravirt_nop(void);
1441u32 _paravirt_ident_32(u32);
1442u64 _paravirt_ident_64(u64);
1443
1444#define paravirt_nop ((void *)_paravirt_nop)
1445
1446#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) 777#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
1447 778
1448static inline int __raw_spin_is_locked(struct raw_spinlock *lock) 779static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
@@ -1479,17 +810,6 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
1479 810
1480#endif 811#endif
1481 812
1482/* These all sit in the .parainstructions section to tell us what to patch. */
1483struct paravirt_patch_site {
1484 u8 *instr; /* original instructions */
1485 u8 instrtype; /* type of this instruction */
1486 u8 len; /* length of original instruction */
1487 u16 clobbers; /* what registers you may clobber */
1488};
1489
1490extern struct paravirt_patch_site __parainstructions[],
1491 __parainstructions_end[];
1492
1493#ifdef CONFIG_X86_32 813#ifdef CONFIG_X86_32
1494#define PV_SAVE_REGS "pushl %ecx; pushl %edx;" 814#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
1495#define PV_RESTORE_REGS "popl %edx; popl %ecx;" 815#define PV_RESTORE_REGS "popl %edx; popl %ecx;"
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
new file mode 100644
index 000000000000..25402d0006e7
--- /dev/null
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -0,0 +1,721 @@
1#ifndef _ASM_X86_PARAVIRT_TYPES_H
2#define _ASM_X86_PARAVIRT_TYPES_H
3
4/* Bitmask of what can be clobbered: usually at least eax. */
5#define CLBR_NONE 0
6#define CLBR_EAX (1 << 0)
7#define CLBR_ECX (1 << 1)
8#define CLBR_EDX (1 << 2)
9#define CLBR_EDI (1 << 3)
10
11#ifdef CONFIG_X86_32
12/* CLBR_ANY should match all regs platform has. For i386, that's just it */
13#define CLBR_ANY ((1 << 4) - 1)
14
15#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX)
16#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX)
17#define CLBR_SCRATCH (0)
18#else
19#define CLBR_RAX CLBR_EAX
20#define CLBR_RCX CLBR_ECX
21#define CLBR_RDX CLBR_EDX
22#define CLBR_RDI CLBR_EDI
23#define CLBR_RSI (1 << 4)
24#define CLBR_R8 (1 << 5)
25#define CLBR_R9 (1 << 6)
26#define CLBR_R10 (1 << 7)
27#define CLBR_R11 (1 << 8)
28
29#define CLBR_ANY ((1 << 9) - 1)
30
31#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \
32 CLBR_RCX | CLBR_R8 | CLBR_R9)
33#define CLBR_RET_REG (CLBR_RAX)
34#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11)
35
36#endif /* X86_64 */
37
38#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG)
39
40#ifndef __ASSEMBLY__
41
42#include <asm/desc_defs.h>
43#include <asm/kmap_types.h>
44
45struct page;
46struct thread_struct;
47struct desc_ptr;
48struct tss_struct;
49struct mm_struct;
50struct desc_struct;
51struct task_struct;
52struct cpumask;
53
54/*
55 * Wrapper type for pointers to code which uses the non-standard
56 * calling convention. See PV_CALL_SAVE_REGS_THUNK below.
57 */
58struct paravirt_callee_save {
59 void *func;
60};
61
62/* general info */
63struct pv_info {
64 unsigned int kernel_rpl;
65 int shared_kernel_pmd;
66 int paravirt_enabled;
67 const char *name;
68};
69
70struct pv_init_ops {
71 /*
72 * Patch may replace one of the defined code sequences with
73 * arbitrary code, subject to the same register constraints.
74 * This generally means the code is not free to clobber any
75 * registers other than EAX. The patch function should return
76 * the number of bytes of code generated, as we nop pad the
77 * rest in generic code.
78 */
79 unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
80 unsigned long addr, unsigned len);
81
82 /* Basic arch-specific setup */
83 void (*arch_setup)(void);
84 char *(*memory_setup)(void);
85 void (*post_allocator_init)(void);
86
87 /* Print a banner to identify the environment */
88 void (*banner)(void);
89};
90
91
92struct pv_lazy_ops {
93 /* Set deferred update mode, used for batching operations. */
94 void (*enter)(void);
95 void (*leave)(void);
96};
97
98struct pv_time_ops {
99 void (*time_init)(void);
100
101 /* Get and set time of day */
102 unsigned long (*get_wallclock)(void);
103 int (*set_wallclock)(unsigned long);
104
105 unsigned long long (*sched_clock)(void);
106 unsigned long (*get_tsc_khz)(void);
107};
108
109struct pv_cpu_ops {
110 /* hooks for various privileged instructions */
111 unsigned long (*get_debugreg)(int regno);
112 void (*set_debugreg)(int regno, unsigned long value);
113
114 void (*clts)(void);
115
116 unsigned long (*read_cr0)(void);
117 void (*write_cr0)(unsigned long);
118
119 unsigned long (*read_cr4_safe)(void);
120 unsigned long (*read_cr4)(void);
121 void (*write_cr4)(unsigned long);
122
123#ifdef CONFIG_X86_64
124 unsigned long (*read_cr8)(void);
125 void (*write_cr8)(unsigned long);
126#endif
127
128 /* Segment descriptor handling */
129 void (*load_tr_desc)(void);
130 void (*load_gdt)(const struct desc_ptr *);
131 void (*load_idt)(const struct desc_ptr *);
132 void (*store_gdt)(struct desc_ptr *);
133 void (*store_idt)(struct desc_ptr *);
134 void (*set_ldt)(const void *desc, unsigned entries);
135 unsigned long (*store_tr)(void);
136 void (*load_tls)(struct thread_struct *t, unsigned int cpu);
137#ifdef CONFIG_X86_64
138 void (*load_gs_index)(unsigned int idx);
139#endif
140 void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
141 const void *desc);
142 void (*write_gdt_entry)(struct desc_struct *,
143 int entrynum, const void *desc, int size);
144 void (*write_idt_entry)(gate_desc *,
145 int entrynum, const gate_desc *gate);
146 void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
147 void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
148
149 void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
150
151 void (*set_iopl_mask)(unsigned mask);
152
153 void (*wbinvd)(void);
154 void (*io_delay)(void);
155
156 /* cpuid emulation, mostly so that caps bits can be disabled */
157 void (*cpuid)(unsigned int *eax, unsigned int *ebx,
158 unsigned int *ecx, unsigned int *edx);
159
160 /* MSR, PMC and TSC operations.
161 err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
162 u64 (*read_msr)(unsigned int msr, int *err);
163 int (*rdmsr_regs)(u32 *regs);
164 int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
165 int (*wrmsr_regs)(u32 *regs);
166
167 u64 (*read_tsc)(void);
168 u64 (*read_pmc)(int counter);
169 unsigned long long (*read_tscp)(unsigned int *aux);
170
171 /*
172 * Atomically enable interrupts and return to userspace. This
173 * is only ever used to return to 32-bit processes; in a
174 * 64-bit kernel, it's used for 32-on-64 compat processes, but
175 * never native 64-bit processes. (Jump, not call.)
176 */
177 void (*irq_enable_sysexit)(void);
178
179 /*
180 * Switch to usermode gs and return to 64-bit usermode using
181 * sysret. Only used in 64-bit kernels to return to 64-bit
182 * processes. Usermode register state, including %rsp, must
183 * already be restored.
184 */
185 void (*usergs_sysret64)(void);
186
187 /*
188 * Switch to usermode gs and return to 32-bit usermode using
189 * sysret. Used to return to 32-on-64 compat processes.
190 * Other usermode register state, including %esp, must already
191 * be restored.
192 */
193 void (*usergs_sysret32)(void);
194
195 /* Normal iret. Jump to this with the standard iret stack
196 frame set up. */
197 void (*iret)(void);
198
199 void (*swapgs)(void);
200
201 void (*start_context_switch)(struct task_struct *prev);
202 void (*end_context_switch)(struct task_struct *next);
203};
204
205struct pv_irq_ops {
206 void (*init_IRQ)(void);
207
208 /*
209 * Get/set interrupt state. save_fl and restore_fl are only
210 * expected to use X86_EFLAGS_IF; all other bits
211 * returned from save_fl are undefined, and may be ignored by
212 * restore_fl.
213 *
214 * NOTE: These functions' callers expect the callee to preserve
215 * more registers than the standard C calling convention.
216 */
217 struct paravirt_callee_save save_fl;
218 struct paravirt_callee_save restore_fl;
219 struct paravirt_callee_save irq_disable;
220 struct paravirt_callee_save irq_enable;
221
222 void (*safe_halt)(void);
223 void (*halt)(void);
224
225#ifdef CONFIG_X86_64
226 void (*adjust_exception_frame)(void);
227#endif
228};
229
230struct pv_apic_ops {
231#ifdef CONFIG_X86_LOCAL_APIC
232 void (*setup_boot_clock)(void);
233 void (*setup_secondary_clock)(void);
234
235 void (*startup_ipi_hook)(int phys_apicid,
236 unsigned long start_eip,
237 unsigned long start_esp);
238#endif
239};
240
241struct pv_mmu_ops {
242 /*
243 * Called before/after init_mm pagetable setup. setup_start
244 * may reset %cr3, and may pre-install parts of the pagetable;
245 * pagetable setup is expected to preserve any existing
246 * mapping.
247 */
248 void (*pagetable_setup_start)(pgd_t *pgd_base);
249 void (*pagetable_setup_done)(pgd_t *pgd_base);
250
251 unsigned long (*read_cr2)(void);
252 void (*write_cr2)(unsigned long);
253
254 unsigned long (*read_cr3)(void);
255 void (*write_cr3)(unsigned long);
256
257 /*
258 * Hooks for intercepting the creation/use/destruction of an
259 * mm_struct.
260 */
261 void (*activate_mm)(struct mm_struct *prev,
262 struct mm_struct *next);
263 void (*dup_mmap)(struct mm_struct *oldmm,
264 struct mm_struct *mm);
265 void (*exit_mmap)(struct mm_struct *mm);
266
267
268 /* TLB operations */
269 void (*flush_tlb_user)(void);
270 void (*flush_tlb_kernel)(void);
271 void (*flush_tlb_single)(unsigned long addr);
272 void (*flush_tlb_others)(const struct cpumask *cpus,
273 struct mm_struct *mm,
274 unsigned long va);
275
276 /* Hooks for allocating and freeing a pagetable top-level */
277 int (*pgd_alloc)(struct mm_struct *mm);
278 void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd);
279
280 /*
281 * Hooks for allocating/releasing pagetable pages when they're
282 * attached to a pagetable
283 */
284 void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
285 void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
286 void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count);
287 void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
288 void (*release_pte)(unsigned long pfn);
289 void (*release_pmd)(unsigned long pfn);
290 void (*release_pud)(unsigned long pfn);
291
292 /* Pagetable manipulation functions */
293 void (*set_pte)(pte_t *ptep, pte_t pteval);
294 void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
295 pte_t *ptep, pte_t pteval);
296 void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
297 void (*pte_update)(struct mm_struct *mm, unsigned long addr,
298 pte_t *ptep);
299 void (*pte_update_defer)(struct mm_struct *mm,
300 unsigned long addr, pte_t *ptep);
301
302 pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
303 pte_t *ptep);
304 void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
305 pte_t *ptep, pte_t pte);
306
307 struct paravirt_callee_save pte_val;
308 struct paravirt_callee_save make_pte;
309
310 struct paravirt_callee_save pgd_val;
311 struct paravirt_callee_save make_pgd;
312
313#if PAGETABLE_LEVELS >= 3
314#ifdef CONFIG_X86_PAE
315 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
316 void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
317 pte_t *ptep);
318 void (*pmd_clear)(pmd_t *pmdp);
319
320#endif /* CONFIG_X86_PAE */
321
322 void (*set_pud)(pud_t *pudp, pud_t pudval);
323
324 struct paravirt_callee_save pmd_val;
325 struct paravirt_callee_save make_pmd;
326
327#if PAGETABLE_LEVELS == 4
328 struct paravirt_callee_save pud_val;
329 struct paravirt_callee_save make_pud;
330
331 void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
332#endif /* PAGETABLE_LEVELS == 4 */
333#endif /* PAGETABLE_LEVELS >= 3 */
334
335#ifdef CONFIG_HIGHPTE
336 void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
337#endif
338
339 struct pv_lazy_ops lazy_mode;
340
341 /* dom0 ops */
342
343 /* Sometimes the physical address is a pfn, and sometimes it's
344 an mfn. We can tell which is which from the index. */
345 void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
346 phys_addr_t phys, pgprot_t flags);
347};
348
349struct raw_spinlock;
350struct pv_lock_ops {
351 int (*spin_is_locked)(struct raw_spinlock *lock);
352 int (*spin_is_contended)(struct raw_spinlock *lock);
353 void (*spin_lock)(struct raw_spinlock *lock);
354 void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
355 int (*spin_trylock)(struct raw_spinlock *lock);
356 void (*spin_unlock)(struct raw_spinlock *lock);
357};
358
359/* This contains all the paravirt structures: we get a convenient
360 * number for each function using the offset which we use to indicate
361 * what to patch. */
362struct paravirt_patch_template {
363 struct pv_init_ops pv_init_ops;
364 struct pv_time_ops pv_time_ops;
365 struct pv_cpu_ops pv_cpu_ops;
366 struct pv_irq_ops pv_irq_ops;
367 struct pv_apic_ops pv_apic_ops;
368 struct pv_mmu_ops pv_mmu_ops;
369 struct pv_lock_ops pv_lock_ops;
370};
371
372extern struct pv_info pv_info;
373extern struct pv_init_ops pv_init_ops;
374extern struct pv_time_ops pv_time_ops;
375extern struct pv_cpu_ops pv_cpu_ops;
376extern struct pv_irq_ops pv_irq_ops;
377extern struct pv_apic_ops pv_apic_ops;
378extern struct pv_mmu_ops pv_mmu_ops;
379extern struct pv_lock_ops pv_lock_ops;
380
381#define PARAVIRT_PATCH(x) \
382 (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
383
384#define paravirt_type(op) \
385 [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
386 [paravirt_opptr] "i" (&(op))
387#define paravirt_clobber(clobber) \
388 [paravirt_clobber] "i" (clobber)
389
390/*
391 * Generate some code, and mark it as patchable by the
392 * apply_paravirt() alternate instruction patcher.
393 */
394#define _paravirt_alt(insn_string, type, clobber) \
395 "771:\n\t" insn_string "\n" "772:\n" \
396 ".pushsection .parainstructions,\"a\"\n" \
397 _ASM_ALIGN "\n" \
398 _ASM_PTR " 771b\n" \
399 " .byte " type "\n" \
400 " .byte 772b-771b\n" \
401 " .short " clobber "\n" \
402 ".popsection\n"
403
404/* Generate patchable code, with the default asm parameters. */
405#define paravirt_alt(insn_string) \
406 _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
407
408/* Simple instruction patching code. */
409#define DEF_NATIVE(ops, name, code) \
410 extern const char start_##ops##_##name[], end_##ops##_##name[]; \
411 asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
412
413unsigned paravirt_patch_nop(void);
414unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
415unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
416unsigned paravirt_patch_ignore(unsigned len);
417unsigned paravirt_patch_call(void *insnbuf,
418 const void *target, u16 tgt_clobbers,
419 unsigned long addr, u16 site_clobbers,
420 unsigned len);
421unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
422 unsigned long addr, unsigned len);
423unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
424 unsigned long addr, unsigned len);
425
426unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
427 const char *start, const char *end);
428
429unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
430 unsigned long addr, unsigned len);
431
432int paravirt_disable_iospace(void);
433
434/*
435 * This generates an indirect call based on the operation type number.
436 * The type number, computed in PARAVIRT_PATCH, is derived from the
437 * offset into the paravirt_patch_template structure, and can therefore be
438 * freely converted back into a structure offset.
439 */
440#define PARAVIRT_CALL "call *%c[paravirt_opptr];"
441
442/*
443 * These macros are intended to wrap calls through one of the paravirt
444 * ops structs, so that they can be later identified and patched at
445 * runtime.
446 *
447 * Normally, a call to a pv_op function is a simple indirect call:
448 * (pv_op_struct.operations)(args...).
449 *
450 * Unfortunately, this is a relatively slow operation for modern CPUs,
451 * because it cannot necessarily determine what the destination
452 * address is. In this case, the address is a runtime constant, so at
453 * the very least we can patch the call to be a simple direct call, or
454 * ideally, patch an inline implementation into the callsite. (Direct
455 * calls are essentially free, because the call and return addresses
456 * are completely predictable.)
457 *
458 * For i386, these macros rely on the standard gcc "regparm(3)" calling
459 * convention, in which the first three arguments are placed in %eax,
460 * %edx, %ecx (in that order), and the remaining arguments are placed
461 * on the stack. All caller-save registers (eax,edx,ecx) are expected
462 * to be modified (either clobbered or used for return values).
463 * X86_64, on the other hand, already specifies a register-based calling
464 * convention, returning at %rax, with parameters going on %rdi, %rsi,
465 * %rdx, and %rcx. Note that for this reason, x86_64 does not need any
466 * special handling for dealing with 4 arguments, unlike i386.
467 * However, x86_64 also has to clobber all caller saved registers, which
468 * unfortunately, are quite a bit (r8 - r11)
469 *
470 * The call instruction itself is marked by placing its start address
471 * and size into the .parainstructions section, so that
472 * apply_paravirt() in arch/x86/kernel/alternative.c can do the
473 * appropriate patching under the control of the backend pv_init_ops
474 * implementation.
475 *
476 * Unfortunately there's no way to get gcc to generate the args setup
477 * for the call, and then allow the call itself to be generated by an
478 * inline asm. Because of this, we must do the complete arg setup and
479 * return value handling from within these macros. This is fairly
480 * cumbersome.
481 *
482 * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
483 * It could be extended to more arguments, but there would be little
484 * to be gained from that. For each number of arguments, there are
485 * the two VCALL and CALL variants for void and non-void functions.
486 *
487 * When there is a return value, the invoker of the macro must specify
488 * the return type. The macro then uses sizeof() on that type to
489 * determine whether it's a 32 or 64 bit value, and places the return
490 * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
491 * 64-bit). For x86_64 machines, it just returns at %rax regardless of
492 * the return value size.
493 *
494 * 64-bit arguments are passed as a pair of adjacent 32-bit arguments
495 * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments
496 * in low,high order
497 *
498 * Small structures are passed and returned in registers. The macro
499 * calling convention can't directly deal with this, so the wrapper
500 * functions must do this.
501 *
502 * These PVOP_* macros are only defined within this header. This
503 * means that all uses must be wrapped in inline functions. This also
504 * makes sure the incoming and outgoing types are always correct.
505 */
506#ifdef CONFIG_X86_32
507#define PVOP_VCALL_ARGS \
508 unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx
509#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
510
511#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
512#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x))
513#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x))
514
515#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \
516 "=c" (__ecx)
517#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS
518
519#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx)
520#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
521
522#define EXTRA_CLOBBERS
523#define VEXTRA_CLOBBERS
524#else /* CONFIG_X86_64 */
525#define PVOP_VCALL_ARGS \
526 unsigned long __edi = __edi, __esi = __esi, \
527 __edx = __edx, __ecx = __ecx
528#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax
529
530#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
531#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
532#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x))
533#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x))
534
535#define PVOP_VCALL_CLOBBERS "=D" (__edi), \
536 "=S" (__esi), "=d" (__edx), \
537 "=c" (__ecx)
538#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
539
540#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
541#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
542
543#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11"
544#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11"
545#endif /* CONFIG_X86_32 */
546
547#ifdef CONFIG_PARAVIRT_DEBUG
548#define PVOP_TEST_NULL(op) BUG_ON(op == NULL)
549#else
550#define PVOP_TEST_NULL(op) ((void)op)
551#endif
552
553#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \
554 pre, post, ...) \
555 ({ \
556 rettype __ret; \
557 PVOP_CALL_ARGS; \
558 PVOP_TEST_NULL(op); \
559 /* This is 32-bit specific, but is okay in 64-bit */ \
560 /* since this condition will never hold */ \
561 if (sizeof(rettype) > sizeof(unsigned long)) { \
562 asm volatile(pre \
563 paravirt_alt(PARAVIRT_CALL) \
564 post \
565 : call_clbr \
566 : paravirt_type(op), \
567 paravirt_clobber(clbr), \
568 ##__VA_ARGS__ \
569 : "memory", "cc" extra_clbr); \
570 __ret = (rettype)((((u64)__edx) << 32) | __eax); \
571 } else { \
572 asm volatile(pre \
573 paravirt_alt(PARAVIRT_CALL) \
574 post \
575 : call_clbr \
576 : paravirt_type(op), \
577 paravirt_clobber(clbr), \
578 ##__VA_ARGS__ \
579 : "memory", "cc" extra_clbr); \
580 __ret = (rettype)__eax; \
581 } \
582 __ret; \
583 })
584
585#define __PVOP_CALL(rettype, op, pre, post, ...) \
586 ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \
587 EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
588
589#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \
590 ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
591 PVOP_CALLEE_CLOBBERS, , \
592 pre, post, ##__VA_ARGS__)
593
594
595#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \
596 ({ \
597 PVOP_VCALL_ARGS; \
598 PVOP_TEST_NULL(op); \
599 asm volatile(pre \
600 paravirt_alt(PARAVIRT_CALL) \
601 post \
602 : call_clbr \
603 : paravirt_type(op), \
604 paravirt_clobber(clbr), \
605 ##__VA_ARGS__ \
606 : "memory", "cc" extra_clbr); \
607 })
608
609#define __PVOP_VCALL(op, pre, post, ...) \
610 ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \
611 VEXTRA_CLOBBERS, \
612 pre, post, ##__VA_ARGS__)
613
614#define __PVOP_VCALLEESAVE(op, pre, post, ...) /* void callee-save call: no rettype, matches PVOP_VCALLEE* callers */ \
615 ____PVOP_VCALL(op.func, CLBR_RET_REG, \
616 PVOP_VCALLEE_CLOBBERS, , \
617 pre, post, ##__VA_ARGS__)
618
619
620
621#define PVOP_CALL0(rettype, op) \
622 __PVOP_CALL(rettype, op, "", "")
623#define PVOP_VCALL0(op) \
624 __PVOP_VCALL(op, "", "")
625
626#define PVOP_CALLEE0(rettype, op) \
627 __PVOP_CALLEESAVE(rettype, op, "", "")
628#define PVOP_VCALLEE0(op) \
629 __PVOP_VCALLEESAVE(op, "", "")
630
631
632#define PVOP_CALL1(rettype, op, arg1) \
633 __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
634#define PVOP_VCALL1(op, arg1) \
635 __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
636
637#define PVOP_CALLEE1(rettype, op, arg1) \
638 __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
639#define PVOP_VCALLEE1(op, arg1) \
640 __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
641
642
643#define PVOP_CALL2(rettype, op, arg1, arg2) \
644 __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
645 PVOP_CALL_ARG2(arg2))
646#define PVOP_VCALL2(op, arg1, arg2) \
647 __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
648 PVOP_CALL_ARG2(arg2))
649
650#define PVOP_CALLEE2(rettype, op, arg1, arg2) \
651 __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
652 PVOP_CALL_ARG2(arg2))
653#define PVOP_VCALLEE2(op, arg1, arg2) \
654 __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \
655 PVOP_CALL_ARG2(arg2))
656
657
658#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \
659 __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
660 PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
661#define PVOP_VCALL3(op, arg1, arg2, arg3) \
662 __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
663 PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
664
665/* This is the only difference in x86_64. We can make it much simpler */
666#ifdef CONFIG_X86_32
667#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
668 __PVOP_CALL(rettype, op, \
669 "push %[_arg4];", "lea 4(%%esp),%%esp;", \
670 PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
671 PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
672#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
673 __PVOP_VCALL(op, \
674 "push %[_arg4];", "lea 4(%%esp),%%esp;", \
675 "0" ((u32)(arg1)), "1" ((u32)(arg2)), \
676 "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
677#else
678#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
679 __PVOP_CALL(rettype, op, "", "", \
680 PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
681 PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
682#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
683 __PVOP_VCALL(op, "", "", \
684 PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
685 PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
686#endif
687
688/* Lazy mode for batching updates / context switch */
689enum paravirt_lazy_mode {
690 PARAVIRT_LAZY_NONE,
691 PARAVIRT_LAZY_MMU,
692 PARAVIRT_LAZY_CPU,
693};
694
695enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
696void paravirt_start_context_switch(struct task_struct *prev);
697void paravirt_end_context_switch(struct task_struct *next);
698
699void paravirt_enter_lazy_mmu(void);
700void paravirt_leave_lazy_mmu(void);
701
702void _paravirt_nop(void);
703u32 _paravirt_ident_32(u32);
704u64 _paravirt_ident_64(u64);
705
706#define paravirt_nop ((void *)_paravirt_nop)
707
708/* These all sit in the .parainstructions section to tell us what to patch. */
709struct paravirt_patch_site {
710 u8 *instr; /* original instructions */
711 u8 instrtype; /* type of this instruction */
712 u8 len; /* length of original instruction */
713 u16 clobbers; /* what registers you may clobber */
714};
715
716extern struct paravirt_patch_site __parainstructions[],
717 __parainstructions_end[];
718
719#endif /* __ASSEMBLY__ */
720
721#endif /* _ASM_X86_PARAVIRT_TYPES_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 103f1ddb0d85..04eacefcfd26 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -49,7 +49,7 @@
49#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x 49#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
50#define __my_cpu_offset percpu_read(this_cpu_off) 50#define __my_cpu_offset percpu_read(this_cpu_off)
51#else 51#else
52#define __percpu_arg(x) "%" #x 52#define __percpu_arg(x) "%P" #x
53#endif 53#endif
54 54
55/* 55/*
@@ -104,36 +104,48 @@ do { \
104 } \ 104 } \
105} while (0) 105} while (0)
106 106
107#define percpu_from_op(op, var) \ 107#define percpu_from_op(op, var, constraint) \
108({ \ 108({ \
109 typeof(var) ret__; \ 109 typeof(var) ret__; \
110 switch (sizeof(var)) { \ 110 switch (sizeof(var)) { \
111 case 1: \ 111 case 1: \
112 asm(op "b "__percpu_arg(1)",%0" \ 112 asm(op "b "__percpu_arg(1)",%0" \
113 : "=q" (ret__) \ 113 : "=q" (ret__) \
114 : "m" (var)); \ 114 : constraint); \
115 break; \ 115 break; \
116 case 2: \ 116 case 2: \
117 asm(op "w "__percpu_arg(1)",%0" \ 117 asm(op "w "__percpu_arg(1)",%0" \
118 : "=r" (ret__) \ 118 : "=r" (ret__) \
119 : "m" (var)); \ 119 : constraint); \
120 break; \ 120 break; \
121 case 4: \ 121 case 4: \
122 asm(op "l "__percpu_arg(1)",%0" \ 122 asm(op "l "__percpu_arg(1)",%0" \
123 : "=r" (ret__) \ 123 : "=r" (ret__) \
124 : "m" (var)); \ 124 : constraint); \
125 break; \ 125 break; \
126 case 8: \ 126 case 8: \
127 asm(op "q "__percpu_arg(1)",%0" \ 127 asm(op "q "__percpu_arg(1)",%0" \
128 : "=r" (ret__) \ 128 : "=r" (ret__) \
129 : "m" (var)); \ 129 : constraint); \
130 break; \ 130 break; \
131 default: __bad_percpu_size(); \ 131 default: __bad_percpu_size(); \
132 } \ 132 } \
133 ret__; \ 133 ret__; \
134}) 134})
135 135
136#define percpu_read(var) percpu_from_op("mov", per_cpu__##var) 136/*
137 * percpu_read() makes gcc load the percpu variable every time it is
138 * accessed while percpu_read_stable() allows the value to be cached.
139 * percpu_read_stable() is more efficient and can be used if its value
140 * is guaranteed to be valid across cpus. The current users include
141 * get_current() and get_thread_info() both of which are actually
142 * per-thread variables implemented as per-cpu variables and thus
143 * stable for the duration of the respective task.
144 */
145#define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \
146 "m" (per_cpu__##var))
147#define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \
148 "p" (&per_cpu__##var))
137#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) 149#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
138#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) 150#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val)
139#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) 151#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 16748077559a..4c5b51fdc788 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -135,6 +135,11 @@ static inline unsigned long pte_pfn(pte_t pte)
135 return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; 135 return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
136} 136}
137 137
138static inline unsigned long pmd_pfn(pmd_t pmd)
139{
140 return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
141}
142
138#define pte_page(pte) pfn_to_page(pte_pfn(pte)) 143#define pte_page(pte) pfn_to_page(pte_pfn(pte))
139 144
140static inline int pmd_large(pmd_t pte) 145static inline int pmd_large(pmd_t pte)
@@ -359,7 +364,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
359 * this macro returns the index of the entry in the pmd page which would 364 * this macro returns the index of the entry in the pmd page which would
360 * control the given virtual address 365 * control the given virtual address
361 */ 366 */
362static inline unsigned pmd_index(unsigned long address) 367static inline unsigned long pmd_index(unsigned long address)
363{ 368{
364 return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); 369 return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
365} 370}
@@ -379,7 +384,7 @@ static inline unsigned pmd_index(unsigned long address)
379 * this function returns the index of the entry in the pte page which would 384 * this function returns the index of the entry in the pte page which would
380 * control the given virtual address 385 * control the given virtual address
381 */ 386 */
382static inline unsigned pte_index(unsigned long address) 387static inline unsigned long pte_index(unsigned long address)
383{ 388{
384 return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); 389 return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
385} 390}
@@ -430,11 +435,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
430 return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); 435 return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
431} 436}
432 437
433static inline unsigned long pmd_pfn(pmd_t pmd)
434{
435 return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
436}
437
438static inline int pud_large(pud_t pud) 438static inline int pud_large(pud_t pud)
439{ 439{
440 return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == 440 return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@ -470,7 +470,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
470#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) 470#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
471 471
472/* to find an entry in a page-table-directory. */ 472/* to find an entry in a page-table-directory. */
473static inline unsigned pud_index(unsigned long address) 473static inline unsigned long pud_index(unsigned long address)
474{ 474{
475 return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); 475 return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
476} 476}
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c7768269b1cf..e08ea043e085 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -403,7 +403,17 @@ extern unsigned long kernel_eflags;
403extern asmlinkage void ignore_sysret(void); 403extern asmlinkage void ignore_sysret(void);
404#else /* X86_64 */ 404#else /* X86_64 */
405#ifdef CONFIG_CC_STACKPROTECTOR 405#ifdef CONFIG_CC_STACKPROTECTOR
406DECLARE_PER_CPU(unsigned long, stack_canary); 406/*
407 * Make sure stack canary segment base is cache-aligned:
408 * "For Intel Atom processors, avoid non zero segment base address
409 * that is not aligned to cache line boundary at all cost."
410 * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
411 */
412struct stack_canary {
413 char __pad[20]; /* canary at %gs:20 */
414 unsigned long canary;
415};
416DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
407#endif 417#endif
408#endif /* X86_64 */ 418#endif /* X86_64 */
409 419
@@ -703,13 +713,23 @@ static inline void cpu_relax(void)
703 rep_nop(); 713 rep_nop();
704} 714}
705 715
706/* Stop speculative execution: */ 716/* Stop speculative execution and prefetching of modified code. */
707static inline void sync_core(void) 717static inline void sync_core(void)
708{ 718{
709 int tmp; 719 int tmp;
710 720
711 asm volatile("cpuid" : "=a" (tmp) : "0" (1) 721#if defined(CONFIG_M386) || defined(CONFIG_M486)
712 : "ebx", "ecx", "edx", "memory"); 722 if (boot_cpu_data.x86 < 5)
723 /* There is no speculative execution.
724 * jmp is a barrier to prefetching. */
725 asm volatile("jmp 1f\n1:\n" ::: "memory");
726 else
727#endif
728 /* cpuid is a barrier to speculative execution.
729 * Prefetched instructions are automatically
730 * invalidated when modified. */
731 asm volatile("cpuid" : "=a" (tmp) : "0" (1)
732 : "ebx", "ecx", "edx", "memory");
713} 733}
714 734
715static inline void __monitor(const void *eax, unsigned long ecx, 735static inline void __monitor(const void *eax, unsigned long ecx,
diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h
index 263d397d2eef..75af592677ec 100644
--- a/arch/x86/include/asm/scatterlist.h
+++ b/arch/x86/include/asm/scatterlist.h
@@ -1,33 +1,8 @@
1#ifndef _ASM_X86_SCATTERLIST_H 1#ifndef _ASM_X86_SCATTERLIST_H
2#define _ASM_X86_SCATTERLIST_H 2#define _ASM_X86_SCATTERLIST_H
3 3
4#include <asm/types.h>
5
6struct scatterlist {
7#ifdef CONFIG_DEBUG_SG
8 unsigned long sg_magic;
9#endif
10 unsigned long page_link;
11 unsigned int offset;
12 unsigned int length;
13 dma_addr_t dma_address;
14 unsigned int dma_length;
15};
16
17#define ARCH_HAS_SG_CHAIN
18#define ISA_DMA_THRESHOLD (0x00ffffff) 4#define ISA_DMA_THRESHOLD (0x00ffffff)
19 5
20/* 6#include <asm-generic/scatterlist.h>
21 * These macros should be used after a pci_map_sg call has been done
22 * to get bus addresses of each of the SG entries and their lengths.
23 * You should only work with the number of sg entries pci_map_sg
24 * returns.
25 */
26#define sg_dma_address(sg) ((sg)->dma_address)
27#ifdef CONFIG_X86_32
28# define sg_dma_len(sg) ((sg)->length)
29#else
30# define sg_dma_len(sg) ((sg)->dma_length)
31#endif
32 7
33#endif /* _ASM_X86_SCATTERLIST_H */ 8#endif /* _ASM_X86_SCATTERLIST_H */
diff --git a/arch/x86/include/asm/shmbuf.h b/arch/x86/include/asm/shmbuf.h
index b51413b74971..83c05fc2de38 100644
--- a/arch/x86/include/asm/shmbuf.h
+++ b/arch/x86/include/asm/shmbuf.h
@@ -1,51 +1 @@
1#ifndef _ASM_X86_SHMBUF_H #include <asm-generic/shmbuf.h>
2#define _ASM_X86_SHMBUF_H
3
4/*
5 * The shmid64_ds structure for x86 architecture.
6 * Note extra padding because this structure is passed back and forth
7 * between kernel and user space.
8 *
9 * Pad space on 32 bit is left for:
10 * - 64-bit time_t to solve y2038 problem
11 * - 2 miscellaneous 32-bit values
12 *
13 * Pad space on 64 bit is left for:
14 * - 2 miscellaneous 64-bit values
15 */
16
17struct shmid64_ds {
18 struct ipc64_perm shm_perm; /* operation perms */
19 size_t shm_segsz; /* size of segment (bytes) */
20 __kernel_time_t shm_atime; /* last attach time */
21#ifdef __i386__
22 unsigned long __unused1;
23#endif
24 __kernel_time_t shm_dtime; /* last detach time */
25#ifdef __i386__
26 unsigned long __unused2;
27#endif
28 __kernel_time_t shm_ctime; /* last change time */
29#ifdef __i386__
30 unsigned long __unused3;
31#endif
32 __kernel_pid_t shm_cpid; /* pid of creator */
33 __kernel_pid_t shm_lpid; /* pid of last operator */
34 unsigned long shm_nattch; /* no. of current attaches */
35 unsigned long __unused4;
36 unsigned long __unused5;
37};
38
39struct shminfo64 {
40 unsigned long shmmax;
41 unsigned long shmmin;
42 unsigned long shmmni;
43 unsigned long shmseg;
44 unsigned long shmall;
45 unsigned long __unused1;
46 unsigned long __unused2;
47 unsigned long __unused3;
48 unsigned long __unused4;
49};
50
51#endif /* _ASM_X86_SHMBUF_H */
diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h
index ca8bf2cd0ba9..6b71384b9d8b 100644
--- a/arch/x86/include/asm/socket.h
+++ b/arch/x86/include/asm/socket.h
@@ -1,60 +1 @@
1#ifndef _ASM_X86_SOCKET_H #include <asm-generic/socket.h>
2#define _ASM_X86_SOCKET_H
3
4#include <asm/sockios.h>
5
6/* For setsockopt(2) */
7#define SOL_SOCKET 1
8
9#define SO_DEBUG 1
10#define SO_REUSEADDR 2
11#define SO_TYPE 3
12#define SO_ERROR 4
13#define SO_DONTROUTE 5
14#define SO_BROADCAST 6
15#define SO_SNDBUF 7
16#define SO_RCVBUF 8
17#define SO_SNDBUFFORCE 32
18#define SO_RCVBUFFORCE 33
19#define SO_KEEPALIVE 9
20#define SO_OOBINLINE 10
21#define SO_NO_CHECK 11
22#define SO_PRIORITY 12
23#define SO_LINGER 13
24#define SO_BSDCOMPAT 14
25/* To add :#define SO_REUSEPORT 15 */
26#define SO_PASSCRED 16
27#define SO_PEERCRED 17
28#define SO_RCVLOWAT 18
29#define SO_SNDLOWAT 19
30#define SO_RCVTIMEO 20
31#define SO_SNDTIMEO 21
32
33/* Security levels - as per NRL IPv6 - don't actually do anything */
34#define SO_SECURITY_AUTHENTICATION 22
35#define SO_SECURITY_ENCRYPTION_TRANSPORT 23
36#define SO_SECURITY_ENCRYPTION_NETWORK 24
37
38#define SO_BINDTODEVICE 25
39
40/* Socket filtering */
41#define SO_ATTACH_FILTER 26
42#define SO_DETACH_FILTER 27
43
44#define SO_PEERNAME 28
45#define SO_TIMESTAMP 29
46#define SCM_TIMESTAMP SO_TIMESTAMP
47
48#define SO_ACCEPTCONN 30
49
50#define SO_PEERSEC 31
51#define SO_PASSSEC 34
52#define SO_TIMESTAMPNS 35
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
54
55#define SO_MARK 36
56
57#define SO_TIMESTAMPING 37
58#define SCM_TIMESTAMPING SO_TIMESTAMPING
59
60#endif /* _ASM_X86_SOCKET_H */
diff --git a/arch/x86/include/asm/sockios.h b/arch/x86/include/asm/sockios.h
index 49cc72b5d3c9..def6d4746ee7 100644
--- a/arch/x86/include/asm/sockios.h
+++ b/arch/x86/include/asm/sockios.h
@@ -1,13 +1 @@
1#ifndef _ASM_X86_SOCKIOS_H #include <asm-generic/sockios.h>
2#define _ASM_X86_SOCKIOS_H
3
4/* Socket-level I/O control calls. */
5#define FIOSETOWN 0x8901
6#define SIOCSPGRP 0x8902
7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
12
13#endif /* _ASM_X86_SOCKIOS_H */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index c2d742c6e15f..157517763565 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -48,7 +48,7 @@
48 * head_32 for boot CPU and setup_per_cpu_areas() for others. 48 * head_32 for boot CPU and setup_per_cpu_areas() for others.
49 */ 49 */
50#define GDT_STACK_CANARY_INIT \ 50#define GDT_STACK_CANARY_INIT \
51 [GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } }, 51 [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18),
52 52
53/* 53/*
54 * Initialize the stackprotector canary value. 54 * Initialize the stackprotector canary value.
@@ -78,21 +78,19 @@ static __always_inline void boot_init_stack_canary(void)
78#ifdef CONFIG_X86_64 78#ifdef CONFIG_X86_64
79 percpu_write(irq_stack_union.stack_canary, canary); 79 percpu_write(irq_stack_union.stack_canary, canary);
80#else 80#else
81 percpu_write(stack_canary, canary); 81 percpu_write(stack_canary.canary, canary);
82#endif 82#endif
83} 83}
84 84
85static inline void setup_stack_canary_segment(int cpu) 85static inline void setup_stack_canary_segment(int cpu)
86{ 86{
87#ifdef CONFIG_X86_32 87#ifdef CONFIG_X86_32
88 unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20; 88 unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
89 struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); 89 struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
90 struct desc_struct desc; 90 struct desc_struct desc;
91 91
92 desc = gdt_table[GDT_ENTRY_STACK_CANARY]; 92 desc = gdt_table[GDT_ENTRY_STACK_CANARY];
93 desc.base0 = canary & 0xffff; 93 set_desc_base(&desc, canary);
94 desc.base1 = (canary >> 16) & 0xff;
95 desc.base2 = (canary >> 24) & 0xff;
96 write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S); 94 write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S);
97#endif 95#endif
98} 96}
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 643c59b4bc6e..f08f97374892 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -31,7 +31,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
31 "movl %P[task_canary](%[next]), %%ebx\n\t" \ 31 "movl %P[task_canary](%[next]), %%ebx\n\t" \
32 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" 32 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
33#define __switch_canary_oparam \ 33#define __switch_canary_oparam \
34 , [stack_canary] "=m" (per_cpu_var(stack_canary)) 34 , [stack_canary] "=m" (per_cpu_var(stack_canary.canary))
35#define __switch_canary_iparam \ 35#define __switch_canary_iparam \
36 , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) 36 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
37#else /* CC_STACKPROTECTOR */ 37#else /* CC_STACKPROTECTOR */
@@ -150,33 +150,6 @@ do { \
150#endif 150#endif
151 151
152#ifdef __KERNEL__ 152#ifdef __KERNEL__
153#define _set_base(addr, base) do { unsigned long __pr; \
154__asm__ __volatile__ ("movw %%dx,%1\n\t" \
155 "rorl $16,%%edx\n\t" \
156 "movb %%dl,%2\n\t" \
157 "movb %%dh,%3" \
158 :"=&d" (__pr) \
159 :"m" (*((addr)+2)), \
160 "m" (*((addr)+4)), \
161 "m" (*((addr)+7)), \
162 "0" (base) \
163 ); } while (0)
164
165#define _set_limit(addr, limit) do { unsigned long __lr; \
166__asm__ __volatile__ ("movw %%dx,%1\n\t" \
167 "rorl $16,%%edx\n\t" \
168 "movb %2,%%dh\n\t" \
169 "andb $0xf0,%%dh\n\t" \
170 "orb %%dh,%%dl\n\t" \
171 "movb %%dl,%2" \
172 :"=&d" (__lr) \
173 :"m" (*(addr)), \
174 "m" (*((addr)+6)), \
175 "0" (limit) \
176 ); } while (0)
177
178#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
179#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
180 153
181extern void native_load_gs_index(unsigned); 154extern void native_load_gs_index(unsigned);
182 155
diff --git a/arch/x86/include/asm/termbits.h b/arch/x86/include/asm/termbits.h
index af1b70ea440f..3935b106de79 100644
--- a/arch/x86/include/asm/termbits.h
+++ b/arch/x86/include/asm/termbits.h
@@ -1,198 +1 @@
1#ifndef _ASM_X86_TERMBITS_H #include <asm-generic/termbits.h>
2#define _ASM_X86_TERMBITS_H
3
4#include <linux/posix_types.h>
5
6typedef unsigned char cc_t;
7typedef unsigned int speed_t;
8typedef unsigned int tcflag_t;
9
10#define NCCS 19
11struct termios {
12 tcflag_t c_iflag; /* input mode flags */
13 tcflag_t c_oflag; /* output mode flags */
14 tcflag_t c_cflag; /* control mode flags */
15 tcflag_t c_lflag; /* local mode flags */
16 cc_t c_line; /* line discipline */
17 cc_t c_cc[NCCS]; /* control characters */
18};
19
20struct termios2 {
21 tcflag_t c_iflag; /* input mode flags */
22 tcflag_t c_oflag; /* output mode flags */
23 tcflag_t c_cflag; /* control mode flags */
24 tcflag_t c_lflag; /* local mode flags */
25 cc_t c_line; /* line discipline */
26 cc_t c_cc[NCCS]; /* control characters */
27 speed_t c_ispeed; /* input speed */
28 speed_t c_ospeed; /* output speed */
29};
30
31struct ktermios {
32 tcflag_t c_iflag; /* input mode flags */
33 tcflag_t c_oflag; /* output mode flags */
34 tcflag_t c_cflag; /* control mode flags */
35 tcflag_t c_lflag; /* local mode flags */
36 cc_t c_line; /* line discipline */
37 cc_t c_cc[NCCS]; /* control characters */
38 speed_t c_ispeed; /* input speed */
39 speed_t c_ospeed; /* output speed */
40};
41
42/* c_cc characters */
43#define VINTR 0
44#define VQUIT 1
45#define VERASE 2
46#define VKILL 3
47#define VEOF 4
48#define VTIME 5
49#define VMIN 6
50#define VSWTC 7
51#define VSTART 8
52#define VSTOP 9
53#define VSUSP 10
54#define VEOL 11
55#define VREPRINT 12
56#define VDISCARD 13
57#define VWERASE 14
58#define VLNEXT 15
59#define VEOL2 16
60
61/* c_iflag bits */
62#define IGNBRK 0000001
63#define BRKINT 0000002
64#define IGNPAR 0000004
65#define PARMRK 0000010
66#define INPCK 0000020
67#define ISTRIP 0000040
68#define INLCR 0000100
69#define IGNCR 0000200
70#define ICRNL 0000400
71#define IUCLC 0001000
72#define IXON 0002000
73#define IXANY 0004000
74#define IXOFF 0010000
75#define IMAXBEL 0020000
76#define IUTF8 0040000
77
78/* c_oflag bits */
79#define OPOST 0000001
80#define OLCUC 0000002
81#define ONLCR 0000004
82#define OCRNL 0000010
83#define ONOCR 0000020
84#define ONLRET 0000040
85#define OFILL 0000100
86#define OFDEL 0000200
87#define NLDLY 0000400
88#define NL0 0000000
89#define NL1 0000400
90#define CRDLY 0003000
91#define CR0 0000000
92#define CR1 0001000
93#define CR2 0002000
94#define CR3 0003000
95#define TABDLY 0014000
96#define TAB0 0000000
97#define TAB1 0004000
98#define TAB2 0010000
99#define TAB3 0014000
100#define XTABS 0014000
101#define BSDLY 0020000
102#define BS0 0000000
103#define BS1 0020000
104#define VTDLY 0040000
105#define VT0 0000000
106#define VT1 0040000
107#define FFDLY 0100000
108#define FF0 0000000
109#define FF1 0100000
110
111/* c_cflag bit meaning */
112#define CBAUD 0010017
113#define B0 0000000 /* hang up */
114#define B50 0000001
115#define B75 0000002
116#define B110 0000003
117#define B134 0000004
118#define B150 0000005
119#define B200 0000006
120#define B300 0000007
121#define B600 0000010
122#define B1200 0000011
123#define B1800 0000012
124#define B2400 0000013
125#define B4800 0000014
126#define B9600 0000015
127#define B19200 0000016
128#define B38400 0000017
129#define EXTA B19200
130#define EXTB B38400
131#define CSIZE 0000060
132#define CS5 0000000
133#define CS6 0000020
134#define CS7 0000040
135#define CS8 0000060
136#define CSTOPB 0000100
137#define CREAD 0000200
138#define PARENB 0000400
139#define PARODD 0001000
140#define HUPCL 0002000
141#define CLOCAL 0004000
142#define CBAUDEX 0010000
143#define BOTHER 0010000 /* non standard rate */
144#define B57600 0010001
145#define B115200 0010002
146#define B230400 0010003
147#define B460800 0010004
148#define B500000 0010005
149#define B576000 0010006
150#define B921600 0010007
151#define B1000000 0010010
152#define B1152000 0010011
153#define B1500000 0010012
154#define B2000000 0010013
155#define B2500000 0010014
156#define B3000000 0010015
157#define B3500000 0010016
158#define B4000000 0010017
159#define CIBAUD 002003600000 /* input baud rate */
160#define CMSPAR 010000000000 /* mark or space (stick) parity */
161#define CRTSCTS 020000000000 /* flow control */
162
163#define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */
164
165/* c_lflag bits */
166#define ISIG 0000001
167#define ICANON 0000002
168#define XCASE 0000004
169#define ECHO 0000010
170#define ECHOE 0000020
171#define ECHOK 0000040
172#define ECHONL 0000100
173#define NOFLSH 0000200
174#define TOSTOP 0000400
175#define ECHOCTL 0001000
176#define ECHOPRT 0002000
177#define ECHOKE 0004000
178#define FLUSHO 0010000
179#define PENDIN 0040000
180#define IEXTEN 0100000
181
182/* tcflow() and TCXONC use these */
183#define TCOOFF 0
184#define TCOON 1
185#define TCIOFF 2
186#define TCION 3
187
188/* tcflush() and TCFLSH use these */
189#define TCIFLUSH 0
190#define TCOFLUSH 1
191#define TCIOFLUSH 2
192
193/* tcsetattr uses these */
194#define TCSANOW 0
195#define TCSADRAIN 1
196#define TCSAFLUSH 2
197
198#endif /* _ASM_X86_TERMBITS_H */
diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h
index c4ee8056baca..280d78a9d966 100644
--- a/arch/x86/include/asm/termios.h
+++ b/arch/x86/include/asm/termios.h
@@ -1,114 +1 @@
1#ifndef _ASM_X86_TERMIOS_H #include <asm-generic/termios.h>
2#define _ASM_X86_TERMIOS_H
3
4#include <asm/termbits.h>
5#include <asm/ioctls.h>
6
7struct winsize {
8 unsigned short ws_row;
9 unsigned short ws_col;
10 unsigned short ws_xpixel;
11 unsigned short ws_ypixel;
12};
13
14#define NCC 8
15struct termio {
16 unsigned short c_iflag; /* input mode flags */
17 unsigned short c_oflag; /* output mode flags */
18 unsigned short c_cflag; /* control mode flags */
19 unsigned short c_lflag; /* local mode flags */
20 unsigned char c_line; /* line discipline */
21 unsigned char c_cc[NCC]; /* control characters */
22};
23
24/* modem lines */
25#define TIOCM_LE 0x001
26#define TIOCM_DTR 0x002
27#define TIOCM_RTS 0x004
28#define TIOCM_ST 0x008
29#define TIOCM_SR 0x010
30#define TIOCM_CTS 0x020
31#define TIOCM_CAR 0x040
32#define TIOCM_RNG 0x080
33#define TIOCM_DSR 0x100
34#define TIOCM_CD TIOCM_CAR
35#define TIOCM_RI TIOCM_RNG
36#define TIOCM_OUT1 0x2000
37#define TIOCM_OUT2 0x4000
38#define TIOCM_LOOP 0x8000
39
40/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
41
42#ifdef __KERNEL__
43
44#include <asm/uaccess.h>
45
46/* intr=^C quit=^\ erase=del kill=^U
47 eof=^D vtime=\0 vmin=\1 sxtc=\0
48 start=^Q stop=^S susp=^Z eol=\0
49 reprint=^R discard=^U werase=^W lnext=^V
50 eol2=\0
51*/
52#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
53
54/*
55 * Translate a "termio" structure into a "termios". Ugh.
56 */
57#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \
58 unsigned short __tmp; \
59 get_user(__tmp,&(termio)->x); \
60 *(unsigned short *) &(termios)->x = __tmp; \
61}
62
63static inline int user_termio_to_kernel_termios(struct ktermios *termios,
64 struct termio __user *termio)
65{
66 SET_LOW_TERMIOS_BITS(termios, termio, c_iflag);
67 SET_LOW_TERMIOS_BITS(termios, termio, c_oflag);
68 SET_LOW_TERMIOS_BITS(termios, termio, c_cflag);
69 SET_LOW_TERMIOS_BITS(termios, termio, c_lflag);
70 get_user(termios->c_line, &termio->c_line);
71 return copy_from_user(termios->c_cc, termio->c_cc, NCC);
72}
73
74/*
75 * Translate a "termios" structure into a "termio". Ugh.
76 */
77static inline int kernel_termios_to_user_termio(struct termio __user *termio,
78 struct ktermios *termios)
79{
80 put_user((termios)->c_iflag, &(termio)->c_iflag);
81 put_user((termios)->c_oflag, &(termio)->c_oflag);
82 put_user((termios)->c_cflag, &(termio)->c_cflag);
83 put_user((termios)->c_lflag, &(termio)->c_lflag);
84 put_user((termios)->c_line, &(termio)->c_line);
85 return copy_to_user((termio)->c_cc, (termios)->c_cc, NCC);
86}
87
88static inline int user_termios_to_kernel_termios(struct ktermios *k,
89 struct termios2 __user *u)
90{
91 return copy_from_user(k, u, sizeof(struct termios2));
92}
93
94static inline int kernel_termios_to_user_termios(struct termios2 __user *u,
95 struct ktermios *k)
96{
97 return copy_to_user(u, k, sizeof(struct termios2));
98}
99
100static inline int user_termios_to_kernel_termios_1(struct ktermios *k,
101 struct termios __user *u)
102{
103 return copy_from_user(k, u, sizeof(struct termios));
104}
105
106static inline int kernel_termios_to_user_termios_1(struct termios __user *u,
107 struct ktermios *k)
108{
109 return copy_to_user(u, k, sizeof(struct termios));
110}
111
112#endif /* __KERNEL__ */
113
114#endif /* _ASM_X86_TERMIOS_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 6f7786aea4fc..d27d0a2fec4c 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -214,7 +214,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
214static inline struct thread_info *current_thread_info(void) 214static inline struct thread_info *current_thread_info(void)
215{ 215{
216 struct thread_info *ti; 216 struct thread_info *ti;
217 ti = (void *)(percpu_read(kernel_stack) + 217 ti = (void *)(percpu_read_stable(kernel_stack) +
218 KERNEL_STACK_OFFSET - THREAD_SIZE); 218 KERNEL_STACK_OFFSET - THREAD_SIZE);
219 return ti; 219 return ti;
220} 220}
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index bfd74c032fca..4da91ad69e0d 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -81,9 +81,7 @@ extern int panic_on_unrecovered_nmi;
81 81
82void math_error(void __user *); 82void math_error(void __user *);
83void math_emulate(struct math_emu_info *); 83void math_emulate(struct math_emu_info *);
84#ifdef CONFIG_X86_32 84#ifndef CONFIG_X86_32
85unsigned long patch_espfix_desc(unsigned long, unsigned long);
86#else
87asmlinkage void smp_thermal_interrupt(void); 85asmlinkage void smp_thermal_interrupt(void);
88asmlinkage void mce_threshold_interrupt(void); 86asmlinkage void mce_threshold_interrupt(void);
89#endif 87#endif
diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h
index 09b97745772f..df1da20f4534 100644
--- a/arch/x86/include/asm/types.h
+++ b/arch/x86/include/asm/types.h
@@ -1,19 +1,11 @@
1#ifndef _ASM_X86_TYPES_H 1#ifndef _ASM_X86_TYPES_H
2#define _ASM_X86_TYPES_H 2#define _ASM_X86_TYPES_H
3 3
4#include <asm-generic/int-ll64.h> 4#define dma_addr_t dma_addr_t
5 5
6#ifndef __ASSEMBLY__ 6#include <asm-generic/types.h>
7
8typedef unsigned short umode_t;
9 7
10#endif /* __ASSEMBLY__ */
11
12/*
13 * These aren't exported outside the kernel to avoid name space clashes
14 */
15#ifdef __KERNEL__ 8#ifdef __KERNEL__
16
17#ifndef __ASSEMBLY__ 9#ifndef __ASSEMBLY__
18 10
19typedef u64 dma64_addr_t; 11typedef u64 dma64_addr_t;
diff --git a/arch/x86/include/asm/ucontext.h b/arch/x86/include/asm/ucontext.h
index 87324cf439d9..b7c29c8017f2 100644
--- a/arch/x86/include/asm/ucontext.h
+++ b/arch/x86/include/asm/ucontext.h
@@ -7,12 +7,6 @@
7 * sigcontext struct (uc_mcontext). 7 * sigcontext struct (uc_mcontext).
8 */ 8 */
9 9
10struct ucontext { 10#include <asm-generic/ucontext.h>
11 unsigned long uc_flags;
12 struct ucontext *uc_link;
13 stack_t uc_stack;
14 struct sigcontext uc_mcontext;
15 sigset_t uc_sigmask; /* mask last for extensibility */
16};
17 11
18#endif /* _ASM_X86_UCONTEXT_H */ 12#endif /* _ASM_X86_UCONTEXT_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6b8ca3a0285d..67e929b89875 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -833,106 +833,6 @@ static int __init acpi_parse_madt_lapic_entries(void)
833extern int es7000_plat; 833extern int es7000_plat;
834#endif 834#endif
835 835
836static struct {
837 int gsi_base;
838 int gsi_end;
839} mp_ioapic_routing[MAX_IO_APICS];
840
841int mp_find_ioapic(int gsi)
842{
843 int i = 0;
844
845 /* Find the IOAPIC that manages this GSI. */
846 for (i = 0; i < nr_ioapics; i++) {
847 if ((gsi >= mp_ioapic_routing[i].gsi_base)
848 && (gsi <= mp_ioapic_routing[i].gsi_end))
849 return i;
850 }
851
852 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
853 return -1;
854}
855
856int mp_find_ioapic_pin(int ioapic, int gsi)
857{
858 if (WARN_ON(ioapic == -1))
859 return -1;
860 if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end))
861 return -1;
862
863 return gsi - mp_ioapic_routing[ioapic].gsi_base;
864}
865
866static u8 __init uniq_ioapic_id(u8 id)
867{
868#ifdef CONFIG_X86_32
869 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
870 !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
871 return io_apic_get_unique_id(nr_ioapics, id);
872 else
873 return id;
874#else
875 int i;
876 DECLARE_BITMAP(used, 256);
877 bitmap_zero(used, 256);
878 for (i = 0; i < nr_ioapics; i++) {
879 struct mpc_ioapic *ia = &mp_ioapics[i];
880 __set_bit(ia->apicid, used);
881 }
882 if (!test_bit(id, used))
883 return id;
884 return find_first_zero_bit(used, 256);
885#endif
886}
887
888static int bad_ioapic(unsigned long address)
889{
890 if (nr_ioapics >= MAX_IO_APICS) {
891 printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
892 "(found %d)\n", MAX_IO_APICS, nr_ioapics);
893 panic("Recompile kernel with bigger MAX_IO_APICS!\n");
894 }
895 if (!address) {
896 printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
897 " found in table, skipping!\n");
898 return 1;
899 }
900 return 0;
901}
902
903void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
904{
905 int idx = 0;
906
907 if (bad_ioapic(address))
908 return;
909
910 idx = nr_ioapics;
911
912 mp_ioapics[idx].type = MP_IOAPIC;
913 mp_ioapics[idx].flags = MPC_APIC_USABLE;
914 mp_ioapics[idx].apicaddr = address;
915
916 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
917 mp_ioapics[idx].apicid = uniq_ioapic_id(id);
918 mp_ioapics[idx].apicver = io_apic_get_version(idx);
919
920 /*
921 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
922 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
923 */
924 mp_ioapic_routing[idx].gsi_base = gsi_base;
925 mp_ioapic_routing[idx].gsi_end = gsi_base +
926 io_apic_get_redir_entries(idx);
927
928 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
929 "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
930 mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
931 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
932
933 nr_ioapics++;
934}
935
936int __init acpi_probe_gsi(void) 836int __init acpi_probe_gsi(void)
937{ 837{
938 int idx; 838 int idx;
@@ -947,7 +847,7 @@ int __init acpi_probe_gsi(void)
947 847
948 max_gsi = 0; 848 max_gsi = 0;
949 for (idx = 0; idx < nr_ioapics; idx++) { 849 for (idx = 0; idx < nr_ioapics; idx++) {
950 gsi = mp_ioapic_routing[idx].gsi_end; 850 gsi = mp_gsi_routing[idx].gsi_end;
951 851
952 if (gsi > max_gsi) 852 if (gsi > max_gsi)
953 max_gsi = gsi; 853 max_gsi = gsi;
@@ -1179,9 +1079,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
1179 * If MPS is present, it will handle them, 1079 * If MPS is present, it will handle them,
1180 * otherwise the system will stay in PIC mode 1080 * otherwise the system will stay in PIC mode
1181 */ 1081 */
1182 if (acpi_disabled || acpi_noirq) { 1082 if (acpi_disabled || acpi_noirq)
1183 return -ENODEV; 1083 return -ENODEV;
1184 }
1185 1084
1186 if (!cpu_has_apic) 1085 if (!cpu_has_apic)
1187 return -ENODEV; 1086 return -ENODEV;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f57658702571..de7353c0ce9c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -2,6 +2,7 @@
2#include <linux/sched.h> 2#include <linux/sched.h>
3#include <linux/mutex.h> 3#include <linux/mutex.h>
4#include <linux/list.h> 4#include <linux/list.h>
5#include <linux/stringify.h>
5#include <linux/kprobes.h> 6#include <linux/kprobes.h>
6#include <linux/mm.h> 7#include <linux/mm.h>
7#include <linux/vmalloc.h> 8#include <linux/vmalloc.h>
@@ -32,7 +33,7 @@ __setup("smp-alt-boot", bootonly);
32#define smp_alt_once 1 33#define smp_alt_once 1
33#endif 34#endif
34 35
35static int debug_alternative; 36static int __initdata_or_module debug_alternative;
36 37
37static int __init debug_alt(char *str) 38static int __init debug_alt(char *str)
38{ 39{
@@ -51,7 +52,7 @@ static int __init setup_noreplace_smp(char *str)
51__setup("noreplace-smp", setup_noreplace_smp); 52__setup("noreplace-smp", setup_noreplace_smp);
52 53
53#ifdef CONFIG_PARAVIRT 54#ifdef CONFIG_PARAVIRT
54static int noreplace_paravirt = 0; 55static int __initdata_or_module noreplace_paravirt = 0;
55 56
56static int __init setup_noreplace_paravirt(char *str) 57static int __init setup_noreplace_paravirt(char *str)
57{ 58{
@@ -64,16 +65,17 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt);
64#define DPRINTK(fmt, args...) if (debug_alternative) \ 65#define DPRINTK(fmt, args...) if (debug_alternative) \
65 printk(KERN_DEBUG fmt, args) 66 printk(KERN_DEBUG fmt, args)
66 67
67#ifdef GENERIC_NOP1 68#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
68/* Use inline assembly to define this because the nops are defined 69/* Use inline assembly to define this because the nops are defined
69 as inline assembly strings in the include files and we cannot 70 as inline assembly strings in the include files and we cannot
70 get them easily into strings. */ 71 get them easily into strings. */
71asm("\t.section .rodata, \"a\"\nintelnops: " 72asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: "
72 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 73 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
73 GENERIC_NOP7 GENERIC_NOP8 74 GENERIC_NOP7 GENERIC_NOP8
74 "\t.previous"); 75 "\t.previous");
75extern const unsigned char intelnops[]; 76extern const unsigned char intelnops[];
76static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { 77static const unsigned char *const __initconst_or_module
78intel_nops[ASM_NOP_MAX+1] = {
77 NULL, 79 NULL,
78 intelnops, 80 intelnops,
79 intelnops + 1, 81 intelnops + 1,
@@ -87,12 +89,13 @@ static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
87#endif 89#endif
88 90
89#ifdef K8_NOP1 91#ifdef K8_NOP1
90asm("\t.section .rodata, \"a\"\nk8nops: " 92asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: "
91 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 93 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
92 K8_NOP7 K8_NOP8 94 K8_NOP7 K8_NOP8
93 "\t.previous"); 95 "\t.previous");
94extern const unsigned char k8nops[]; 96extern const unsigned char k8nops[];
95static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { 97static const unsigned char *const __initconst_or_module
98k8_nops[ASM_NOP_MAX+1] = {
96 NULL, 99 NULL,
97 k8nops, 100 k8nops,
98 k8nops + 1, 101 k8nops + 1,
@@ -105,13 +108,14 @@ static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
105}; 108};
106#endif 109#endif
107 110
108#ifdef K7_NOP1 111#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
109asm("\t.section .rodata, \"a\"\nk7nops: " 112asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: "
110 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 113 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
111 K7_NOP7 K7_NOP8 114 K7_NOP7 K7_NOP8
112 "\t.previous"); 115 "\t.previous");
113extern const unsigned char k7nops[]; 116extern const unsigned char k7nops[];
114static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { 117static const unsigned char *const __initconst_or_module
118k7_nops[ASM_NOP_MAX+1] = {
115 NULL, 119 NULL,
116 k7nops, 120 k7nops,
117 k7nops + 1, 121 k7nops + 1,
@@ -125,12 +129,13 @@ static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
125#endif 129#endif
126 130
127#ifdef P6_NOP1 131#ifdef P6_NOP1
128asm("\t.section .rodata, \"a\"\np6nops: " 132asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: "
129 P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 133 P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
130 P6_NOP7 P6_NOP8 134 P6_NOP7 P6_NOP8
131 "\t.previous"); 135 "\t.previous");
132extern const unsigned char p6nops[]; 136extern const unsigned char p6nops[];
133static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { 137static const unsigned char *const __initconst_or_module
138p6_nops[ASM_NOP_MAX+1] = {
134 NULL, 139 NULL,
135 p6nops, 140 p6nops,
136 p6nops + 1, 141 p6nops + 1,
@@ -146,7 +151,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
146#ifdef CONFIG_X86_64 151#ifdef CONFIG_X86_64
147 152
148extern char __vsyscall_0; 153extern char __vsyscall_0;
149const unsigned char *const *find_nop_table(void) 154static const unsigned char *const *__init_or_module find_nop_table(void)
150{ 155{
151 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 156 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
152 boot_cpu_has(X86_FEATURE_NOPL)) 157 boot_cpu_has(X86_FEATURE_NOPL))
@@ -157,7 +162,7 @@ const unsigned char *const *find_nop_table(void)
157 162
158#else /* CONFIG_X86_64 */ 163#else /* CONFIG_X86_64 */
159 164
160const unsigned char *const *find_nop_table(void) 165static const unsigned char *const *__init_or_module find_nop_table(void)
161{ 166{
162 if (boot_cpu_has(X86_FEATURE_K8)) 167 if (boot_cpu_has(X86_FEATURE_K8))
163 return k8_nops; 168 return k8_nops;
@@ -172,7 +177,7 @@ const unsigned char *const *find_nop_table(void)
172#endif /* CONFIG_X86_64 */ 177#endif /* CONFIG_X86_64 */
173 178
174/* Use this to add nops to a buffer, then text_poke the whole buffer. */ 179/* Use this to add nops to a buffer, then text_poke the whole buffer. */
175void add_nops(void *insns, unsigned int len) 180static void __init_or_module add_nops(void *insns, unsigned int len)
176{ 181{
177 const unsigned char *const *noptable = find_nop_table(); 182 const unsigned char *const *noptable = find_nop_table();
178 183
@@ -185,10 +190,10 @@ void add_nops(void *insns, unsigned int len)
185 len -= noplen; 190 len -= noplen;
186 } 191 }
187} 192}
188EXPORT_SYMBOL_GPL(add_nops);
189 193
190extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 194extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
191extern u8 *__smp_locks[], *__smp_locks_end[]; 195extern u8 *__smp_locks[], *__smp_locks_end[];
196static void *text_poke_early(void *addr, const void *opcode, size_t len);
192 197
193/* Replace instructions with better alternatives for this CPU type. 198/* Replace instructions with better alternatives for this CPU type.
194 This runs before SMP is initialized to avoid SMP problems with 199 This runs before SMP is initialized to avoid SMP problems with
@@ -196,7 +201,8 @@ extern u8 *__smp_locks[], *__smp_locks_end[];
196 APs have less capabilities than the boot processor are not handled. 201 APs have less capabilities than the boot processor are not handled.
197 Tough. Make sure you disable such features by hand. */ 202 Tough. Make sure you disable such features by hand. */
198 203
199void apply_alternatives(struct alt_instr *start, struct alt_instr *end) 204void __init_or_module apply_alternatives(struct alt_instr *start,
205 struct alt_instr *end)
200{ 206{
201 struct alt_instr *a; 207 struct alt_instr *a;
202 char insnbuf[MAX_PATCH_LEN]; 208 char insnbuf[MAX_PATCH_LEN];
@@ -279,9 +285,10 @@ static LIST_HEAD(smp_alt_modules);
279static DEFINE_MUTEX(smp_alt); 285static DEFINE_MUTEX(smp_alt);
280static int smp_mode = 1; /* protected by smp_alt */ 286static int smp_mode = 1; /* protected by smp_alt */
281 287
282void alternatives_smp_module_add(struct module *mod, char *name, 288void __init_or_module alternatives_smp_module_add(struct module *mod,
283 void *locks, void *locks_end, 289 char *name,
284 void *text, void *text_end) 290 void *locks, void *locks_end,
291 void *text, void *text_end)
285{ 292{
286 struct smp_alt_module *smp; 293 struct smp_alt_module *smp;
287 294
@@ -317,7 +324,7 @@ void alternatives_smp_module_add(struct module *mod, char *name,
317 mutex_unlock(&smp_alt); 324 mutex_unlock(&smp_alt);
318} 325}
319 326
320void alternatives_smp_module_del(struct module *mod) 327void __init_or_module alternatives_smp_module_del(struct module *mod)
321{ 328{
322 struct smp_alt_module *item; 329 struct smp_alt_module *item;
323 330
@@ -386,8 +393,8 @@ void alternatives_smp_switch(int smp)
386#endif 393#endif
387 394
388#ifdef CONFIG_PARAVIRT 395#ifdef CONFIG_PARAVIRT
389void apply_paravirt(struct paravirt_patch_site *start, 396void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
390 struct paravirt_patch_site *end) 397 struct paravirt_patch_site *end)
391{ 398{
392 struct paravirt_patch_site *p; 399 struct paravirt_patch_site *p;
393 char insnbuf[MAX_PATCH_LEN]; 400 char insnbuf[MAX_PATCH_LEN];
@@ -485,13 +492,14 @@ void __init alternative_instructions(void)
485 * instructions. And on the local CPU you need to be protected again NMI or MCE 492 * instructions. And on the local CPU you need to be protected again NMI or MCE
486 * handlers seeing an inconsistent instruction while you patch. 493 * handlers seeing an inconsistent instruction while you patch.
487 */ 494 */
488void *text_poke_early(void *addr, const void *opcode, size_t len) 495static void *__init_or_module text_poke_early(void *addr, const void *opcode,
496 size_t len)
489{ 497{
490 unsigned long flags; 498 unsigned long flags;
491 local_irq_save(flags); 499 local_irq_save(flags);
492 memcpy(addr, opcode, len); 500 memcpy(addr, opcode, len);
493 local_irq_restore(flags);
494 sync_core(); 501 sync_core();
502 local_irq_restore(flags);
495 /* Could also do a CLFLUSH here to speed up CPU recovery; but 503 /* Could also do a CLFLUSH here to speed up CPU recovery; but
496 that causes hangs on some VIA CPUs. */ 504 that causes hangs on some VIA CPUs. */
497 return addr; 505 return addr;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 0a1c2830ec66..159740decc41 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -49,6 +49,7 @@
49#include <asm/mtrr.h> 49#include <asm/mtrr.h>
50#include <asm/smp.h> 50#include <asm/smp.h>
51#include <asm/mce.h> 51#include <asm/mce.h>
52#include <asm/kvm_para.h>
52 53
53unsigned int num_processors; 54unsigned int num_processors;
54 55
@@ -1361,52 +1362,80 @@ void enable_x2apic(void)
1361} 1362}
1362#endif /* CONFIG_X86_X2APIC */ 1363#endif /* CONFIG_X86_X2APIC */
1363 1364
1364void __init enable_IR_x2apic(void) 1365int __init enable_IR(void)
1365{ 1366{
1366#ifdef CONFIG_INTR_REMAP 1367#ifdef CONFIG_INTR_REMAP
1367 int ret;
1368 unsigned long flags;
1369 struct IO_APIC_route_entry **ioapic_entries = NULL;
1370
1371 ret = dmar_table_init();
1372 if (ret) {
1373 pr_debug("dmar_table_init() failed with %d:\n", ret);
1374 goto ir_failed;
1375 }
1376
1377 if (!intr_remapping_supported()) { 1368 if (!intr_remapping_supported()) {
1378 pr_debug("intr-remapping not supported\n"); 1369 pr_debug("intr-remapping not supported\n");
1379 goto ir_failed; 1370 return 0;
1380 } 1371 }
1381 1372
1382
1383 if (!x2apic_preenabled && skip_ioapic_setup) { 1373 if (!x2apic_preenabled && skip_ioapic_setup) {
1384 pr_info("Skipped enabling intr-remap because of skipping " 1374 pr_info("Skipped enabling intr-remap because of skipping "
1385 "io-apic setup\n"); 1375 "io-apic setup\n");
1386 return; 1376 return 0;
1387 } 1377 }
1388 1378
1379 if (enable_intr_remapping(x2apic_supported()))
1380 return 0;
1381
1382 pr_info("Enabled Interrupt-remapping\n");
1383
1384 return 1;
1385
1386#endif
1387 return 0;
1388}
1389
1390void __init enable_IR_x2apic(void)
1391{
1392 unsigned long flags;
1393 struct IO_APIC_route_entry **ioapic_entries = NULL;
1394 int ret, x2apic_enabled = 0;
1395 int dmar_table_init_ret = 0;
1396
1397#ifdef CONFIG_INTR_REMAP
1398 dmar_table_init_ret = dmar_table_init();
1399 if (dmar_table_init_ret)
1400 pr_debug("dmar_table_init() failed with %d:\n",
1401 dmar_table_init_ret);
1402#endif
1403
1389 ioapic_entries = alloc_ioapic_entries(); 1404 ioapic_entries = alloc_ioapic_entries();
1390 if (!ioapic_entries) { 1405 if (!ioapic_entries) {
1391 pr_info("Allocate ioapic_entries failed: %d\n", ret); 1406 pr_err("Allocate ioapic_entries failed\n");
1392 goto end; 1407 goto out;
1393 } 1408 }
1394 1409
1395 ret = save_IO_APIC_setup(ioapic_entries); 1410 ret = save_IO_APIC_setup(ioapic_entries);
1396 if (ret) { 1411 if (ret) {
1397 pr_info("Saving IO-APIC state failed: %d\n", ret); 1412 pr_info("Saving IO-APIC state failed: %d\n", ret);
1398 goto end; 1413 goto out;
1399 } 1414 }
1400 1415
1401 local_irq_save(flags); 1416 local_irq_save(flags);
1402 mask_IO_APIC_setup(ioapic_entries);
1403 mask_8259A(); 1417 mask_8259A();
1418 mask_IO_APIC_setup(ioapic_entries);
1404 1419
1405 ret = enable_intr_remapping(x2apic_supported()); 1420 if (dmar_table_init_ret)
1406 if (ret) 1421 ret = 0;
1407 goto end_restore; 1422 else
1423 ret = enable_IR();
1408 1424
1409 pr_info("Enabled Interrupt-remapping\n"); 1425 if (!ret) {
1426 /* IR is required if there is APIC ID > 255 even when running
1427 * under KVM
1428 */
1429 if (max_physical_apicid > 255 || !kvm_para_available())
1430 goto nox2apic;
1431 /*
1432 * without IR all CPUs can be addressed by IOAPIC/MSI
1433 * only in physical mode
1434 */
1435 x2apic_force_phys();
1436 }
1437
1438 x2apic_enabled = 1;
1410 1439
1411 if (x2apic_supported() && !x2apic_mode) { 1440 if (x2apic_supported() && !x2apic_mode) {
1412 x2apic_mode = 1; 1441 x2apic_mode = 1;
@@ -1414,41 +1443,25 @@ void __init enable_IR_x2apic(void)
1414 pr_info("Enabled x2apic\n"); 1443 pr_info("Enabled x2apic\n");
1415 } 1444 }
1416 1445
1417end_restore: 1446nox2apic:
1418 if (ret) 1447 if (!ret) /* IR enabling failed */
1419 /*
1420 * IR enabling failed
1421 */
1422 restore_IO_APIC_setup(ioapic_entries); 1448 restore_IO_APIC_setup(ioapic_entries);
1423
1424 unmask_8259A(); 1449 unmask_8259A();
1425 local_irq_restore(flags); 1450 local_irq_restore(flags);
1426 1451
1427end: 1452out:
1428 if (ioapic_entries) 1453 if (ioapic_entries)
1429 free_ioapic_entries(ioapic_entries); 1454 free_ioapic_entries(ioapic_entries);
1430 1455
1431 if (!ret) 1456 if (x2apic_enabled)
1432 return; 1457 return;
1433 1458
1434ir_failed:
1435 if (x2apic_preenabled) 1459 if (x2apic_preenabled)
1436 panic("x2apic enabled by bios. But IR enabling failed"); 1460 panic("x2apic: enabled by BIOS but kernel init failed.");
1437 else if (cpu_has_x2apic) 1461 else if (cpu_has_x2apic)
1438 pr_info("Not enabling x2apic,Intr-remapping\n"); 1462 pr_info("Not enabling x2apic, Intr-remapping init failed.\n");
1439#else
1440 if (!cpu_has_x2apic)
1441 return;
1442
1443 if (x2apic_preenabled)
1444 panic("x2apic enabled prior OS handover,"
1445 " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP");
1446#endif
1447
1448 return;
1449} 1463}
1450 1464
1451
1452#ifdef CONFIG_X86_64 1465#ifdef CONFIG_X86_64
1453/* 1466/*
1454 * Detect and enable local APICs on non-SMP boards. 1467 * Detect and enable local APICs on non-SMP boards.
@@ -1549,8 +1562,6 @@ no_apic:
1549#ifdef CONFIG_X86_64 1562#ifdef CONFIG_X86_64
1550void __init early_init_lapic_mapping(void) 1563void __init early_init_lapic_mapping(void)
1551{ 1564{
1552 unsigned long phys_addr;
1553
1554 /* 1565 /*
1555 * If no local APIC can be found then go out 1566 * If no local APIC can be found then go out
1556 * : it means there is no mpatable and MADT 1567 * : it means there is no mpatable and MADT
@@ -1558,11 +1569,9 @@ void __init early_init_lapic_mapping(void)
1558 if (!smp_found_config) 1569 if (!smp_found_config)
1559 return; 1570 return;
1560 1571
1561 phys_addr = mp_lapic_addr; 1572 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
1562
1563 set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
1564 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", 1573 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1565 APIC_BASE, phys_addr); 1574 APIC_BASE, mp_lapic_addr);
1566 1575
1567 /* 1576 /*
1568 * Fetch the APIC ID of the BSP in case we have a 1577 * Fetch the APIC ID of the BSP in case we have a
@@ -1651,7 +1660,6 @@ int __init APIC_init_uniprocessor(void)
1651 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { 1660 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
1652 pr_err("BIOS bug, local APIC 0x%x not detected!...\n", 1661 pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
1653 boot_cpu_physical_apicid); 1662 boot_cpu_physical_apicid);
1654 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1655 return -1; 1663 return -1;
1656 } 1664 }
1657#endif 1665#endif
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 8952a5890281..89174f847b49 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -167,7 +167,7 @@ static int es7000_apic_is_cluster(void)
167{ 167{
168 /* MPENTIUMIII */ 168 /* MPENTIUMIII */
169 if (boot_cpu_data.x86 == 6 && 169 if (boot_cpu_data.x86 == 6 &&
170 (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) 170 (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11))
171 return 1; 171 return 1;
172 172
173 return 0; 173 return 0;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index d2ed6c5ddc80..3c8f9e75d038 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -66,6 +66,8 @@
66#include <asm/apic.h> 66#include <asm/apic.h>
67 67
68#define __apicdebuginit(type) static type __init 68#define __apicdebuginit(type) static type __init
69#define for_each_irq_pin(entry, head) \
70 for (entry = head; entry; entry = entry->next)
69 71
70/* 72/*
71 * Is the SiS APIC rmw bug present ? 73 * Is the SiS APIC rmw bug present ?
@@ -85,6 +87,9 @@ int nr_ioapic_registers[MAX_IO_APICS];
85struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; 87struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
86int nr_ioapics; 88int nr_ioapics;
87 89
90/* IO APIC gsi routing info */
91struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS];
92
88/* MP IRQ source entries */ 93/* MP IRQ source entries */
89struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; 94struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
90 95
@@ -116,15 +121,6 @@ static int __init parse_noapic(char *str)
116} 121}
117early_param("noapic", parse_noapic); 122early_param("noapic", parse_noapic);
118 123
119struct irq_pin_list;
120
121/*
122 * This is performance-critical, we want to do it O(1)
123 *
124 * the indexing order of this array favors 1:1 mappings
125 * between pins and IRQs.
126 */
127
128struct irq_pin_list { 124struct irq_pin_list {
129 int apic, pin; 125 int apic, pin;
130 struct irq_pin_list *next; 126 struct irq_pin_list *next;
@@ -139,6 +135,11 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
139 return pin; 135 return pin;
140} 136}
141 137
138/*
139 * This is performance-critical, we want to do it O(1)
140 *
141 * Most irqs are mapped 1:1 with pins.
142 */
142struct irq_cfg { 143struct irq_cfg {
143 struct irq_pin_list *irq_2_pin; 144 struct irq_pin_list *irq_2_pin;
144 cpumask_var_t domain; 145 cpumask_var_t domain;
@@ -414,13 +415,10 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
414 unsigned long flags; 415 unsigned long flags;
415 416
416 spin_lock_irqsave(&ioapic_lock, flags); 417 spin_lock_irqsave(&ioapic_lock, flags);
417 entry = cfg->irq_2_pin; 418 for_each_irq_pin(entry, cfg->irq_2_pin) {
418 for (;;) {
419 unsigned int reg; 419 unsigned int reg;
420 int pin; 420 int pin;
421 421
422 if (!entry)
423 break;
424 pin = entry->pin; 422 pin = entry->pin;
425 reg = io_apic_read(entry->apic, 0x10 + pin*2); 423 reg = io_apic_read(entry->apic, 0x10 + pin*2);
426 /* Is the remote IRR bit set? */ 424 /* Is the remote IRR bit set? */
@@ -428,9 +426,6 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
428 spin_unlock_irqrestore(&ioapic_lock, flags); 426 spin_unlock_irqrestore(&ioapic_lock, flags);
429 return true; 427 return true;
430 } 428 }
431 if (!entry->next)
432 break;
433 entry = entry->next;
434 } 429 }
435 spin_unlock_irqrestore(&ioapic_lock, flags); 430 spin_unlock_irqrestore(&ioapic_lock, flags);
436 431
@@ -498,72 +493,68 @@ static void ioapic_mask_entry(int apic, int pin)
498 * shared ISA-space IRQs, so we have to support them. We are super 493 * shared ISA-space IRQs, so we have to support them. We are super
499 * fast in the common case, and fast for shared ISA-space IRQs. 494 * fast in the common case, and fast for shared ISA-space IRQs.
500 */ 495 */
501static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) 496static int
497add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
502{ 498{
503 struct irq_pin_list *entry; 499 struct irq_pin_list **last, *entry;
504 500
505 entry = cfg->irq_2_pin; 501 /* don't allow duplicates */
506 if (!entry) { 502 last = &cfg->irq_2_pin;
507 entry = get_one_free_irq_2_pin(node); 503 for_each_irq_pin(entry, cfg->irq_2_pin) {
508 if (!entry) {
509 printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
510 apic, pin);
511 return;
512 }
513 cfg->irq_2_pin = entry;
514 entry->apic = apic;
515 entry->pin = pin;
516 return;
517 }
518
519 while (entry->next) {
520 /* not again, please */
521 if (entry->apic == apic && entry->pin == pin) 504 if (entry->apic == apic && entry->pin == pin)
522 return; 505 return 0;
523 506 last = &entry->next;
524 entry = entry->next;
525 } 507 }
526 508
527 entry->next = get_one_free_irq_2_pin(node); 509 entry = get_one_free_irq_2_pin(node);
528 entry = entry->next; 510 if (!entry) {
511 printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
512 node, apic, pin);
513 return -ENOMEM;
514 }
529 entry->apic = apic; 515 entry->apic = apic;
530 entry->pin = pin; 516 entry->pin = pin;
517
518 *last = entry;
519 return 0;
520}
521
522static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
523{
524 if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
525 panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
531} 526}
532 527
533/* 528/*
534 * Reroute an IRQ to a different pin. 529 * Reroute an IRQ to a different pin.
535 */ 530 */
536static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, 531static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
537 int oldapic, int oldpin, 532 int oldapic, int oldpin,
538 int newapic, int newpin) 533 int newapic, int newpin)
539{ 534{
540 struct irq_pin_list *entry = cfg->irq_2_pin; 535 struct irq_pin_list *entry;
541 int replaced = 0;
542 536
543 while (entry) { 537 for_each_irq_pin(entry, cfg->irq_2_pin) {
544 if (entry->apic == oldapic && entry->pin == oldpin) { 538 if (entry->apic == oldapic && entry->pin == oldpin) {
545 entry->apic = newapic; 539 entry->apic = newapic;
546 entry->pin = newpin; 540 entry->pin = newpin;
547 replaced = 1;
548 /* every one is different, right? */ 541 /* every one is different, right? */
549 break; 542 return;
550 } 543 }
551 entry = entry->next;
552 } 544 }
553 545
554 /* why? call replace before add? */ 546 /* old apic/pin didn't exist, so just add new ones */
555 if (!replaced) 547 add_pin_to_irq_node(cfg, node, newapic, newpin);
556 add_pin_to_irq_node(cfg, node, newapic, newpin);
557} 548}
558 549
559static inline void io_apic_modify_irq(struct irq_cfg *cfg, 550static void io_apic_modify_irq(struct irq_cfg *cfg,
560 int mask_and, int mask_or, 551 int mask_and, int mask_or,
561 void (*final)(struct irq_pin_list *entry)) 552 void (*final)(struct irq_pin_list *entry))
562{ 553{
563 int pin; 554 int pin;
564 struct irq_pin_list *entry; 555 struct irq_pin_list *entry;
565 556
566 for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { 557 for_each_irq_pin(entry, cfg->irq_2_pin) {
567 unsigned int reg; 558 unsigned int reg;
568 pin = entry->pin; 559 pin = entry->pin;
569 reg = io_apic_read(entry->apic, 0x10 + pin * 2); 560 reg = io_apic_read(entry->apic, 0x10 + pin * 2);
@@ -580,7 +571,6 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
580 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); 571 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
581} 572}
582 573
583#ifdef CONFIG_X86_64
584static void io_apic_sync(struct irq_pin_list *entry) 574static void io_apic_sync(struct irq_pin_list *entry)
585{ 575{
586 /* 576 /*
@@ -596,11 +586,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
596{ 586{
597 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); 587 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
598} 588}
599#else /* CONFIG_X86_32 */
600static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
601{
602 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
603}
604 589
605static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) 590static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
606{ 591{
@@ -613,7 +598,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
613 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 598 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
614 IO_APIC_REDIR_LEVEL_TRIGGER, NULL); 599 IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
615} 600}
616#endif /* CONFIG_X86_32 */
617 601
618static void mask_IO_APIC_irq_desc(struct irq_desc *desc) 602static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
619{ 603{
@@ -1702,12 +1686,8 @@ __apicdebuginit(void) print_IO_APIC(void)
1702 if (!entry) 1686 if (!entry)
1703 continue; 1687 continue;
1704 printk(KERN_DEBUG "IRQ%d ", irq); 1688 printk(KERN_DEBUG "IRQ%d ", irq);
1705 for (;;) { 1689 for_each_irq_pin(entry, cfg->irq_2_pin)
1706 printk("-> %d:%d", entry->apic, entry->pin); 1690 printk("-> %d:%d", entry->apic, entry->pin);
1707 if (!entry->next)
1708 break;
1709 entry = entry->next;
1710 }
1711 printk("\n"); 1691 printk("\n");
1712 } 1692 }
1713 1693
@@ -2211,7 +2191,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
2211 return was_pending; 2191 return was_pending;
2212} 2192}
2213 2193
2214#ifdef CONFIG_X86_64
2215static int ioapic_retrigger_irq(unsigned int irq) 2194static int ioapic_retrigger_irq(unsigned int irq)
2216{ 2195{
2217 2196
@@ -2224,14 +2203,6 @@ static int ioapic_retrigger_irq(unsigned int irq)
2224 2203
2225 return 1; 2204 return 1;
2226} 2205}
2227#else
2228static int ioapic_retrigger_irq(unsigned int irq)
2229{
2230 apic->send_IPI_self(irq_cfg(irq)->vector);
2231
2232 return 1;
2233}
2234#endif
2235 2206
2236/* 2207/*
2237 * Level and edge triggered IO-APIC interrupts need different handling, 2208 * Level and edge triggered IO-APIC interrupts need different handling,
@@ -2269,13 +2240,9 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2269 struct irq_pin_list *entry; 2240 struct irq_pin_list *entry;
2270 u8 vector = cfg->vector; 2241 u8 vector = cfg->vector;
2271 2242
2272 entry = cfg->irq_2_pin; 2243 for_each_irq_pin(entry, cfg->irq_2_pin) {
2273 for (;;) {
2274 unsigned int reg; 2244 unsigned int reg;
2275 2245
2276 if (!entry)
2277 break;
2278
2279 apic = entry->apic; 2246 apic = entry->apic;
2280 pin = entry->pin; 2247 pin = entry->pin;
2281 /* 2248 /*
@@ -2288,9 +2255,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2288 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 2255 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
2289 reg |= vector; 2256 reg |= vector;
2290 io_apic_modify(apic, 0x10 + pin*2, reg); 2257 io_apic_modify(apic, 0x10 + pin*2, reg);
2291 if (!entry->next)
2292 break;
2293 entry = entry->next;
2294 } 2258 }
2295} 2259}
2296 2260
@@ -2515,11 +2479,8 @@ atomic_t irq_mis_count;
2515static void ack_apic_level(unsigned int irq) 2479static void ack_apic_level(unsigned int irq)
2516{ 2480{
2517 struct irq_desc *desc = irq_to_desc(irq); 2481 struct irq_desc *desc = irq_to_desc(irq);
2518
2519#ifdef CONFIG_X86_32
2520 unsigned long v; 2482 unsigned long v;
2521 int i; 2483 int i;
2522#endif
2523 struct irq_cfg *cfg; 2484 struct irq_cfg *cfg;
2524 int do_unmask_irq = 0; 2485 int do_unmask_irq = 0;
2525 2486
@@ -2532,31 +2493,28 @@ static void ack_apic_level(unsigned int irq)
2532 } 2493 }
2533#endif 2494#endif
2534 2495
2535#ifdef CONFIG_X86_32
2536 /* 2496 /*
2537 * It appears there is an erratum which affects at least version 0x11 2497 * It appears there is an erratum which affects at least version 0x11
2538 * of I/O APIC (that's the 82093AA and cores integrated into various 2498 * of I/O APIC (that's the 82093AA and cores integrated into various
2539 * chipsets). Under certain conditions a level-triggered interrupt is 2499 * chipsets). Under certain conditions a level-triggered interrupt is
2540 * erroneously delivered as edge-triggered one but the respective IRR 2500 * erroneously delivered as edge-triggered one but the respective IRR
2541 * bit gets set nevertheless. As a result the I/O unit expects an EOI 2501 * bit gets set nevertheless. As a result the I/O unit expects an EOI
2542 * message but it will never arrive and further interrupts are blocked 2502 * message but it will never arrive and further interrupts are blocked
2543 * from the source. The exact reason is so far unknown, but the 2503 * from the source. The exact reason is so far unknown, but the
2544 * phenomenon was observed when two consecutive interrupt requests 2504 * phenomenon was observed when two consecutive interrupt requests
2545 * from a given source get delivered to the same CPU and the source is 2505 * from a given source get delivered to the same CPU and the source is
2546 * temporarily disabled in between. 2506 * temporarily disabled in between.
2547 * 2507 *
2548 * A workaround is to simulate an EOI message manually. We achieve it 2508 * A workaround is to simulate an EOI message manually. We achieve it
2549 * by setting the trigger mode to edge and then to level when the edge 2509 * by setting the trigger mode to edge and then to level when the edge
2550 * trigger mode gets detected in the TMR of a local APIC for a 2510 * trigger mode gets detected in the TMR of a local APIC for a
2551 * level-triggered interrupt. We mask the source for the time of the 2511 * level-triggered interrupt. We mask the source for the time of the
2552 * operation to prevent an edge-triggered interrupt escaping meanwhile. 2512 * operation to prevent an edge-triggered interrupt escaping meanwhile.
2553 * The idea is from Manfred Spraul. --macro 2513 * The idea is from Manfred Spraul. --macro
2554 */ 2514 */
2555 cfg = desc->chip_data; 2515 cfg = desc->chip_data;
2556 i = cfg->vector; 2516 i = cfg->vector;
2557
2558 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); 2517 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2559#endif
2560 2518
2561 /* 2519 /*
2562 * We must acknowledge the irq before we move it or the acknowledge will 2520 * We must acknowledge the irq before we move it or the acknowledge will
@@ -2598,7 +2556,7 @@ static void ack_apic_level(unsigned int irq)
2598 unmask_IO_APIC_irq_desc(desc); 2556 unmask_IO_APIC_irq_desc(desc);
2599 } 2557 }
2600 2558
2601#ifdef CONFIG_X86_32 2559 /* Tail end of version 0x11 I/O APIC bug workaround */
2602 if (!(v & (1 << (i & 0x1f)))) { 2560 if (!(v & (1 << (i & 0x1f)))) {
2603 atomic_inc(&irq_mis_count); 2561 atomic_inc(&irq_mis_count);
2604 spin_lock(&ioapic_lock); 2562 spin_lock(&ioapic_lock);
@@ -2606,26 +2564,15 @@ static void ack_apic_level(unsigned int irq)
2606 __unmask_and_level_IO_APIC_irq(cfg); 2564 __unmask_and_level_IO_APIC_irq(cfg);
2607 spin_unlock(&ioapic_lock); 2565 spin_unlock(&ioapic_lock);
2608 } 2566 }
2609#endif
2610} 2567}
2611 2568
2612#ifdef CONFIG_INTR_REMAP 2569#ifdef CONFIG_INTR_REMAP
2613static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) 2570static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2614{ 2571{
2615 int apic, pin;
2616 struct irq_pin_list *entry; 2572 struct irq_pin_list *entry;
2617 2573
2618 entry = cfg->irq_2_pin; 2574 for_each_irq_pin(entry, cfg->irq_2_pin)
2619 for (;;) { 2575 io_apic_eoi(entry->apic, entry->pin);
2620
2621 if (!entry)
2622 break;
2623
2624 apic = entry->apic;
2625 pin = entry->pin;
2626 io_apic_eoi(apic, pin);
2627 entry = entry->next;
2628 }
2629} 2576}
2630 2577
2631static void 2578static void
@@ -3241,8 +3188,7 @@ void destroy_irq(unsigned int irq)
3241 cfg = desc->chip_data; 3188 cfg = desc->chip_data;
3242 dynamic_irq_cleanup(irq); 3189 dynamic_irq_cleanup(irq);
3243 /* connect back irq_cfg */ 3190 /* connect back irq_cfg */
3244 if (desc) 3191 desc->chip_data = cfg;
3245 desc->chip_data = cfg;
3246 3192
3247 free_irte(irq); 3193 free_irte(irq);
3248 spin_lock_irqsave(&vector_lock, flags); 3194 spin_lock_irqsave(&vector_lock, flags);
@@ -3912,7 +3858,11 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
3912 */ 3858 */
3913 if (irq >= NR_IRQS_LEGACY) { 3859 if (irq >= NR_IRQS_LEGACY) {
3914 cfg = desc->chip_data; 3860 cfg = desc->chip_data;
3915 add_pin_to_irq_node(cfg, node, ioapic, pin); 3861 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
3862 printk(KERN_INFO "can not add pin %d for irq %d\n",
3863 pin, irq);
3864 return 0;
3865 }
3916 } 3866 }
3917 3867
3918 setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); 3868 setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
@@ -3941,11 +3891,28 @@ int io_apic_set_pci_routing(struct device *dev, int irq,
3941 return __io_apic_set_pci_routing(dev, irq, irq_attr); 3891 return __io_apic_set_pci_routing(dev, irq, irq_attr);
3942} 3892}
3943 3893
3944/* -------------------------------------------------------------------------- 3894u8 __init io_apic_unique_id(u8 id)
3945 ACPI-based IOAPIC Configuration 3895{
3946 -------------------------------------------------------------------------- */ 3896#ifdef CONFIG_X86_32
3897 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
3898 !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
3899 return io_apic_get_unique_id(nr_ioapics, id);
3900 else
3901 return id;
3902#else
3903 int i;
3904 DECLARE_BITMAP(used, 256);
3947 3905
3948#ifdef CONFIG_ACPI 3906 bitmap_zero(used, 256);
3907 for (i = 0; i < nr_ioapics; i++) {
3908 struct mpc_ioapic *ia = &mp_ioapics[i];
3909 __set_bit(ia->apicid, used);
3910 }
3911 if (!test_bit(id, used))
3912 return id;
3913 return find_first_zero_bit(used, 256);
3914#endif
3915}
3949 3916
3950#ifdef CONFIG_X86_32 3917#ifdef CONFIG_X86_32
3951int __init io_apic_get_unique_id(int ioapic, int apic_id) 3918int __init io_apic_get_unique_id(int ioapic, int apic_id)
@@ -4054,8 +4021,6 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
4054 return 0; 4021 return 0;
4055} 4022}
4056 4023
4057#endif /* CONFIG_ACPI */
4058
4059/* 4024/*
4060 * This function currently is only a helper for the i386 smp boot process where 4025 * This function currently is only a helper for the i386 smp boot process where
4061 * we need to reprogram the ioredtbls to cater for the cpus which have come online 4026 * we need to reprogram the ioredtbls to cater for the cpus which have come online
@@ -4109,7 +4074,7 @@ void __init setup_ioapic_dest(void)
4109 4074
4110static struct resource *ioapic_resources; 4075static struct resource *ioapic_resources;
4111 4076
4112static struct resource * __init ioapic_setup_resources(void) 4077static struct resource * __init ioapic_setup_resources(int nr_ioapics)
4113{ 4078{
4114 unsigned long n; 4079 unsigned long n;
4115 struct resource *res; 4080 struct resource *res;
@@ -4125,15 +4090,13 @@ static struct resource * __init ioapic_setup_resources(void)
4125 mem = alloc_bootmem(n); 4090 mem = alloc_bootmem(n);
4126 res = (void *)mem; 4091 res = (void *)mem;
4127 4092
4128 if (mem != NULL) { 4093 mem += sizeof(struct resource) * nr_ioapics;
4129 mem += sizeof(struct resource) * nr_ioapics;
4130 4094
4131 for (i = 0; i < nr_ioapics; i++) { 4095 for (i = 0; i < nr_ioapics; i++) {
4132 res[i].name = mem; 4096 res[i].name = mem;
4133 res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; 4097 res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
4134 sprintf(mem, "IOAPIC %u", i); 4098 sprintf(mem, "IOAPIC %u", i);
4135 mem += IOAPIC_RESOURCE_NAME_SIZE; 4099 mem += IOAPIC_RESOURCE_NAME_SIZE;
4136 }
4137 } 4100 }
4138 4101
4139 ioapic_resources = res; 4102 ioapic_resources = res;
@@ -4147,7 +4110,7 @@ void __init ioapic_init_mappings(void)
4147 struct resource *ioapic_res; 4110 struct resource *ioapic_res;
4148 int i; 4111 int i;
4149 4112
4150 ioapic_res = ioapic_setup_resources(); 4113 ioapic_res = ioapic_setup_resources(nr_ioapics);
4151 for (i = 0; i < nr_ioapics; i++) { 4114 for (i = 0; i < nr_ioapics; i++) {
4152 if (smp_found_config) { 4115 if (smp_found_config) {
4153 ioapic_phys = mp_ioapics[i].apicaddr; 4116 ioapic_phys = mp_ioapics[i].apicaddr;
@@ -4176,11 +4139,9 @@ fake_ioapic_page:
4176 __fix_to_virt(idx), ioapic_phys); 4139 __fix_to_virt(idx), ioapic_phys);
4177 idx++; 4140 idx++;
4178 4141
4179 if (ioapic_res != NULL) { 4142 ioapic_res->start = ioapic_phys;
4180 ioapic_res->start = ioapic_phys; 4143 ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
4181 ioapic_res->end = ioapic_phys + (4 * 1024) - 1; 4144 ioapic_res++;
4182 ioapic_res++;
4183 }
4184 } 4145 }
4185} 4146}
4186 4147
@@ -4201,3 +4162,76 @@ void __init ioapic_insert_resources(void)
4201 r++; 4162 r++;
4202 } 4163 }
4203} 4164}
4165
4166int mp_find_ioapic(int gsi)
4167{
4168 int i = 0;
4169
4170 /* Find the IOAPIC that manages this GSI. */
4171 for (i = 0; i < nr_ioapics; i++) {
4172 if ((gsi >= mp_gsi_routing[i].gsi_base)
4173 && (gsi <= mp_gsi_routing[i].gsi_end))
4174 return i;
4175 }
4176
4177 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
4178 return -1;
4179}
4180
4181int mp_find_ioapic_pin(int ioapic, int gsi)
4182{
4183 if (WARN_ON(ioapic == -1))
4184 return -1;
4185 if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end))
4186 return -1;
4187
4188 return gsi - mp_gsi_routing[ioapic].gsi_base;
4189}
4190
4191static int bad_ioapic(unsigned long address)
4192{
4193 if (nr_ioapics >= MAX_IO_APICS) {
4194 printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded "
4195 "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
4196 return 1;
4197 }
4198 if (!address) {
4199 printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
4200 " found in table, skipping!\n");
4201 return 1;
4202 }
4203 return 0;
4204}
4205
4206void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4207{
4208 int idx = 0;
4209
4210 if (bad_ioapic(address))
4211 return;
4212
4213 idx = nr_ioapics;
4214
4215 mp_ioapics[idx].type = MP_IOAPIC;
4216 mp_ioapics[idx].flags = MPC_APIC_USABLE;
4217 mp_ioapics[idx].apicaddr = address;
4218
4219 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
4220 mp_ioapics[idx].apicid = io_apic_unique_id(id);
4221 mp_ioapics[idx].apicver = io_apic_get_version(idx);
4222
4223 /*
4224 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
4225 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
4226 */
4227 mp_gsi_routing[idx].gsi_base = gsi_base;
4228 mp_gsi_routing[idx].gsi_end = gsi_base +
4229 io_apic_get_redir_entries(idx);
4230
4231 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
4232 "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
4233 mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
4234 mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end);
4235
4236 nr_ioapics++;
4237}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 6ef00ba4c886..08385e090a6f 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -153,7 +153,7 @@ int safe_smp_processor_id(void)
153{ 153{
154 int apicid, cpuid; 154 int apicid, cpuid;
155 155
156 if (!boot_cpu_has(X86_FEATURE_APIC)) 156 if (!cpu_has_apic)
157 return 0; 157 return 0;
158 158
159 apicid = hard_smp_processor_id(); 159 apicid = hard_smp_processor_id();
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index fcec2f1d34a1..65edc180fc82 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -55,11 +55,11 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
55void __init default_setup_apic_routing(void) 55void __init default_setup_apic_routing(void)
56{ 56{
57#ifdef CONFIG_X86_X2APIC 57#ifdef CONFIG_X86_X2APIC
58 if (x2apic_mode && (apic != &apic_x2apic_phys && 58 if (x2apic_mode
59#ifdef CONFIG_X86_UV 59#ifdef CONFIG_X86_UV
60 apic != &apic_x2apic_uv_x && 60 && apic != &apic_x2apic_uv_x
61#endif 61#endif
62 apic != &apic_x2apic_cluster)) { 62 ) {
63 if (x2apic_phys) 63 if (x2apic_phys)
64 apic = &apic_x2apic_phys; 64 apic = &apic_x2apic_phys;
65 else 65 else
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 442b5508893f..151ace69a5aa 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -403,7 +403,15 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
403static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); 403static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
404static struct apm_user *user_list; 404static struct apm_user *user_list;
405static DEFINE_SPINLOCK(user_list_lock); 405static DEFINE_SPINLOCK(user_list_lock);
406static const struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } }; 406
407/*
408 * Set up a segment that references the real mode segment 0x40
409 * that extends up to the end of page zero (that we have reserved).
410 * This is for buggy BIOS's that refer to (real mode) segment 0x40
411 * even though they are called in protected mode.
412 */
413static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092,
414 (unsigned long)__va(0x400UL), PAGE_SIZE - 0x400 - 1);
407 415
408static const char driver_version[] = "1.16ac"; /* no spaces */ 416static const char driver_version[] = "1.16ac"; /* no spaces */
409 417
@@ -2332,15 +2340,6 @@ static int __init apm_init(void)
2332 pm_flags |= PM_APM; 2340 pm_flags |= PM_APM;
2333 2341
2334 /* 2342 /*
2335 * Set up a segment that references the real mode segment 0x40
2336 * that extends up to the end of page zero (that we have reserved).
2337 * This is for buggy BIOS's that refer to (real mode) segment 0x40
2338 * even though they are called in protected mode.
2339 */
2340 set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
2341 _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
2342
2343 /*
2344 * Set up the long jump entry point to the APM BIOS, which is called 2343 * Set up the long jump entry point to the APM BIOS, which is called
2345 * from inline assembly. 2344 * from inline assembly.
2346 */ 2345 */
@@ -2358,12 +2357,12 @@ static int __init apm_init(void)
2358 * code to that CPU. 2357 * code to that CPU.
2359 */ 2358 */
2360 gdt = get_cpu_gdt_table(0); 2359 gdt = get_cpu_gdt_table(0);
2361 set_base(gdt[APM_CS >> 3], 2360 set_desc_base(&gdt[APM_CS >> 3],
2362 __va((unsigned long)apm_info.bios.cseg << 4)); 2361 (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
2363 set_base(gdt[APM_CS_16 >> 3], 2362 set_desc_base(&gdt[APM_CS_16 >> 3],
2364 __va((unsigned long)apm_info.bios.cseg_16 << 4)); 2363 (unsigned long)__va((unsigned long)apm_info.bios.cseg_16 << 4));
2365 set_base(gdt[APM_DS >> 3], 2364 set_desc_base(&gdt[APM_DS >> 3],
2366 __va((unsigned long)apm_info.bios.dseg << 4)); 2365 (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4));
2367 2366
2368 proc_create("apm", 0, NULL, &apm_file_ops); 2367 proc_create("apm", 0, NULL, &apm_file_ops);
2369 2368
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 63fddcd082cd..22a47c82f3c0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -2,7 +2,7 @@
2#include <linux/bitops.h> 2#include <linux/bitops.h>
3#include <linux/mm.h> 3#include <linux/mm.h>
4 4
5#include <asm/io.h> 5#include <linux/io.h>
6#include <asm/processor.h> 6#include <asm/processor.h>
7#include <asm/apic.h> 7#include <asm/apic.h>
8#include <asm/cpu.h> 8#include <asm/cpu.h>
@@ -45,8 +45,8 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
45#define CBAR_ENB (0x80000000) 45#define CBAR_ENB (0x80000000)
46#define CBAR_KEY (0X000000CB) 46#define CBAR_KEY (0X000000CB)
47 if (c->x86_model == 9 || c->x86_model == 10) { 47 if (c->x86_model == 9 || c->x86_model == 10) {
48 if (inl (CBAR) & CBAR_ENB) 48 if (inl(CBAR) & CBAR_ENB)
49 outl (0 | CBAR_KEY, CBAR); 49 outl(0 | CBAR_KEY, CBAR);
50 } 50 }
51} 51}
52 52
@@ -87,9 +87,10 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
87 d = d2-d; 87 d = d2-d;
88 88
89 if (d > 20*K6_BUG_LOOP) 89 if (d > 20*K6_BUG_LOOP)
90 printk("system stability may be impaired when more than 32 MB are used.\n"); 90 printk(KERN_CONT
91 "system stability may be impaired when more than 32 MB are used.\n");
91 else 92 else
92 printk("probably OK (after B9730xxxx).\n"); 93 printk(KERN_CONT "probably OK (after B9730xxxx).\n");
93 printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); 94 printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
94 } 95 }
95 96
@@ -219,8 +220,9 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
219 if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { 220 if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
220 rdmsr(MSR_K7_CLK_CTL, l, h); 221 rdmsr(MSR_K7_CLK_CTL, l, h);
221 if ((l & 0xfff00000) != 0x20000000) { 222 if ((l & 0xfff00000) != 0x20000000) {
222 printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, 223 printk(KERN_INFO
223 ((l & 0x000fffff)|0x20000000)); 224 "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
225 l, ((l & 0x000fffff)|0x20000000));
224 wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); 226 wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
225 } 227 }
226 } 228 }
@@ -251,6 +253,64 @@ static int __cpuinit nearby_node(int apicid)
251#endif 253#endif
252 254
253/* 255/*
256 * Fixup core topology information for AMD multi-node processors.
257 * Assumption 1: Number of cores in each internal node is the same.
258 * Assumption 2: Mixed systems with both single-node and dual-node
259 * processors are not supported.
260 */
261#ifdef CONFIG_X86_HT
262static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c)
263{
264#ifdef CONFIG_PCI
265 u32 t, cpn;
266 u8 n, n_id;
267 int cpu = smp_processor_id();
268
269 /* fixup topology information only once for a core */
270 if (cpu_has(c, X86_FEATURE_AMD_DCM))
271 return;
272
273 /* check for multi-node processor on boot cpu */
274 t = read_pci_config(0, 24, 3, 0xe8);
275 if (!(t & (1 << 29)))
276 return;
277
278 set_cpu_cap(c, X86_FEATURE_AMD_DCM);
279
280 /* cores per node: each internal node has half the number of cores */
281 cpn = c->x86_max_cores >> 1;
282
283 /* even-numbered NB_id of this dual-node processor */
284 n = c->phys_proc_id << 1;
285
286 /*
287 * determine internal node id and assign cores fifty-fifty to
288 * each node of the dual-node processor
289 */
290 t = read_pci_config(0, 24 + n, 3, 0xe8);
291 n = (t>>30) & 0x3;
292 if (n == 0) {
293 if (c->cpu_core_id < cpn)
294 n_id = 0;
295 else
296 n_id = 1;
297 } else {
298 if (c->cpu_core_id < cpn)
299 n_id = 1;
300 else
301 n_id = 0;
302 }
303
304 /* compute entire NodeID, use llc_shared_map to store sibling info */
305 per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id;
306
307 /* fixup core id to be in range from 0 to cpn */
308 c->cpu_core_id = c->cpu_core_id % cpn;
309#endif
310}
311#endif
312
313/*
254 * On a AMD dual core setup the lower bits of the APIC id distingush the cores. 314 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
255 * Assumes number of cores is a power of two. 315 * Assumes number of cores is a power of two.
256 */ 316 */
@@ -267,6 +327,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
267 c->phys_proc_id = c->initial_apicid >> bits; 327 c->phys_proc_id = c->initial_apicid >> bits;
268 /* use socket ID also for last level cache */ 328 /* use socket ID also for last level cache */
269 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; 329 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
330 /* fixup topology information on multi-node processors */
331 if ((c->x86 == 0x10) && (c->x86_model == 9))
332 amd_fixup_dcm(c);
270#endif 333#endif
271} 334}
272 335
@@ -275,9 +338,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
275#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 338#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
276 int cpu = smp_processor_id(); 339 int cpu = smp_processor_id();
277 int node; 340 int node;
278 unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; 341 unsigned apicid = c->apicid;
342
343 node = per_cpu(cpu_llc_id, cpu);
279 344
280 node = c->phys_proc_id;
281 if (apicid_to_node[apicid] != NUMA_NO_NODE) 345 if (apicid_to_node[apicid] != NUMA_NO_NODE)
282 node = apicid_to_node[apicid]; 346 node = apicid_to_node[apicid];
283 if (!node_online(node)) { 347 if (!node_online(node)) {
@@ -398,18 +462,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
398 u32 level; 462 u32 level;
399 463
400 level = cpuid_eax(1); 464 level = cpuid_eax(1);
401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) 465 if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
402 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 466 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
403 467
404 /* 468 /*
405 * Some BIOSes incorrectly force this feature, but only K8 469 * Some BIOSes incorrectly force this feature, but only K8
406 * revision D (model = 0x14) and later actually support it. 470 * revision D (model = 0x14) and later actually support it.
471 * (AMD Erratum #110, docId: 25759).
407 */ 472 */
408 if (c->x86_model < 0x14) 473 if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
474 u64 val;
475
409 clear_cpu_cap(c, X86_FEATURE_LAHF_LM); 476 clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
477 if (!rdmsrl_amd_safe(0xc001100d, &val)) {
478 val &= ~(1ULL << 32);
479 wrmsrl_amd_safe(0xc001100d, val);
480 }
481 }
482
410 } 483 }
411 if (c->x86 == 0x10 || c->x86 == 0x11) 484 if (c->x86 == 0x10 || c->x86 == 0x11)
412 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 485 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
486
487 /* get apicid instead of initial apic id from cpuid */
488 c->apicid = hard_smp_processor_id();
413#else 489#else
414 490
415 /* 491 /*
@@ -494,27 +570,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
494 * benefit in doing so. 570 * benefit in doing so.
495 */ 571 */
496 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { 572 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
497 printk(KERN_DEBUG "tseg: %010llx\n", tseg); 573 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
498 if ((tseg>>PMD_SHIFT) < 574 if ((tseg>>PMD_SHIFT) <
499 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || 575 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
500 ((tseg>>PMD_SHIFT) < 576 ((tseg>>PMD_SHIFT) <
501 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && 577 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
502 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) 578 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
503 set_memory_4k((unsigned long)__va(tseg), 1); 579 set_memory_4k((unsigned long)__va(tseg), 1);
504 } 580 }
505 } 581 }
506#endif 582#endif
507} 583}
508 584
509#ifdef CONFIG_X86_32 585#ifdef CONFIG_X86_32
510static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) 586static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
587 unsigned int size)
511{ 588{
512 /* AMD errata T13 (order #21922) */ 589 /* AMD errata T13 (order #21922) */
513 if ((c->x86 == 6)) { 590 if ((c->x86 == 6)) {
514 if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ 591 /* Duron Rev A0 */
592 if (c->x86_model == 3 && c->x86_mask == 0)
515 size = 64; 593 size = 64;
594 /* Tbird rev A1/A2 */
516 if (c->x86_model == 4 && 595 if (c->x86_model == 4 &&
517 (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ 596 (c->x86_mask == 0 || c->x86_mask == 1))
518 size = 256; 597 size = 256;
519 } 598 }
520 return size; 599 return size;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c8e315f1aa83..01a265212395 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -81,7 +81,7 @@ static void __init check_fpu(void)
81 81
82 boot_cpu_data.fdiv_bug = fdiv_bug; 82 boot_cpu_data.fdiv_bug = fdiv_bug;
83 if (boot_cpu_data.fdiv_bug) 83 if (boot_cpu_data.fdiv_bug)
84 printk("Hmm, FPU with FDIV bug.\n"); 84 printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n");
85} 85}
86 86
87static void __init check_hlt(void) 87static void __init check_hlt(void)
@@ -98,7 +98,7 @@ static void __init check_hlt(void)
98 halt(); 98 halt();
99 halt(); 99 halt();
100 halt(); 100 halt();
101 printk("OK.\n"); 101 printk(KERN_CONT "OK.\n");
102} 102}
103 103
104/* 104/*
@@ -122,9 +122,9 @@ static void __init check_popad(void)
122 * CPU hard. Too bad. 122 * CPU hard. Too bad.
123 */ 123 */
124 if (res != 12345678) 124 if (res != 12345678)
125 printk("Buggy.\n"); 125 printk(KERN_CONT "Buggy.\n");
126 else 126 else
127 printk("OK.\n"); 127 printk(KERN_CONT "OK.\n");
128#endif 128#endif
129} 129}
130 130
@@ -156,7 +156,7 @@ void __init check_bugs(void)
156{ 156{
157 identify_boot_cpu(); 157 identify_boot_cpu();
158#ifndef CONFIG_SMP 158#ifndef CONFIG_SMP
159 printk("CPU: "); 159 printk(KERN_INFO "CPU: ");
160 print_cpu_info(&boot_cpu_data); 160 print_cpu_info(&boot_cpu_data);
161#endif 161#endif
162 check_config(); 162 check_config();
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
index 9a3ed0649d4e..04f0fe5af83e 100644
--- a/arch/x86/kernel/cpu/bugs_64.c
+++ b/arch/x86/kernel/cpu/bugs_64.c
@@ -15,7 +15,7 @@ void __init check_bugs(void)
15{ 15{
16 identify_boot_cpu(); 16 identify_boot_cpu();
17#if !defined(CONFIG_SMP) 17#if !defined(CONFIG_SMP)
18 printk("CPU: "); 18 printk(KERN_INFO "CPU: ");
19 print_cpu_info(&boot_cpu_data); 19 print_cpu_info(&boot_cpu_data);
20#endif 20#endif
21 alternative_instructions(); 21 alternative_instructions();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5ce60a88027b..2055fc2b2e6b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -18,8 +18,8 @@
18#include <asm/hypervisor.h> 18#include <asm/hypervisor.h>
19#include <asm/processor.h> 19#include <asm/processor.h>
20#include <asm/sections.h> 20#include <asm/sections.h>
21#include <asm/topology.h> 21#include <linux/topology.h>
22#include <asm/cpumask.h> 22#include <linux/cpumask.h>
23#include <asm/pgtable.h> 23#include <asm/pgtable.h>
24#include <asm/atomic.h> 24#include <asm/atomic.h>
25#include <asm/proto.h> 25#include <asm/proto.h>
@@ -28,13 +28,13 @@
28#include <asm/desc.h> 28#include <asm/desc.h>
29#include <asm/i387.h> 29#include <asm/i387.h>
30#include <asm/mtrr.h> 30#include <asm/mtrr.h>
31#include <asm/numa.h> 31#include <linux/numa.h>
32#include <asm/asm.h> 32#include <asm/asm.h>
33#include <asm/cpu.h> 33#include <asm/cpu.h>
34#include <asm/mce.h> 34#include <asm/mce.h>
35#include <asm/msr.h> 35#include <asm/msr.h>
36#include <asm/pat.h> 36#include <asm/pat.h>
37#include <asm/smp.h> 37#include <linux/smp.h>
38 38
39#ifdef CONFIG_X86_LOCAL_APIC 39#ifdef CONFIG_X86_LOCAL_APIC
40#include <asm/uv/uv.h> 40#include <asm/uv/uv.h>
@@ -94,45 +94,45 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
94 * TLS descriptors are currently at a different place compared to i386. 94 * TLS descriptors are currently at a different place compared to i386.
95 * Hopefully nobody expects them at a fixed place (Wine?) 95 * Hopefully nobody expects them at a fixed place (Wine?)
96 */ 96 */
97 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, 97 [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
98 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, 98 [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
99 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, 99 [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
100 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, 100 [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff),
101 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, 101 [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff),
102 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, 102 [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff),
103#else 103#else
104 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 104 [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff),
105 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 105 [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
106 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 106 [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff),
107 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, 107 [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff),
108 /* 108 /*
109 * Segments used for calling PnP BIOS have byte granularity. 109 * Segments used for calling PnP BIOS have byte granularity.
110 * They code segments and data segments have fixed 64k limits, 110 * They code segments and data segments have fixed 64k limits,
111 * the transfer segment sizes are set at run time. 111 * the transfer segment sizes are set at run time.
112 */ 112 */
113 /* 32-bit code */ 113 /* 32-bit code */
114 [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, 114 [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
115 /* 16-bit code */ 115 /* 16-bit code */
116 [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, 116 [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
117 /* 16-bit data */ 117 /* 16-bit data */
118 [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, 118 [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff),
119 /* 16-bit data */ 119 /* 16-bit data */
120 [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, 120 [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0),
121 /* 16-bit data */ 121 /* 16-bit data */
122 [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, 122 [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0),
123 /* 123 /*
124 * The APM segments have byte granularity and their bases 124 * The APM segments have byte granularity and their bases
125 * are set at run time. All have 64k limits. 125 * are set at run time. All have 64k limits.
126 */ 126 */
127 /* 32-bit code */ 127 /* 32-bit code */
128 [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, 128 [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
129 /* 16-bit code */ 129 /* 16-bit code */
130 [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, 130 [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
131 /* data */ 131 /* data */
132 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 132 [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff),
133 133
134 [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } }, 134 [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
135 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, 135 [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
136 GDT_STACK_CANARY_INIT 136 GDT_STACK_CANARY_INIT
137#endif 137#endif
138} }; 138} };
@@ -982,18 +982,26 @@ static __init int setup_disablecpuid(char *arg)
982__setup("clearcpuid=", setup_disablecpuid); 982__setup("clearcpuid=", setup_disablecpuid);
983 983
984#ifdef CONFIG_X86_64 984#ifdef CONFIG_X86_64
985struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 985struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
986 986
987DEFINE_PER_CPU_FIRST(union irq_stack_union, 987DEFINE_PER_CPU_FIRST(union irq_stack_union,
988 irq_stack_union) __aligned(PAGE_SIZE); 988 irq_stack_union) __aligned(PAGE_SIZE);
989 989
990DEFINE_PER_CPU(char *, irq_stack_ptr) = 990/*
991 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; 991 * The following four percpu variables are hot. Align current_task to
992 * cacheline size such that all four fall in the same cacheline.
993 */
994DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
995 &init_task;
996EXPORT_PER_CPU_SYMBOL(current_task);
992 997
993DEFINE_PER_CPU(unsigned long, kernel_stack) = 998DEFINE_PER_CPU(unsigned long, kernel_stack) =
994 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; 999 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
995EXPORT_PER_CPU_SYMBOL(kernel_stack); 1000EXPORT_PER_CPU_SYMBOL(kernel_stack);
996 1001
1002DEFINE_PER_CPU(char *, irq_stack_ptr) =
1003 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
1004
997DEFINE_PER_CPU(unsigned int, irq_count) = -1; 1005DEFINE_PER_CPU(unsigned int, irq_count) = -1;
998 1006
999/* 1007/*
@@ -1008,8 +1016,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
1008}; 1016};
1009 1017
1010static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks 1018static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
1011 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) 1019 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
1012 __aligned(PAGE_SIZE);
1013 1020
1014/* May not be marked __init: used by software suspend */ 1021/* May not be marked __init: used by software suspend */
1015void syscall_init(void) 1022void syscall_init(void)
@@ -1042,8 +1049,11 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist);
1042 1049
1043#else /* CONFIG_X86_64 */ 1050#else /* CONFIG_X86_64 */
1044 1051
1052DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
1053EXPORT_PER_CPU_SYMBOL(current_task);
1054
1045#ifdef CONFIG_CC_STACKPROTECTOR 1055#ifdef CONFIG_CC_STACKPROTECTOR
1046DEFINE_PER_CPU(unsigned long, stack_canary); 1056DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
1047#endif 1057#endif
1048 1058
1049/* Make sure %fs and %gs are initialized properly in idle threads */ 1059/* Make sure %fs and %gs are initialized properly in idle threads */
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 593171e967ef..19807b89f058 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -3,10 +3,10 @@
3#include <linux/delay.h> 3#include <linux/delay.h>
4#include <linux/pci.h> 4#include <linux/pci.h>
5#include <asm/dma.h> 5#include <asm/dma.h>
6#include <asm/io.h> 6#include <linux/io.h>
7#include <asm/processor-cyrix.h> 7#include <asm/processor-cyrix.h>
8#include <asm/processor-flags.h> 8#include <asm/processor-flags.h>
9#include <asm/timer.h> 9#include <linux/timer.h>
10#include <asm/pci-direct.h> 10#include <asm/pci-direct.h>
11#include <asm/tsc.h> 11#include <asm/tsc.h>
12 12
@@ -282,7 +282,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
282 * The 5510/5520 companion chips have a funky PIT. 282 * The 5510/5520 companion chips have a funky PIT.
283 */ 283 */
284 if (vendor == PCI_VENDOR_ID_CYRIX && 284 if (vendor == PCI_VENDOR_ID_CYRIX &&
285 (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) 285 (device == PCI_DEVICE_ID_CYRIX_5510 ||
286 device == PCI_DEVICE_ID_CYRIX_5520))
286 mark_tsc_unstable("cyrix 5510/5520 detected"); 287 mark_tsc_unstable("cyrix 5510/5520 detected");
287 } 288 }
288#endif 289#endif
@@ -299,7 +300,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
299 * ? : 0x7x 300 * ? : 0x7x
300 * GX1 : 0x8x GX1 datasheet 56 301 * GX1 : 0x8x GX1 datasheet 56
301 */ 302 */
302 if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) 303 if ((0x30 <= dir1 && dir1 <= 0x6f) ||
304 (0x80 <= dir1 && dir1 <= 0x8f))
303 geode_configure(); 305 geode_configure();
304 return; 306 return;
305 } else { /* MediaGX */ 307 } else { /* MediaGX */
@@ -427,9 +429,12 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
427 printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); 429 printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
428 local_irq_save(flags); 430 local_irq_save(flags);
429 ccr3 = getCx86(CX86_CCR3); 431 ccr3 = getCx86(CX86_CCR3);
430 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 432 /* enable MAPEN */
431 setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */ 433 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
432 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ 434 /* enable cpuid */
435 setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80);
436 /* disable MAPEN */
437 setCx86(CX86_CCR3, ccr3);
433 local_irq_restore(flags); 438 local_irq_restore(flags);
434 } 439 }
435 } 440 }
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index fb5b86af0b01..93ba8eeb100a 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -28,11 +28,10 @@
28static inline void __cpuinit 28static inline void __cpuinit
29detect_hypervisor_vendor(struct cpuinfo_x86 *c) 29detect_hypervisor_vendor(struct cpuinfo_x86 *c)
30{ 30{
31 if (vmware_platform()) { 31 if (vmware_platform())
32 c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; 32 c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE;
33 } else { 33 else
34 c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; 34 c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
35 }
36} 35}
37 36
38unsigned long get_hypervisor_tsc_freq(void) 37unsigned long get_hypervisor_tsc_freq(void)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3260ab044996..80a722a071b5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -7,17 +7,17 @@
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/thread_info.h> 8#include <linux/thread_info.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/uaccess.h>
10 11
11#include <asm/processor.h> 12#include <asm/processor.h>
12#include <asm/pgtable.h> 13#include <asm/pgtable.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
14#include <asm/uaccess.h>
15#include <asm/ds.h> 15#include <asm/ds.h>
16#include <asm/bugs.h> 16#include <asm/bugs.h>
17#include <asm/cpu.h> 17#include <asm/cpu.h>
18 18
19#ifdef CONFIG_X86_64 19#ifdef CONFIG_X86_64
20#include <asm/topology.h> 20#include <linux/topology.h>
21#include <asm/numa_64.h> 21#include <asm/numa_64.h>
22#endif 22#endif
23 23
@@ -174,7 +174,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
174#ifdef CONFIG_X86_F00F_BUG 174#ifdef CONFIG_X86_F00F_BUG
175 /* 175 /*
176 * All current models of Pentium and Pentium with MMX technology CPUs 176 * All current models of Pentium and Pentium with MMX technology CPUs
177 * have the F0 0F bug, which lets nonprivileged users lock up the system. 177 * have the F0 0F bug, which lets nonprivileged users lock up the
178 * system.
178 * Note that the workaround only should be initialized once... 179 * Note that the workaround only should be initialized once...
179 */ 180 */
180 c->f00f_bug = 0; 181 c->f00f_bug = 0;
@@ -207,7 +208,7 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
207 printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); 208 printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
208 printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); 209 printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
209 lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; 210 lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE;
210 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); 211 wrmsr(MSR_IA32_MISC_ENABLE, lo, hi);
211 } 212 }
212 } 213 }
213 214
@@ -283,7 +284,7 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
283 /* Intel has a non-standard dependency on %ecx for this CPUID level. */ 284 /* Intel has a non-standard dependency on %ecx for this CPUID level. */
284 cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); 285 cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
285 if (eax & 0x1f) 286 if (eax & 0x1f)
286 return ((eax >> 26) + 1); 287 return (eax >> 26) + 1;
287 else 288 else
288 return 1; 289 return 1;
289} 290}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 789efe217e1a..804c40e2bc3e 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Changes: 4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4) 5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. 6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. 7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
8 */ 8 */
9 9
@@ -16,7 +16,7 @@
16#include <linux/pci.h> 16#include <linux/pci.h>
17 17
18#include <asm/processor.h> 18#include <asm/processor.h>
19#include <asm/smp.h> 19#include <linux/smp.h>
20#include <asm/k8.h> 20#include <asm/k8.h>
21 21
22#define LVL_1_INST 1 22#define LVL_1_INST 1
@@ -25,14 +25,15 @@
25#define LVL_3 4 25#define LVL_3 4
26#define LVL_TRACE 5 26#define LVL_TRACE 5
27 27
28struct _cache_table 28struct _cache_table {
29{
30 unsigned char descriptor; 29 unsigned char descriptor;
31 char cache_type; 30 char cache_type;
32 short size; 31 short size;
33}; 32};
34 33
35/* all the cache descriptor types we care about (no TLB or trace cache entries) */ 34/* All the cache descriptor types we care about (no TLB or
35 trace cache entries) */
36
36static const struct _cache_table __cpuinitconst cache_table[] = 37static const struct _cache_table __cpuinitconst cache_table[] =
37{ 38{
38 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ 39 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
@@ -105,8 +106,7 @@ static const struct _cache_table __cpuinitconst cache_table[] =
105}; 106};
106 107
107 108
108enum _cache_type 109enum _cache_type {
109{
110 CACHE_TYPE_NULL = 0, 110 CACHE_TYPE_NULL = 0,
111 CACHE_TYPE_DATA = 1, 111 CACHE_TYPE_DATA = 1,
112 CACHE_TYPE_INST = 2, 112 CACHE_TYPE_INST = 2,
@@ -170,31 +170,31 @@ unsigned short num_cache_leaves;
170 Maybe later */ 170 Maybe later */
171union l1_cache { 171union l1_cache {
172 struct { 172 struct {
173 unsigned line_size : 8; 173 unsigned line_size:8;
174 unsigned lines_per_tag : 8; 174 unsigned lines_per_tag:8;
175 unsigned assoc : 8; 175 unsigned assoc:8;
176 unsigned size_in_kb : 8; 176 unsigned size_in_kb:8;
177 }; 177 };
178 unsigned val; 178 unsigned val;
179}; 179};
180 180
181union l2_cache { 181union l2_cache {
182 struct { 182 struct {
183 unsigned line_size : 8; 183 unsigned line_size:8;
184 unsigned lines_per_tag : 4; 184 unsigned lines_per_tag:4;
185 unsigned assoc : 4; 185 unsigned assoc:4;
186 unsigned size_in_kb : 16; 186 unsigned size_in_kb:16;
187 }; 187 };
188 unsigned val; 188 unsigned val;
189}; 189};
190 190
191union l3_cache { 191union l3_cache {
192 struct { 192 struct {
193 unsigned line_size : 8; 193 unsigned line_size:8;
194 unsigned lines_per_tag : 4; 194 unsigned lines_per_tag:4;
195 unsigned assoc : 4; 195 unsigned assoc:4;
196 unsigned res : 2; 196 unsigned res:2;
197 unsigned size_encoded : 14; 197 unsigned size_encoded:14;
198 }; 198 };
199 unsigned val; 199 unsigned val;
200}; 200};
@@ -241,7 +241,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
241 case 0: 241 case 0:
242 if (!l1->val) 242 if (!l1->val)
243 return; 243 return;
244 assoc = l1->assoc; 244 assoc = assocs[l1->assoc];
245 line_size = l1->line_size; 245 line_size = l1->line_size;
246 lines_per_tag = l1->lines_per_tag; 246 lines_per_tag = l1->lines_per_tag;
247 size_in_kb = l1->size_in_kb; 247 size_in_kb = l1->size_in_kb;
@@ -249,7 +249,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
249 case 2: 249 case 2:
250 if (!l2.val) 250 if (!l2.val)
251 return; 251 return;
252 assoc = l2.assoc; 252 assoc = assocs[l2.assoc];
253 line_size = l2.line_size; 253 line_size = l2.line_size;
254 lines_per_tag = l2.lines_per_tag; 254 lines_per_tag = l2.lines_per_tag;
255 /* cpu_data has errata corrections for K7 applied */ 255 /* cpu_data has errata corrections for K7 applied */
@@ -258,10 +258,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
258 case 3: 258 case 3:
259 if (!l3.val) 259 if (!l3.val)
260 return; 260 return;
261 assoc = l3.assoc; 261 assoc = assocs[l3.assoc];
262 line_size = l3.line_size; 262 line_size = l3.line_size;
263 lines_per_tag = l3.lines_per_tag; 263 lines_per_tag = l3.lines_per_tag;
264 size_in_kb = l3.size_encoded * 512; 264 size_in_kb = l3.size_encoded * 512;
265 if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
266 size_in_kb = size_in_kb >> 1;
267 assoc = assoc >> 1;
268 }
265 break; 269 break;
266 default: 270 default:
267 return; 271 return;
@@ -270,18 +274,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
270 eax->split.is_self_initializing = 1; 274 eax->split.is_self_initializing = 1;
271 eax->split.type = types[leaf]; 275 eax->split.type = types[leaf];
272 eax->split.level = levels[leaf]; 276 eax->split.level = levels[leaf];
273 if (leaf == 3) 277 eax->split.num_threads_sharing = 0;
274 eax->split.num_threads_sharing =
275 current_cpu_data.x86_max_cores - 1;
276 else
277 eax->split.num_threads_sharing = 0;
278 eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; 278 eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
279 279
280 280
281 if (assoc == 0xf) 281 if (assoc == 0xffff)
282 eax->split.is_fully_associative = 1; 282 eax->split.is_fully_associative = 1;
283 ebx->split.coherency_line_size = line_size - 1; 283 ebx->split.coherency_line_size = line_size - 1;
284 ebx->split.ways_of_associativity = assocs[assoc] - 1; 284 ebx->split.ways_of_associativity = assoc - 1;
285 ebx->split.physical_line_partition = lines_per_tag - 1; 285 ebx->split.physical_line_partition = lines_per_tag - 1;
286 ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / 286 ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
287 (ebx->split.ways_of_associativity + 1) - 1; 287 (ebx->split.ways_of_associativity + 1) - 1;
@@ -350,7 +350,8 @@ static int __cpuinit find_num_cache_leaves(void)
350 350
351unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) 351unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
352{ 352{
353 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ 353 /* Cache sizes */
354 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
354 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ 355 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
355 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ 356 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
356 unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; 357 unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
@@ -377,8 +378,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
377 378
378 retval = cpuid4_cache_lookup_regs(i, &this_leaf); 379 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
379 if (retval >= 0) { 380 if (retval >= 0) {
380 switch(this_leaf.eax.split.level) { 381 switch (this_leaf.eax.split.level) {
381 case 1: 382 case 1:
382 if (this_leaf.eax.split.type == 383 if (this_leaf.eax.split.type ==
383 CACHE_TYPE_DATA) 384 CACHE_TYPE_DATA)
384 new_l1d = this_leaf.size/1024; 385 new_l1d = this_leaf.size/1024;
@@ -386,19 +387,20 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
386 CACHE_TYPE_INST) 387 CACHE_TYPE_INST)
387 new_l1i = this_leaf.size/1024; 388 new_l1i = this_leaf.size/1024;
388 break; 389 break;
389 case 2: 390 case 2:
390 new_l2 = this_leaf.size/1024; 391 new_l2 = this_leaf.size/1024;
391 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; 392 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
392 index_msb = get_count_order(num_threads_sharing); 393 index_msb = get_count_order(num_threads_sharing);
393 l2_id = c->apicid >> index_msb; 394 l2_id = c->apicid >> index_msb;
394 break; 395 break;
395 case 3: 396 case 3:
396 new_l3 = this_leaf.size/1024; 397 new_l3 = this_leaf.size/1024;
397 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; 398 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
398 index_msb = get_count_order(num_threads_sharing); 399 index_msb = get_count_order(
400 num_threads_sharing);
399 l3_id = c->apicid >> index_msb; 401 l3_id = c->apicid >> index_msb;
400 break; 402 break;
401 default: 403 default:
402 break; 404 break;
403 } 405 }
404 } 406 }
@@ -421,22 +423,21 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
421 /* Number of times to iterate */ 423 /* Number of times to iterate */
422 n = cpuid_eax(2) & 0xFF; 424 n = cpuid_eax(2) & 0xFF;
423 425
424 for ( i = 0 ; i < n ; i++ ) { 426 for (i = 0 ; i < n ; i++) {
425 cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); 427 cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
426 428
427 /* If bit 31 is set, this is an unknown format */ 429 /* If bit 31 is set, this is an unknown format */
428 for ( j = 0 ; j < 3 ; j++ ) { 430 for (j = 0 ; j < 3 ; j++)
429 if (regs[j] & (1 << 31)) regs[j] = 0; 431 if (regs[j] & (1 << 31))
430 } 432 regs[j] = 0;
431 433
432 /* Byte 0 is level count, not a descriptor */ 434 /* Byte 0 is level count, not a descriptor */
433 for ( j = 1 ; j < 16 ; j++ ) { 435 for (j = 1 ; j < 16 ; j++) {
434 unsigned char des = dp[j]; 436 unsigned char des = dp[j];
435 unsigned char k = 0; 437 unsigned char k = 0;
436 438
437 /* look up this descriptor in the table */ 439 /* look up this descriptor in the table */
438 while (cache_table[k].descriptor != 0) 440 while (cache_table[k].descriptor != 0) {
439 {
440 if (cache_table[k].descriptor == des) { 441 if (cache_table[k].descriptor == des) {
441 if (only_trace && cache_table[k].cache_type != LVL_TRACE) 442 if (only_trace && cache_table[k].cache_type != LVL_TRACE)
442 break; 443 break;
@@ -488,14 +489,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
488 } 489 }
489 490
490 if (trace) 491 if (trace)
491 printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); 492 printk(KERN_INFO "CPU: Trace cache: %dK uops", trace);
492 else if ( l1i ) 493 else if (l1i)
493 printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); 494 printk(KERN_INFO "CPU: L1 I cache: %dK", l1i);
494 495
495 if (l1d) 496 if (l1d)
496 printk(", L1 D cache: %dK\n", l1d); 497 printk(KERN_CONT ", L1 D cache: %dK\n", l1d);
497 else 498 else
498 printk("\n"); 499 printk(KERN_CONT "\n");
499 500
500 if (l2) 501 if (l2)
501 printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); 502 printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
@@ -522,6 +523,18 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
522 int index_msb, i; 523 int index_msb, i;
523 struct cpuinfo_x86 *c = &cpu_data(cpu); 524 struct cpuinfo_x86 *c = &cpu_data(cpu);
524 525
526 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
527 struct cpuinfo_x86 *d;
528 for_each_online_cpu(i) {
529 if (!per_cpu(cpuid4_info, i))
530 continue;
531 d = &cpu_data(i);
532 this_leaf = CPUID4_INFO_IDX(i, index);
533 cpumask_copy(to_cpumask(this_leaf->shared_cpu_map),
534 d->llc_shared_map);
535 }
536 return;
537 }
525 this_leaf = CPUID4_INFO_IDX(cpu, index); 538 this_leaf = CPUID4_INFO_IDX(cpu, index);
526 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; 539 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
527 540
@@ -558,8 +571,13 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
558 } 571 }
559} 572}
560#else 573#else
561static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) {} 574static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
562static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {} 575{
576}
577
578static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
579{
580}
563#endif 581#endif
564 582
565static void __cpuinit free_cache_attributes(unsigned int cpu) 583static void __cpuinit free_cache_attributes(unsigned int cpu)
@@ -645,7 +663,7 @@ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
645static ssize_t show_##file_name \ 663static ssize_t show_##file_name \
646 (struct _cpuid4_info *this_leaf, char *buf) \ 664 (struct _cpuid4_info *this_leaf, char *buf) \
647{ \ 665{ \
648 return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \ 666 return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
649} 667}
650 668
651show_one_plus(level, eax.split.level, 0); 669show_one_plus(level, eax.split.level, 0);
@@ -656,7 +674,7 @@ show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
656 674
657static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) 675static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
658{ 676{
659 return sprintf (buf, "%luK\n", this_leaf->size / 1024); 677 return sprintf(buf, "%luK\n", this_leaf->size / 1024);
660} 678}
661 679
662static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, 680static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
@@ -669,7 +687,7 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
669 const struct cpumask *mask; 687 const struct cpumask *mask;
670 688
671 mask = to_cpumask(this_leaf->shared_cpu_map); 689 mask = to_cpumask(this_leaf->shared_cpu_map);
672 n = type? 690 n = type ?
673 cpulist_scnprintf(buf, len-2, mask) : 691 cpulist_scnprintf(buf, len-2, mask) :
674 cpumask_scnprintf(buf, len-2, mask); 692 cpumask_scnprintf(buf, len-2, mask);
675 buf[n++] = '\n'; 693 buf[n++] = '\n';
@@ -800,7 +818,7 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
800static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, 818static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
801 show_cache_disable_1, store_cache_disable_1); 819 show_cache_disable_1, store_cache_disable_1);
802 820
803static struct attribute * default_attrs[] = { 821static struct attribute *default_attrs[] = {
804 &type.attr, 822 &type.attr,
805 &level.attr, 823 &level.attr,
806 &coherency_line_size.attr, 824 &coherency_line_size.attr,
@@ -815,7 +833,7 @@ static struct attribute * default_attrs[] = {
815 NULL 833 NULL
816}; 834};
817 835
818static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) 836static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
819{ 837{
820 struct _cache_attr *fattr = to_attr(attr); 838 struct _cache_attr *fattr = to_attr(attr);
821 struct _index_kobject *this_leaf = to_object(kobj); 839 struct _index_kobject *this_leaf = to_object(kobj);
@@ -828,8 +846,8 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
828 return ret; 846 return ret;
829} 847}
830 848
831static ssize_t store(struct kobject * kobj, struct attribute * attr, 849static ssize_t store(struct kobject *kobj, struct attribute *attr,
832 const char * buf, size_t count) 850 const char *buf, size_t count)
833{ 851{
834 struct _cache_attr *fattr = to_attr(attr); 852 struct _cache_attr *fattr = to_attr(attr);
835 struct _index_kobject *this_leaf = to_object(kobj); 853 struct _index_kobject *this_leaf = to_object(kobj);
@@ -883,7 +901,7 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
883 goto err_out; 901 goto err_out;
884 902
885 per_cpu(index_kobject, cpu) = kzalloc( 903 per_cpu(index_kobject, cpu) = kzalloc(
886 sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); 904 sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
887 if (unlikely(per_cpu(index_kobject, cpu) == NULL)) 905 if (unlikely(per_cpu(index_kobject, cpu) == NULL))
888 goto err_out; 906 goto err_out;
889 907
@@ -917,7 +935,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
917 } 935 }
918 936
919 for (i = 0; i < num_cache_leaves; i++) { 937 for (i = 0; i < num_cache_leaves; i++) {
920 this_object = INDEX_KOBJECT_PTR(cpu,i); 938 this_object = INDEX_KOBJECT_PTR(cpu, i);
921 this_object->cpu = cpu; 939 this_object->cpu = cpu;
922 this_object->index = i; 940 this_object->index = i;
923 retval = kobject_init_and_add(&(this_object->kobj), 941 retval = kobject_init_and_add(&(this_object->kobj),
@@ -925,9 +943,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
925 per_cpu(cache_kobject, cpu), 943 per_cpu(cache_kobject, cpu),
926 "index%1lu", i); 944 "index%1lu", i);
927 if (unlikely(retval)) { 945 if (unlikely(retval)) {
928 for (j = 0; j < i; j++) { 946 for (j = 0; j < i; j++)
929 kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj)); 947 kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
930 }
931 kobject_put(per_cpu(cache_kobject, cpu)); 948 kobject_put(per_cpu(cache_kobject, cpu));
932 cpuid4_cache_sysfs_exit(cpu); 949 cpuid4_cache_sysfs_exit(cpu);
933 return retval; 950 return retval;
@@ -952,7 +969,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
952 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); 969 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
953 970
954 for (i = 0; i < num_cache_leaves; i++) 971 for (i = 0; i < num_cache_leaves; i++)
955 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); 972 kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
956 kobject_put(per_cpu(cache_kobject, cpu)); 973 kobject_put(per_cpu(cache_kobject, cpu));
957 cpuid4_cache_sysfs_exit(cpu); 974 cpuid4_cache_sysfs_exit(cpu);
958} 975}
@@ -977,8 +994,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
977 return NOTIFY_OK; 994 return NOTIFY_OK;
978} 995}
979 996
980static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = 997static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
981{
982 .notifier_call = cacheinfo_cpu_callback, 998 .notifier_call = cacheinfo_cpu_callback,
983}; 999};
984 1000
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index ddae21620bda..1fecba404fd8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -489,12 +489,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
489 int i, err = 0; 489 int i, err = 0;
490 struct threshold_bank *b = NULL; 490 struct threshold_bank *b = NULL;
491 char name[32]; 491 char name[32];
492 struct cpuinfo_x86 *c = &cpu_data(cpu);
493
492 494
493 sprintf(name, "threshold_bank%i", bank); 495 sprintf(name, "threshold_bank%i", bank);
494 496
495#ifdef CONFIG_SMP 497#ifdef CONFIG_SMP
496 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 498 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
497 i = cpumask_first(cpu_core_mask(cpu)); 499 i = cpumask_first(c->llc_shared_map);
498 500
499 /* first core not up yet */ 501 /* first core not up yet */
500 if (cpu_data(i).cpu_core_id) 502 if (cpu_data(i).cpu_core_id)
@@ -514,7 +516,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
514 if (err) 516 if (err)
515 goto out; 517 goto out;
516 518
517 cpumask_copy(b->cpus, cpu_core_mask(cpu)); 519 cpumask_copy(b->cpus, c->llc_shared_map);
518 per_cpu(threshold_banks, cpu)[bank] = b; 520 per_cpu(threshold_banks, cpu)[bank] = b;
519 521
520 goto out; 522 goto out;
@@ -539,7 +541,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
539#ifndef CONFIG_SMP 541#ifndef CONFIG_SMP
540 cpumask_setall(b->cpus); 542 cpumask_setall(b->cpus);
541#else 543#else
542 cpumask_copy(b->cpus, cpu_core_mask(cpu)); 544 cpumask_copy(b->cpus, c->llc_shared_map);
543#endif 545#endif
544 546
545 per_cpu(threshold_banks, cpu)[bank] = b; 547 per_cpu(threshold_banks, cpu)[bank] = b;
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
index ee2331b0e58f..33af14110dfd 100644
--- a/arch/x86/kernel/cpu/mtrr/amd.c
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -7,15 +7,15 @@
7 7
8static void 8static void
9amd_get_mtrr(unsigned int reg, unsigned long *base, 9amd_get_mtrr(unsigned int reg, unsigned long *base,
10 unsigned long *size, mtrr_type * type) 10 unsigned long *size, mtrr_type *type)
11{ 11{
12 unsigned long low, high; 12 unsigned long low, high;
13 13
14 rdmsr(MSR_K6_UWCCR, low, high); 14 rdmsr(MSR_K6_UWCCR, low, high);
15 /* Upper dword is region 1, lower is region 0 */ 15 /* Upper dword is region 1, lower is region 0 */
16 if (reg == 1) 16 if (reg == 1)
17 low = high; 17 low = high;
18 /* The base masks off on the right alignment */ 18 /* The base masks off on the right alignment */
19 *base = (low & 0xFFFE0000) >> PAGE_SHIFT; 19 *base = (low & 0xFFFE0000) >> PAGE_SHIFT;
20 *type = 0; 20 *type = 0;
21 if (low & 1) 21 if (low & 1)
@@ -27,74 +27,81 @@ amd_get_mtrr(unsigned int reg, unsigned long *base,
27 return; 27 return;
28 } 28 }
29 /* 29 /*
30 * This needs a little explaining. The size is stored as an 30 * This needs a little explaining. The size is stored as an
31 * inverted mask of bits of 128K granularity 15 bits long offset 31 * inverted mask of bits of 128K granularity 15 bits long offset
32 * 2 bits 32 * 2 bits.
33 * 33 *
34 * So to get a size we do invert the mask and add 1 to the lowest 34 * So to get a size we do invert the mask and add 1 to the lowest
35 * mask bit (4 as its 2 bits in). This gives us a size we then shift 35 * mask bit (4 as its 2 bits in). This gives us a size we then shift
36 * to turn into 128K blocks 36 * to turn into 128K blocks.
37 * 37 *
38 * eg 111 1111 1111 1100 is 512K 38 * eg 111 1111 1111 1100 is 512K
39 * 39 *
40 * invert 000 0000 0000 0011 40 * invert 000 0000 0000 0011
41 * +1 000 0000 0000 0100 41 * +1 000 0000 0000 0100
42 * *128K ... 42 * *128K ...
43 */ 43 */
44 low = (~low) & 0x1FFFC; 44 low = (~low) & 0x1FFFC;
45 *size = (low + 4) << (15 - PAGE_SHIFT); 45 *size = (low + 4) << (15 - PAGE_SHIFT);
46 return;
47} 46}
48 47
49static void amd_set_mtrr(unsigned int reg, unsigned long base, 48/**
50 unsigned long size, mtrr_type type) 49 * amd_set_mtrr - Set variable MTRR register on the local CPU.
51/* [SUMMARY] Set variable MTRR register on the local CPU. 50 *
52 <reg> The register to set. 51 * @reg The register to set.
53 <base> The base address of the region. 52 * @base The base address of the region.
54 <size> The size of the region. If this is 0 the region is disabled. 53 * @size The size of the region. If this is 0 the region is disabled.
55 <type> The type of the region. 54 * @type The type of the region.
56 [RETURNS] Nothing. 55 *
57*/ 56 * Returns nothing.
57 */
58static void
59amd_set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
58{ 60{
59 u32 regs[2]; 61 u32 regs[2];
60 62
61 /* 63 /*
62 * Low is MTRR0 , High MTRR 1 64 * Low is MTRR0, High MTRR 1
63 */ 65 */
64 rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); 66 rdmsr(MSR_K6_UWCCR, regs[0], regs[1]);
65 /* 67 /*
66 * Blank to disable 68 * Blank to disable
67 */ 69 */
68 if (size == 0) 70 if (size == 0) {
69 regs[reg] = 0; 71 regs[reg] = 0;
70 else 72 } else {
71 /* Set the register to the base, the type (off by one) and an 73 /*
72 inverted bitmask of the size The size is the only odd 74 * Set the register to the base, the type (off by one) and an
73 bit. We are fed say 512K We invert this and we get 111 1111 75 * inverted bitmask of the size The size is the only odd
74 1111 1011 but if you subtract one and invert you get the 76 * bit. We are fed say 512K We invert this and we get 111 1111
75 desired 111 1111 1111 1100 mask 77 * 1111 1011 but if you subtract one and invert you get the
76 78 * desired 111 1111 1111 1100 mask
77 But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */ 79 *
80 * But ~(x - 1) == ~x + 1 == -x. Two's complement rocks!
81 */
78 regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) 82 regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC)
79 | (base << PAGE_SHIFT) | (type + 1); 83 | (base << PAGE_SHIFT) | (type + 1);
84 }
80 85
81 /* 86 /*
82 * The writeback rule is quite specific. See the manual. Its 87 * The writeback rule is quite specific. See the manual. Its
83 * disable local interrupts, write back the cache, set the mtrr 88 * disable local interrupts, write back the cache, set the mtrr
84 */ 89 */
85 wbinvd(); 90 wbinvd();
86 wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); 91 wrmsr(MSR_K6_UWCCR, regs[0], regs[1]);
87} 92}
88 93
89static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) 94static int
95amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
90{ 96{
91 /* Apply the K6 block alignment and size rules 97 /*
92 In order 98 * Apply the K6 block alignment and size rules
93 o Uncached or gathering only 99 * In order
94 o 128K or bigger block 100 * o Uncached or gathering only
95 o Power of 2 block 101 * o 128K or bigger block
96 o base suitably aligned to the power 102 * o Power of 2 block
97 */ 103 * o base suitably aligned to the power
104 */
98 if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) 105 if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT))
99 || (size & ~(size - 1)) - size || (base & (size - 1))) 106 || (size & ~(size - 1)) - size || (base & (size - 1)))
100 return -EINVAL; 107 return -EINVAL;
@@ -115,5 +122,3 @@ int __init amd_init_mtrr(void)
115 set_mtrr_ops(&amd_mtrr_ops); 122 set_mtrr_ops(&amd_mtrr_ops);
116 return 0; 123 return 0;
117} 124}
118
119//arch_initcall(amd_mtrr_init);
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index cb9aa3a7a7ab..de89f14eff3a 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -1,7 +1,9 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/mm.h> 2#include <linux/mm.h>
3
3#include <asm/mtrr.h> 4#include <asm/mtrr.h>
4#include <asm/msr.h> 5#include <asm/msr.h>
6
5#include "mtrr.h" 7#include "mtrr.h"
6 8
7static struct { 9static struct {
@@ -12,25 +14,25 @@ static struct {
12static u8 centaur_mcr_reserved; 14static u8 centaur_mcr_reserved;
13static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ 15static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */
14 16
15/* 17/**
16 * Report boot time MCR setups 18 * centaur_get_free_region - Get a free MTRR.
19 *
20 * @base: The starting (base) address of the region.
21 * @size: The size (in bytes) of the region.
22 *
23 * Returns: the index of the region on success, else -1 on error.
17 */ 24 */
18
19static int 25static int
20centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) 26centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg)
21/* [SUMMARY] Get a free MTRR.
22 <base> The starting (base) address of the region.
23 <size> The size (in bytes) of the region.
24 [RETURNS] The index of the region on success, else -1 on error.
25*/
26{ 27{
27 int i, max;
28 mtrr_type ltype;
29 unsigned long lbase, lsize; 28 unsigned long lbase, lsize;
29 mtrr_type ltype;
30 int i, max;
30 31
31 max = num_var_ranges; 32 max = num_var_ranges;
32 if (replace_reg >= 0 && replace_reg < max) 33 if (replace_reg >= 0 && replace_reg < max)
33 return replace_reg; 34 return replace_reg;
35
34 for (i = 0; i < max; ++i) { 36 for (i = 0; i < max; ++i) {
35 if (centaur_mcr_reserved & (1 << i)) 37 if (centaur_mcr_reserved & (1 << i))
36 continue; 38 continue;
@@ -38,11 +40,14 @@ centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg)
38 if (lsize == 0) 40 if (lsize == 0)
39 return i; 41 return i;
40 } 42 }
43
41 return -ENOSPC; 44 return -ENOSPC;
42} 45}
43 46
44void 47/*
45mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) 48 * Report boot time MCR setups
49 */
50void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
46{ 51{
47 centaur_mcr[mcr].low = lo; 52 centaur_mcr[mcr].low = lo;
48 centaur_mcr[mcr].high = hi; 53 centaur_mcr[mcr].high = hi;
@@ -54,33 +59,35 @@ centaur_get_mcr(unsigned int reg, unsigned long *base,
54{ 59{
55 *base = centaur_mcr[reg].high >> PAGE_SHIFT; 60 *base = centaur_mcr[reg].high >> PAGE_SHIFT;
56 *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; 61 *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
57 *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */ 62 *type = MTRR_TYPE_WRCOMB; /* write-combining */
63
58 if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) 64 if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2))
59 *type = MTRR_TYPE_UNCACHABLE; 65 *type = MTRR_TYPE_UNCACHABLE;
60 if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) 66 if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25)
61 *type = MTRR_TYPE_WRBACK; 67 *type = MTRR_TYPE_WRBACK;
62 if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) 68 if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31)
63 *type = MTRR_TYPE_WRBACK; 69 *type = MTRR_TYPE_WRBACK;
64
65} 70}
66 71
67static void centaur_set_mcr(unsigned int reg, unsigned long base, 72static void
68 unsigned long size, mtrr_type type) 73centaur_set_mcr(unsigned int reg, unsigned long base,
74 unsigned long size, mtrr_type type)
69{ 75{
70 unsigned long low, high; 76 unsigned long low, high;
71 77
72 if (size == 0) { 78 if (size == 0) {
73 /* Disable */ 79 /* Disable */
74 high = low = 0; 80 high = low = 0;
75 } else { 81 } else {
76 high = base << PAGE_SHIFT; 82 high = base << PAGE_SHIFT;
77 if (centaur_mcr_type == 0) 83 if (centaur_mcr_type == 0) {
78 low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */ 84 /* Only support write-combining... */
79 else { 85 low = -size << PAGE_SHIFT | 0x1f;
86 } else {
80 if (type == MTRR_TYPE_UNCACHABLE) 87 if (type == MTRR_TYPE_UNCACHABLE)
81 low = -size << PAGE_SHIFT | 0x02; /* NC */ 88 low = -size << PAGE_SHIFT | 0x02; /* NC */
82 else 89 else
83 low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */ 90 low = -size << PAGE_SHIFT | 0x09; /* WWO, WC */
84 } 91 }
85 } 92 }
86 centaur_mcr[reg].high = high; 93 centaur_mcr[reg].high = high;
@@ -88,118 +95,16 @@ static void centaur_set_mcr(unsigned int reg, unsigned long base,
88 wrmsr(MSR_IDT_MCR0 + reg, low, high); 95 wrmsr(MSR_IDT_MCR0 + reg, low, high);
89} 96}
90 97
91#if 0 98static int
92/* 99centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
93 * Initialise the later (saner) Winchip MCR variant. In this version
94 * the BIOS can pass us the registers it has used (but not their values)
95 * and the control register is read/write
96 */
97
98static void __init
99centaur_mcr1_init(void)
100{
101 unsigned i;
102 u32 lo, hi;
103
104 /* Unfortunately, MCR's are read-only, so there is no way to
105 * find out what the bios might have done.
106 */
107
108 rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
109 if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */
110 lo &= ~0x1C0; /* clear key */
111 lo |= 0x040; /* set key to 1 */
112 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */
113 }
114
115 centaur_mcr_type = 1;
116
117 /*
118 * Clear any unconfigured MCR's.
119 */
120
121 for (i = 0; i < 8; ++i) {
122 if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) {
123 if (!(lo & (1 << (9 + i))))
124 wrmsr(MSR_IDT_MCR0 + i, 0, 0);
125 else
126 /*
127 * If the BIOS set up an MCR we cannot see it
128 * but we don't wish to obliterate it
129 */
130 centaur_mcr_reserved |= (1 << i);
131 }
132 }
133 /*
134 * Throw the main write-combining switch...
135 * However if OOSTORE is enabled then people have already done far
136 * cleverer things and we should behave.
137 */
138
139 lo |= 15; /* Write combine enables */
140 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
141}
142
143/*
144 * Initialise the original winchip with read only MCR registers
145 * no used bitmask for the BIOS to pass on and write only control
146 */
147
148static void __init
149centaur_mcr0_init(void)
150{
151 unsigned i;
152
153 /* Unfortunately, MCR's are read-only, so there is no way to
154 * find out what the bios might have done.
155 */
156
157 /* Clear any unconfigured MCR's.
158 * This way we are sure that the centaur_mcr array contains the actual
159 * values. The disadvantage is that any BIOS tweaks are thus undone.
160 *
161 */
162 for (i = 0; i < 8; ++i) {
163 if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0)
164 wrmsr(MSR_IDT_MCR0 + i, 0, 0);
165 }
166
167 wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */
168}
169
170/*
171 * Initialise Winchip series MCR registers
172 */
173
174static void __init
175centaur_mcr_init(void)
176{
177 struct set_mtrr_context ctxt;
178
179 set_mtrr_prepare_save(&ctxt);
180 set_mtrr_cache_disable(&ctxt);
181
182 if (boot_cpu_data.x86_model == 4)
183 centaur_mcr0_init();
184 else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9)
185 centaur_mcr1_init();
186
187 set_mtrr_done(&ctxt);
188}
189#endif
190
191static int centaur_validate_add_page(unsigned long base,
192 unsigned long size, unsigned int type)
193{ 100{
194 /* 101 /*
195 * FIXME: Winchip2 supports uncached 102 * FIXME: Winchip2 supports uncached
196 */ 103 */
197 if (type != MTRR_TYPE_WRCOMB && 104 if (type != MTRR_TYPE_WRCOMB &&
198 (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { 105 (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
199 printk(KERN_WARNING 106 pr_warning("mtrr: only write-combining%s supported\n",
200 "mtrr: only write-combining%s supported\n", 107 centaur_mcr_type ? " and uncacheable are" : " is");
201 centaur_mcr_type ? " and uncacheable are"
202 : " is");
203 return -EINVAL; 108 return -EINVAL;
204 } 109 }
205 return 0; 110 return 0;
@@ -207,7 +112,6 @@ static int centaur_validate_add_page(unsigned long base,
207 112
208static struct mtrr_ops centaur_mtrr_ops = { 113static struct mtrr_ops centaur_mtrr_ops = {
209 .vendor = X86_VENDOR_CENTAUR, 114 .vendor = X86_VENDOR_CENTAUR,
210// .init = centaur_mcr_init,
211 .set = centaur_set_mcr, 115 .set = centaur_set_mcr,
212 .get = centaur_get_mcr, 116 .get = centaur_get_mcr,
213 .get_free_region = centaur_get_free_region, 117 .get_free_region = centaur_get_free_region,
@@ -220,5 +124,3 @@ int __init centaur_init_mtrr(void)
220 set_mtrr_ops(&centaur_mtrr_ops); 124 set_mtrr_ops(&centaur_mtrr_ops);
221 return 0; 125 return 0;
222} 126}
223
224//arch_initcall(centaur_init_mtrr);
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 1d584a18a50d..315738c74aad 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -1,51 +1,75 @@
1/* MTRR (Memory Type Range Register) cleanup 1/*
2 2 * MTRR (Memory Type Range Register) cleanup
3 Copyright (C) 2009 Yinghai Lu 3 *
4 4 * Copyright (C) 2009 Yinghai Lu
5 This library is free software; you can redistribute it and/or 5 *
6 modify it under the terms of the GNU Library General Public 6 * This library is free software; you can redistribute it and/or
7 License as published by the Free Software Foundation; either 7 * modify it under the terms of the GNU Library General Public
8 version 2 of the License, or (at your option) any later version. 8 * License as published by the Free Software Foundation; either
9 9 * version 2 of the License, or (at your option) any later version.
10 This library is distributed in the hope that it will be useful, 10 *
11 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * This library is distributed in the hope that it will be useful,
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 Library General Public License for more details. 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 14 * Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public 15 *
16 License along with this library; if not, write to the Free 16 * You should have received a copy of the GNU Library General Public
17 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 17 * License along with this library; if not, write to the Free
18*/ 18 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 19 */
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/init.h> 21#include <linux/init.h>
22#include <linux/pci.h> 22#include <linux/pci.h>
23#include <linux/smp.h> 23#include <linux/smp.h>
24#include <linux/cpu.h> 24#include <linux/cpu.h>
25#include <linux/mutex.h>
26#include <linux/sort.h> 25#include <linux/sort.h>
26#include <linux/mutex.h>
27#include <linux/uaccess.h>
28#include <linux/kvm_para.h>
27 29
30#include <asm/processor.h>
28#include <asm/e820.h> 31#include <asm/e820.h>
29#include <asm/mtrr.h> 32#include <asm/mtrr.h>
30#include <asm/uaccess.h>
31#include <asm/processor.h>
32#include <asm/msr.h> 33#include <asm/msr.h>
33#include <asm/kvm_para.h>
34#include "mtrr.h"
35 34
36/* should be related to MTRR_VAR_RANGES nums */ 35#include "mtrr.h"
37#define RANGE_NUM 256
38 36
39struct res_range { 37struct res_range {
40 unsigned long start; 38 unsigned long start;
41 unsigned long end; 39 unsigned long end;
40};
41
42struct var_mtrr_range_state {
43 unsigned long base_pfn;
44 unsigned long size_pfn;
45 mtrr_type type;
46};
47
48struct var_mtrr_state {
49 unsigned long range_startk;
50 unsigned long range_sizek;
51 unsigned long chunk_sizek;
52 unsigned long gran_sizek;
53 unsigned int reg;
42}; 54};
43 55
56/* Should be related to MTRR_VAR_RANGES nums */
57#define RANGE_NUM 256
58
59static struct res_range __initdata range[RANGE_NUM];
60static int __initdata nr_range;
61
62static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
63
64static int __initdata debug_print;
65#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
66
67
44static int __init 68static int __init
45add_range(struct res_range *range, int nr_range, unsigned long start, 69add_range(struct res_range *range, int nr_range,
46 unsigned long end) 70 unsigned long start, unsigned long end)
47{ 71{
48 /* out of slots */ 72 /* Out of slots: */
49 if (nr_range >= RANGE_NUM) 73 if (nr_range >= RANGE_NUM)
50 return nr_range; 74 return nr_range;
51 75
@@ -58,12 +82,12 @@ add_range(struct res_range *range, int nr_range, unsigned long start,
58} 82}
59 83
60static int __init 84static int __init
61add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, 85add_range_with_merge(struct res_range *range, int nr_range,
62 unsigned long end) 86 unsigned long start, unsigned long end)
63{ 87{
64 int i; 88 int i;
65 89
66 /* try to merge it with old one */ 90 /* Try to merge it with old one: */
67 for (i = 0; i < nr_range; i++) { 91 for (i = 0; i < nr_range; i++) {
68 unsigned long final_start, final_end; 92 unsigned long final_start, final_end;
69 unsigned long common_start, common_end; 93 unsigned long common_start, common_end;
@@ -84,7 +108,7 @@ add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
84 return nr_range; 108 return nr_range;
85 } 109 }
86 110
87 /* need to add that */ 111 /* Need to add it: */
88 return add_range(range, nr_range, start, end); 112 return add_range(range, nr_range, start, end);
89} 113}
90 114
@@ -117,7 +141,7 @@ subtract_range(struct res_range *range, unsigned long start, unsigned long end)
117 } 141 }
118 142
119 if (start > range[j].start && end < range[j].end) { 143 if (start > range[j].start && end < range[j].end) {
120 /* find the new spare */ 144 /* Find the new spare: */
121 for (i = 0; i < RANGE_NUM; i++) { 145 for (i = 0; i < RANGE_NUM; i++) {
122 if (range[i].end == 0) 146 if (range[i].end == 0)
123 break; 147 break;
@@ -146,14 +170,8 @@ static int __init cmp_range(const void *x1, const void *x2)
146 return start1 - start2; 170 return start1 - start2;
147} 171}
148 172
149struct var_mtrr_range_state { 173#define BIOS_BUG_MSG KERN_WARNING \
150 unsigned long base_pfn; 174 "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
151 unsigned long size_pfn;
152 mtrr_type type;
153};
154
155static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
156static int __initdata debug_print;
157 175
158static int __init 176static int __init
159x86_get_mtrr_mem_range(struct res_range *range, int nr_range, 177x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
@@ -180,7 +198,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
180 range[i].start, range[i].end + 1); 198 range[i].start, range[i].end + 1);
181 } 199 }
182 200
183 /* take out UC ranges */ 201 /* Take out UC ranges: */
184 for (i = 0; i < num_var_ranges; i++) { 202 for (i = 0; i < num_var_ranges; i++) {
185 type = range_state[i].type; 203 type = range_state[i].type;
186 if (type != MTRR_TYPE_UNCACHABLE && 204 if (type != MTRR_TYPE_UNCACHABLE &&
@@ -193,9 +211,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
193 if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && 211 if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
194 (mtrr_state.enabled & 1)) { 212 (mtrr_state.enabled & 1)) {
195 /* Var MTRR contains UC entry below 1M? Skip it: */ 213 /* Var MTRR contains UC entry below 1M? Skip it: */
196 printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d " 214 printk(BIOS_BUG_MSG, i);
197 "contains strange UC entry under 1M, check "
198 "with your system vendor!\n", i);
199 if (base + size <= (1<<(20-PAGE_SHIFT))) 215 if (base + size <= (1<<(20-PAGE_SHIFT)))
200 continue; 216 continue;
201 size -= (1<<(20-PAGE_SHIFT)) - base; 217 size -= (1<<(20-PAGE_SHIFT)) - base;
@@ -237,17 +253,13 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
237 return nr_range; 253 return nr_range;
238} 254}
239 255
240static struct res_range __initdata range[RANGE_NUM];
241static int __initdata nr_range;
242
243#ifdef CONFIG_MTRR_SANITIZER 256#ifdef CONFIG_MTRR_SANITIZER
244 257
245static unsigned long __init sum_ranges(struct res_range *range, int nr_range) 258static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
246{ 259{
247 unsigned long sum; 260 unsigned long sum = 0;
248 int i; 261 int i;
249 262
250 sum = 0;
251 for (i = 0; i < nr_range; i++) 263 for (i = 0; i < nr_range; i++)
252 sum += range[i].end + 1 - range[i].start; 264 sum += range[i].end + 1 - range[i].start;
253 265
@@ -278,17 +290,9 @@ static int __init mtrr_cleanup_debug_setup(char *str)
278} 290}
279early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); 291early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
280 292
281struct var_mtrr_state {
282 unsigned long range_startk;
283 unsigned long range_sizek;
284 unsigned long chunk_sizek;
285 unsigned long gran_sizek;
286 unsigned int reg;
287};
288
289static void __init 293static void __init
290set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, 294set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
291 unsigned char type, unsigned int address_bits) 295 unsigned char type, unsigned int address_bits)
292{ 296{
293 u32 base_lo, base_hi, mask_lo, mask_hi; 297 u32 base_lo, base_hi, mask_lo, mask_hi;
294 u64 base, mask; 298 u64 base, mask;
@@ -301,7 +305,7 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
301 mask = (1ULL << address_bits) - 1; 305 mask = (1ULL << address_bits) - 1;
302 mask &= ~((((u64)sizek) << 10) - 1); 306 mask &= ~((((u64)sizek) << 10) - 1);
303 307
304 base = ((u64)basek) << 10; 308 base = ((u64)basek) << 10;
305 309
306 base |= type; 310 base |= type;
307 mask |= 0x800; 311 mask |= 0x800;
@@ -317,15 +321,14 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
317 321
318static void __init 322static void __init
319save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, 323save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
320 unsigned char type) 324 unsigned char type)
321{ 325{
322 range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); 326 range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
323 range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); 327 range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
324 range_state[reg].type = type; 328 range_state[reg].type = type;
325} 329}
326 330
327static void __init 331static void __init set_var_mtrr_all(unsigned int address_bits)
328set_var_mtrr_all(unsigned int address_bits)
329{ 332{
330 unsigned long basek, sizek; 333 unsigned long basek, sizek;
331 unsigned char type; 334 unsigned char type;
@@ -342,11 +345,11 @@ set_var_mtrr_all(unsigned int address_bits)
342 345
343static unsigned long to_size_factor(unsigned long sizek, char *factorp) 346static unsigned long to_size_factor(unsigned long sizek, char *factorp)
344{ 347{
345 char factor;
346 unsigned long base = sizek; 348 unsigned long base = sizek;
349 char factor;
347 350
348 if (base & ((1<<10) - 1)) { 351 if (base & ((1<<10) - 1)) {
349 /* not MB alignment */ 352 /* Not MB-aligned: */
350 factor = 'K'; 353 factor = 'K';
351 } else if (base & ((1<<20) - 1)) { 354 } else if (base & ((1<<20) - 1)) {
352 factor = 'M'; 355 factor = 'M';
@@ -372,11 +375,12 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
372 unsigned long max_align, align; 375 unsigned long max_align, align;
373 unsigned long sizek; 376 unsigned long sizek;
374 377
375 /* Compute the maximum size I can make a range */ 378 /* Compute the maximum size with which we can make a range: */
376 if (range_startk) 379 if (range_startk)
377 max_align = ffs(range_startk) - 1; 380 max_align = ffs(range_startk) - 1;
378 else 381 else
379 max_align = 32; 382 max_align = 32;
383
380 align = fls(range_sizek) - 1; 384 align = fls(range_sizek) - 1;
381 if (align > max_align) 385 if (align > max_align)
382 align = max_align; 386 align = max_align;
@@ -386,11 +390,10 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
386 char start_factor = 'K', size_factor = 'K'; 390 char start_factor = 'K', size_factor = 'K';
387 unsigned long start_base, size_base; 391 unsigned long start_base, size_base;
388 392
389 start_base = to_size_factor(range_startk, 393 start_base = to_size_factor(range_startk, &start_factor);
390 &start_factor), 394 size_base = to_size_factor(sizek, &size_factor);
391 size_base = to_size_factor(sizek, &size_factor),
392 395
393 printk(KERN_DEBUG "Setting variable MTRR %d, " 396 Dprintk("Setting variable MTRR %d, "
394 "base: %ld%cB, range: %ld%cB, type %s\n", 397 "base: %ld%cB, range: %ld%cB, type %s\n",
395 reg, start_base, start_factor, 398 reg, start_base, start_factor,
396 size_base, size_factor, 399 size_base, size_factor,
@@ -425,10 +428,11 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
425 chunk_sizek = state->chunk_sizek; 428 chunk_sizek = state->chunk_sizek;
426 gran_sizek = state->gran_sizek; 429 gran_sizek = state->gran_sizek;
427 430
428 /* align with gran size, prevent small block used up MTRRs */ 431 /* Align with gran size, prevent small block used up MTRRs: */
429 range_basek = ALIGN(state->range_startk, gran_sizek); 432 range_basek = ALIGN(state->range_startk, gran_sizek);
430 if ((range_basek > basek) && basek) 433 if ((range_basek > basek) && basek)
431 return second_sizek; 434 return second_sizek;
435
432 state->range_sizek -= (range_basek - state->range_startk); 436 state->range_sizek -= (range_basek - state->range_startk);
433 range_sizek = ALIGN(state->range_sizek, gran_sizek); 437 range_sizek = ALIGN(state->range_sizek, gran_sizek);
434 438
@@ -439,22 +443,21 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
439 } 443 }
440 state->range_sizek = range_sizek; 444 state->range_sizek = range_sizek;
441 445
442 /* try to append some small hole */ 446 /* Try to append some small hole: */
443 range0_basek = state->range_startk; 447 range0_basek = state->range_startk;
444 range0_sizek = ALIGN(state->range_sizek, chunk_sizek); 448 range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
445 449
446 /* no increase */ 450 /* No increase: */
447 if (range0_sizek == state->range_sizek) { 451 if (range0_sizek == state->range_sizek) {
448 if (debug_print) 452 Dprintk("rangeX: %016lx - %016lx\n",
449 printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", 453 range0_basek<<10,
450 range0_basek<<10, 454 (range0_basek + state->range_sizek)<<10);
451 (range0_basek + state->range_sizek)<<10);
452 state->reg = range_to_mtrr(state->reg, range0_basek, 455 state->reg = range_to_mtrr(state->reg, range0_basek,
453 state->range_sizek, MTRR_TYPE_WRBACK); 456 state->range_sizek, MTRR_TYPE_WRBACK);
454 return 0; 457 return 0;
455 } 458 }
456 459
457 /* only cut back, when it is not the last */ 460 /* Only cut back when it is not the last: */
458 if (sizek) { 461 if (sizek) {
459 while (range0_basek + range0_sizek > (basek + sizek)) { 462 while (range0_basek + range0_sizek > (basek + sizek)) {
460 if (range0_sizek >= chunk_sizek) 463 if (range0_sizek >= chunk_sizek)
@@ -470,16 +473,16 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
470second_try: 473second_try:
471 range_basek = range0_basek + range0_sizek; 474 range_basek = range0_basek + range0_sizek;
472 475
473 /* one hole in the middle */ 476 /* One hole in the middle: */
474 if (range_basek > basek && range_basek <= (basek + sizek)) 477 if (range_basek > basek && range_basek <= (basek + sizek))
475 second_sizek = range_basek - basek; 478 second_sizek = range_basek - basek;
476 479
477 if (range0_sizek > state->range_sizek) { 480 if (range0_sizek > state->range_sizek) {
478 481
479 /* one hole in middle or at end */ 482 /* One hole in middle or at the end: */
480 hole_sizek = range0_sizek - state->range_sizek - second_sizek; 483 hole_sizek = range0_sizek - state->range_sizek - second_sizek;
481 484
482 /* hole size should be less than half of range0 size */ 485 /* Hole size should be less than half of range0 size: */
483 if (hole_sizek >= (range0_sizek >> 1) && 486 if (hole_sizek >= (range0_sizek >> 1) &&
484 range0_sizek >= chunk_sizek) { 487 range0_sizek >= chunk_sizek) {
485 range0_sizek -= chunk_sizek; 488 range0_sizek -= chunk_sizek;
@@ -491,32 +494,30 @@ second_try:
491 } 494 }
492 495
493 if (range0_sizek) { 496 if (range0_sizek) {
494 if (debug_print) 497 Dprintk("range0: %016lx - %016lx\n",
495 printk(KERN_DEBUG "range0: %016lx - %016lx\n", 498 range0_basek<<10,
496 range0_basek<<10, 499 (range0_basek + range0_sizek)<<10);
497 (range0_basek + range0_sizek)<<10);
498 state->reg = range_to_mtrr(state->reg, range0_basek, 500 state->reg = range_to_mtrr(state->reg, range0_basek,
499 range0_sizek, MTRR_TYPE_WRBACK); 501 range0_sizek, MTRR_TYPE_WRBACK);
500 } 502 }
501 503
502 if (range0_sizek < state->range_sizek) { 504 if (range0_sizek < state->range_sizek) {
503 /* need to handle left over */ 505 /* Need to handle left over range: */
504 range_sizek = state->range_sizek - range0_sizek; 506 range_sizek = state->range_sizek - range0_sizek;
505 507
506 if (debug_print) 508 Dprintk("range: %016lx - %016lx\n",
507 printk(KERN_DEBUG "range: %016lx - %016lx\n", 509 range_basek<<10,
508 range_basek<<10, 510 (range_basek + range_sizek)<<10);
509 (range_basek + range_sizek)<<10); 511
510 state->reg = range_to_mtrr(state->reg, range_basek, 512 state->reg = range_to_mtrr(state->reg, range_basek,
511 range_sizek, MTRR_TYPE_WRBACK); 513 range_sizek, MTRR_TYPE_WRBACK);
512 } 514 }
513 515
514 if (hole_sizek) { 516 if (hole_sizek) {
515 hole_basek = range_basek - hole_sizek - second_sizek; 517 hole_basek = range_basek - hole_sizek - second_sizek;
516 if (debug_print) 518 Dprintk("hole: %016lx - %016lx\n",
517 printk(KERN_DEBUG "hole: %016lx - %016lx\n", 519 hole_basek<<10,
518 hole_basek<<10, 520 (hole_basek + hole_sizek)<<10);
519 (hole_basek + hole_sizek)<<10);
520 state->reg = range_to_mtrr(state->reg, hole_basek, 521 state->reg = range_to_mtrr(state->reg, hole_basek,
521 hole_sizek, MTRR_TYPE_UNCACHABLE); 522 hole_sizek, MTRR_TYPE_UNCACHABLE);
522 } 523 }
@@ -537,23 +538,23 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
537 basek = base_pfn << (PAGE_SHIFT - 10); 538 basek = base_pfn << (PAGE_SHIFT - 10);
538 sizek = size_pfn << (PAGE_SHIFT - 10); 539 sizek = size_pfn << (PAGE_SHIFT - 10);
539 540
540 /* See if I can merge with the last range */ 541 /* See if I can merge with the last range: */
541 if ((basek <= 1024) || 542 if ((basek <= 1024) ||
542 (state->range_startk + state->range_sizek == basek)) { 543 (state->range_startk + state->range_sizek == basek)) {
543 unsigned long endk = basek + sizek; 544 unsigned long endk = basek + sizek;
544 state->range_sizek = endk - state->range_startk; 545 state->range_sizek = endk - state->range_startk;
545 return; 546 return;
546 } 547 }
547 /* Write the range mtrrs */ 548 /* Write the range mtrrs: */
548 if (state->range_sizek != 0) 549 if (state->range_sizek != 0)
549 second_sizek = range_to_mtrr_with_hole(state, basek, sizek); 550 second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
550 551
551 /* Allocate an msr */ 552 /* Allocate an msr: */
552 state->range_startk = basek + second_sizek; 553 state->range_startk = basek + second_sizek;
553 state->range_sizek = sizek - second_sizek; 554 state->range_sizek = sizek - second_sizek;
554} 555}
555 556
556/* mininum size of mtrr block that can take hole */ 557/* Mininum size of mtrr block that can take hole: */
557static u64 mtrr_chunk_size __initdata = (256ULL<<20); 558static u64 mtrr_chunk_size __initdata = (256ULL<<20);
558 559
559static int __init parse_mtrr_chunk_size_opt(char *p) 560static int __init parse_mtrr_chunk_size_opt(char *p)
@@ -565,7 +566,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p)
565} 566}
566early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); 567early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
567 568
568/* granity of mtrr of block */ 569/* Granularity of mtrr of block: */
569static u64 mtrr_gran_size __initdata; 570static u64 mtrr_gran_size __initdata;
570 571
571static int __init parse_mtrr_gran_size_opt(char *p) 572static int __init parse_mtrr_gran_size_opt(char *p)
@@ -577,7 +578,7 @@ static int __init parse_mtrr_gran_size_opt(char *p)
577} 578}
578early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); 579early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
579 580
580static int nr_mtrr_spare_reg __initdata = 581static unsigned long nr_mtrr_spare_reg __initdata =
581 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; 582 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
582 583
583static int __init parse_mtrr_spare_reg(char *arg) 584static int __init parse_mtrr_spare_reg(char *arg)
@@ -586,7 +587,6 @@ static int __init parse_mtrr_spare_reg(char *arg)
586 nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); 587 nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
587 return 0; 588 return 0;
588} 589}
589
590early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); 590early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
591 591
592static int __init 592static int __init
@@ -594,8 +594,8 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
594 u64 chunk_size, u64 gran_size) 594 u64 chunk_size, u64 gran_size)
595{ 595{
596 struct var_mtrr_state var_state; 596 struct var_mtrr_state var_state;
597 int i;
598 int num_reg; 597 int num_reg;
598 int i;
599 599
600 var_state.range_startk = 0; 600 var_state.range_startk = 0;
601 var_state.range_sizek = 0; 601 var_state.range_sizek = 0;
@@ -605,17 +605,18 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
605 605
606 memset(range_state, 0, sizeof(range_state)); 606 memset(range_state, 0, sizeof(range_state));
607 607
608 /* Write the range etc */ 608 /* Write the range: */
609 for (i = 0; i < nr_range; i++) 609 for (i = 0; i < nr_range; i++) {
610 set_var_mtrr_range(&var_state, range[i].start, 610 set_var_mtrr_range(&var_state, range[i].start,
611 range[i].end - range[i].start + 1); 611 range[i].end - range[i].start + 1);
612 }
612 613
613 /* Write the last range */ 614 /* Write the last range: */
614 if (var_state.range_sizek != 0) 615 if (var_state.range_sizek != 0)
615 range_to_mtrr_with_hole(&var_state, 0, 0); 616 range_to_mtrr_with_hole(&var_state, 0, 0);
616 617
617 num_reg = var_state.reg; 618 num_reg = var_state.reg;
618 /* Clear out the extra MTRR's */ 619 /* Clear out the extra MTRR's: */
619 while (var_state.reg < num_var_ranges) { 620 while (var_state.reg < num_var_ranges) {
620 save_var_mtrr(var_state.reg, 0, 0, 0); 621 save_var_mtrr(var_state.reg, 0, 0, 0);
621 var_state.reg++; 622 var_state.reg++;
@@ -625,11 +626,11 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
625} 626}
626 627
627struct mtrr_cleanup_result { 628struct mtrr_cleanup_result {
628 unsigned long gran_sizek; 629 unsigned long gran_sizek;
629 unsigned long chunk_sizek; 630 unsigned long chunk_sizek;
630 unsigned long lose_cover_sizek; 631 unsigned long lose_cover_sizek;
631 unsigned int num_reg; 632 unsigned int num_reg;
632 int bad; 633 int bad;
633}; 634};
634 635
635/* 636/*
@@ -645,10 +646,10 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM];
645 646
646static void __init print_out_mtrr_range_state(void) 647static void __init print_out_mtrr_range_state(void)
647{ 648{
648 int i;
649 char start_factor = 'K', size_factor = 'K'; 649 char start_factor = 'K', size_factor = 'K';
650 unsigned long start_base, size_base; 650 unsigned long start_base, size_base;
651 mtrr_type type; 651 mtrr_type type;
652 int i;
652 653
653 for (i = 0; i < num_var_ranges; i++) { 654 for (i = 0; i < num_var_ranges; i++) {
654 655
@@ -676,10 +677,10 @@ static int __init mtrr_need_cleanup(void)
676 int i; 677 int i;
677 mtrr_type type; 678 mtrr_type type;
678 unsigned long size; 679 unsigned long size;
679 /* extra one for all 0 */ 680 /* Extra one for all 0: */
680 int num[MTRR_NUM_TYPES + 1]; 681 int num[MTRR_NUM_TYPES + 1];
681 682
682 /* check entries number */ 683 /* Check entries number: */
683 memset(num, 0, sizeof(num)); 684 memset(num, 0, sizeof(num));
684 for (i = 0; i < num_var_ranges; i++) { 685 for (i = 0; i < num_var_ranges; i++) {
685 type = range_state[i].type; 686 type = range_state[i].type;
@@ -693,88 +694,86 @@ static int __init mtrr_need_cleanup(void)
693 num[type]++; 694 num[type]++;
694 } 695 }
695 696
696 /* check if we got UC entries */ 697 /* Check if we got UC entries: */
697 if (!num[MTRR_TYPE_UNCACHABLE]) 698 if (!num[MTRR_TYPE_UNCACHABLE])
698 return 0; 699 return 0;
699 700
700 /* check if we only had WB and UC */ 701 /* Check if we only had WB and UC */
701 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != 702 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
702 num_var_ranges - num[MTRR_NUM_TYPES]) 703 num_var_ranges - num[MTRR_NUM_TYPES])
703 return 0; 704 return 0;
704 705
705 return 1; 706 return 1;
706} 707}
707 708
708static unsigned long __initdata range_sums; 709static unsigned long __initdata range_sums;
709static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, 710
710 unsigned long extra_remove_base, 711static void __init
711 unsigned long extra_remove_size, 712mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
712 int i) 713 unsigned long x_remove_base,
714 unsigned long x_remove_size, int i)
713{ 715{
714 int num_reg;
715 static struct res_range range_new[RANGE_NUM]; 716 static struct res_range range_new[RANGE_NUM];
716 static int nr_range_new;
717 unsigned long range_sums_new; 717 unsigned long range_sums_new;
718 static int nr_range_new;
719 int num_reg;
718 720
719 /* convert ranges to var ranges state */ 721 /* Convert ranges to var ranges state: */
720 num_reg = x86_setup_var_mtrrs(range, nr_range, 722 num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
721 chunk_size, gran_size);
722 723
723 /* we got new setting in range_state, check it */ 724 /* We got new setting in range_state, check it: */
724 memset(range_new, 0, sizeof(range_new)); 725 memset(range_new, 0, sizeof(range_new));
725 nr_range_new = x86_get_mtrr_mem_range(range_new, 0, 726 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
726 extra_remove_base, extra_remove_size); 727 x_remove_base, x_remove_size);
727 range_sums_new = sum_ranges(range_new, nr_range_new); 728 range_sums_new = sum_ranges(range_new, nr_range_new);
728 729
729 result[i].chunk_sizek = chunk_size >> 10; 730 result[i].chunk_sizek = chunk_size >> 10;
730 result[i].gran_sizek = gran_size >> 10; 731 result[i].gran_sizek = gran_size >> 10;
731 result[i].num_reg = num_reg; 732 result[i].num_reg = num_reg;
733
732 if (range_sums < range_sums_new) { 734 if (range_sums < range_sums_new) {
733 result[i].lose_cover_sizek = 735 result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT;
734 (range_sums_new - range_sums) << PSHIFT;
735 result[i].bad = 1; 736 result[i].bad = 1;
736 } else 737 } else {
737 result[i].lose_cover_sizek = 738 result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT;
738 (range_sums - range_sums_new) << PSHIFT; 739 }
739 740
740 /* double check it */ 741 /* Double check it: */
741 if (!result[i].bad && !result[i].lose_cover_sizek) { 742 if (!result[i].bad && !result[i].lose_cover_sizek) {
742 if (nr_range_new != nr_range || 743 if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range)))
743 memcmp(range, range_new, sizeof(range))) 744 result[i].bad = 1;
744 result[i].bad = 1;
745 } 745 }
746 746
747 if (!result[i].bad && (range_sums - range_sums_new < 747 if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg]))
748 min_loss_pfn[num_reg])) { 748 min_loss_pfn[num_reg] = range_sums - range_sums_new;
749 min_loss_pfn[num_reg] =
750 range_sums - range_sums_new;
751 }
752} 749}
753 750
754static void __init mtrr_print_out_one_result(int i) 751static void __init mtrr_print_out_one_result(int i)
755{ 752{
756 char gran_factor, chunk_factor, lose_factor;
757 unsigned long gran_base, chunk_base, lose_base; 753 unsigned long gran_base, chunk_base, lose_base;
754 char gran_factor, chunk_factor, lose_factor;
758 755
759 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), 756 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
760 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), 757 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
761 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), 758 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
762 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", 759
763 result[i].bad ? "*BAD*" : " ", 760 pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t",
764 gran_base, gran_factor, chunk_base, chunk_factor); 761 result[i].bad ? "*BAD*" : " ",
765 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", 762 gran_base, gran_factor, chunk_base, chunk_factor);
766 result[i].num_reg, result[i].bad ? "-" : "", 763 pr_cont("num_reg: %d \tlose cover RAM: %s%ld%c\n",
767 lose_base, lose_factor); 764 result[i].num_reg, result[i].bad ? "-" : "",
765 lose_base, lose_factor);
768} 766}
769 767
770static int __init mtrr_search_optimal_index(void) 768static int __init mtrr_search_optimal_index(void)
771{ 769{
772 int i;
773 int num_reg_good; 770 int num_reg_good;
774 int index_good; 771 int index_good;
772 int i;
775 773
776 if (nr_mtrr_spare_reg >= num_var_ranges) 774 if (nr_mtrr_spare_reg >= num_var_ranges)
777 nr_mtrr_spare_reg = num_var_ranges - 1; 775 nr_mtrr_spare_reg = num_var_ranges - 1;
776
778 num_reg_good = -1; 777 num_reg_good = -1;
779 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { 778 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
780 if (!min_loss_pfn[i]) 779 if (!min_loss_pfn[i])
@@ -796,24 +795,24 @@ static int __init mtrr_search_optimal_index(void)
796 return index_good; 795 return index_good;
797} 796}
798 797
799
800int __init mtrr_cleanup(unsigned address_bits) 798int __init mtrr_cleanup(unsigned address_bits)
801{ 799{
802 unsigned long extra_remove_base, extra_remove_size; 800 unsigned long x_remove_base, x_remove_size;
803 unsigned long base, size, def, dummy; 801 unsigned long base, size, def, dummy;
804 mtrr_type type;
805 u64 chunk_size, gran_size; 802 u64 chunk_size, gran_size;
803 mtrr_type type;
806 int index_good; 804 int index_good;
807 int i; 805 int i;
808 806
809 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) 807 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
810 return 0; 808 return 0;
809
811 rdmsr(MSR_MTRRdefType, def, dummy); 810 rdmsr(MSR_MTRRdefType, def, dummy);
812 def &= 0xff; 811 def &= 0xff;
813 if (def != MTRR_TYPE_UNCACHABLE) 812 if (def != MTRR_TYPE_UNCACHABLE)
814 return 0; 813 return 0;
815 814
816 /* get it and store it aside */ 815 /* Get it and store it aside: */
817 memset(range_state, 0, sizeof(range_state)); 816 memset(range_state, 0, sizeof(range_state));
818 for (i = 0; i < num_var_ranges; i++) { 817 for (i = 0; i < num_var_ranges; i++) {
819 mtrr_if->get(i, &base, &size, &type); 818 mtrr_if->get(i, &base, &size, &type);
@@ -822,29 +821,28 @@ int __init mtrr_cleanup(unsigned address_bits)
822 range_state[i].type = type; 821 range_state[i].type = type;
823 } 822 }
824 823
825 /* check if we need handle it and can handle it */ 824 /* Check if we need handle it and can handle it: */
826 if (!mtrr_need_cleanup()) 825 if (!mtrr_need_cleanup())
827 return 0; 826 return 0;
828 827
829 /* print original var MTRRs at first, for debugging: */ 828 /* Print original var MTRRs at first, for debugging: */
830 printk(KERN_DEBUG "original variable MTRRs\n"); 829 printk(KERN_DEBUG "original variable MTRRs\n");
831 print_out_mtrr_range_state(); 830 print_out_mtrr_range_state();
832 831
833 memset(range, 0, sizeof(range)); 832 memset(range, 0, sizeof(range));
834 extra_remove_size = 0; 833 x_remove_size = 0;
835 extra_remove_base = 1 << (32 - PAGE_SHIFT); 834 x_remove_base = 1 << (32 - PAGE_SHIFT);
836 if (mtrr_tom2) 835 if (mtrr_tom2)
837 extra_remove_size = 836 x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base;
838 (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; 837
839 nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, 838 nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size);
840 extra_remove_size);
841 /* 839 /*
842 * [0, 1M) should always be coverred by var mtrr with WB 840 * [0, 1M) should always be covered by var mtrr with WB
843 * and fixed mtrrs should take effective before var mtrr for it 841 * and fixed mtrrs should take effect before var mtrr for it:
844 */ 842 */
845 nr_range = add_range_with_merge(range, nr_range, 0, 843 nr_range = add_range_with_merge(range, nr_range, 0,
846 (1ULL<<(20 - PAGE_SHIFT)) - 1); 844 (1ULL<<(20 - PAGE_SHIFT)) - 1);
847 /* sort the ranges */ 845 /* Sort the ranges: */
848 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); 846 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
849 847
850 range_sums = sum_ranges(range, nr_range); 848 range_sums = sum_ranges(range, nr_range);
@@ -854,7 +852,7 @@ int __init mtrr_cleanup(unsigned address_bits)
854 if (mtrr_chunk_size && mtrr_gran_size) { 852 if (mtrr_chunk_size && mtrr_gran_size) {
855 i = 0; 853 i = 0;
856 mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, 854 mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
857 extra_remove_base, extra_remove_size, i); 855 x_remove_base, x_remove_size, i);
858 856
859 mtrr_print_out_one_result(i); 857 mtrr_print_out_one_result(i);
860 858
@@ -880,7 +878,7 @@ int __init mtrr_cleanup(unsigned address_bits)
880 continue; 878 continue;
881 879
882 mtrr_calc_range_state(chunk_size, gran_size, 880 mtrr_calc_range_state(chunk_size, gran_size,
883 extra_remove_base, extra_remove_size, i); 881 x_remove_base, x_remove_size, i);
884 if (debug_print) { 882 if (debug_print) {
885 mtrr_print_out_one_result(i); 883 mtrr_print_out_one_result(i);
886 printk(KERN_INFO "\n"); 884 printk(KERN_INFO "\n");
@@ -890,7 +888,7 @@ int __init mtrr_cleanup(unsigned address_bits)
890 } 888 }
891 } 889 }
892 890
893 /* try to find the optimal index */ 891 /* Try to find the optimal index: */
894 index_good = mtrr_search_optimal_index(); 892 index_good = mtrr_search_optimal_index();
895 893
896 if (index_good != -1) { 894 if (index_good != -1) {
@@ -898,7 +896,7 @@ int __init mtrr_cleanup(unsigned address_bits)
898 i = index_good; 896 i = index_good;
899 mtrr_print_out_one_result(i); 897 mtrr_print_out_one_result(i);
900 898
901 /* convert ranges to var ranges state */ 899 /* Convert ranges to var ranges state: */
902 chunk_size = result[i].chunk_sizek; 900 chunk_size = result[i].chunk_sizek;
903 chunk_size <<= 10; 901 chunk_size <<= 10;
904 gran_size = result[i].gran_sizek; 902 gran_size = result[i].gran_sizek;
@@ -941,8 +939,8 @@ early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
941 * Note this won't check if the MTRRs < 4GB where the magic bit doesn't 939 * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
942 * apply to are wrong, but so far we don't know of any such case in the wild. 940 * apply to are wrong, but so far we don't know of any such case in the wild.
943 */ 941 */
944#define Tom2Enabled (1U << 21) 942#define Tom2Enabled (1U << 21)
945#define Tom2ForceMemTypeWB (1U << 22) 943#define Tom2ForceMemTypeWB (1U << 22)
946 944
947int __init amd_special_default_mtrr(void) 945int __init amd_special_default_mtrr(void)
948{ 946{
@@ -952,7 +950,7 @@ int __init amd_special_default_mtrr(void)
952 return 0; 950 return 0;
953 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) 951 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
954 return 0; 952 return 0;
955 /* In case some hypervisor doesn't pass SYSCFG through */ 953 /* In case some hypervisor doesn't pass SYSCFG through: */
956 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) 954 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
957 return 0; 955 return 0;
958 /* 956 /*
@@ -965,19 +963,21 @@ int __init amd_special_default_mtrr(void)
965 return 0; 963 return 0;
966} 964}
967 965
968static u64 __init real_trim_memory(unsigned long start_pfn, 966static u64 __init
969 unsigned long limit_pfn) 967real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
970{ 968{
971 u64 trim_start, trim_size; 969 u64 trim_start, trim_size;
970
972 trim_start = start_pfn; 971 trim_start = start_pfn;
973 trim_start <<= PAGE_SHIFT; 972 trim_start <<= PAGE_SHIFT;
973
974 trim_size = limit_pfn; 974 trim_size = limit_pfn;
975 trim_size <<= PAGE_SHIFT; 975 trim_size <<= PAGE_SHIFT;
976 trim_size -= trim_start; 976 trim_size -= trim_start;
977 977
978 return e820_update_range(trim_start, trim_size, E820_RAM, 978 return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED);
979 E820_RESERVED);
980} 979}
980
981/** 981/**
982 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs 982 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
983 * @end_pfn: ending page frame number 983 * @end_pfn: ending page frame number
@@ -985,7 +985,7 @@ static u64 __init real_trim_memory(unsigned long start_pfn,
985 * Some buggy BIOSes don't setup the MTRRs properly for systems with certain 985 * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
986 * memory configurations. This routine checks that the highest MTRR matches 986 * memory configurations. This routine checks that the highest MTRR matches
987 * the end of memory, to make sure the MTRRs having a write back type cover 987 * the end of memory, to make sure the MTRRs having a write back type cover
988 * all of the memory the kernel is intending to use. If not, it'll trim any 988 * all of the memory the kernel is intending to use. If not, it'll trim any
989 * memory off the end by adjusting end_pfn, removing it from the kernel's 989 * memory off the end by adjusting end_pfn, removing it from the kernel's
990 * allocation pools, warning the user with an obnoxious message. 990 * allocation pools, warning the user with an obnoxious message.
991 */ 991 */
@@ -994,21 +994,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
994 unsigned long i, base, size, highest_pfn = 0, def, dummy; 994 unsigned long i, base, size, highest_pfn = 0, def, dummy;
995 mtrr_type type; 995 mtrr_type type;
996 u64 total_trim_size; 996 u64 total_trim_size;
997
998 /* extra one for all 0 */ 997 /* extra one for all 0 */
999 int num[MTRR_NUM_TYPES + 1]; 998 int num[MTRR_NUM_TYPES + 1];
999
1000 /* 1000 /*
1001 * Make sure we only trim uncachable memory on machines that 1001 * Make sure we only trim uncachable memory on machines that
1002 * support the Intel MTRR architecture: 1002 * support the Intel MTRR architecture:
1003 */ 1003 */
1004 if (!is_cpu(INTEL) || disable_mtrr_trim) 1004 if (!is_cpu(INTEL) || disable_mtrr_trim)
1005 return 0; 1005 return 0;
1006
1006 rdmsr(MSR_MTRRdefType, def, dummy); 1007 rdmsr(MSR_MTRRdefType, def, dummy);
1007 def &= 0xff; 1008 def &= 0xff;
1008 if (def != MTRR_TYPE_UNCACHABLE) 1009 if (def != MTRR_TYPE_UNCACHABLE)
1009 return 0; 1010 return 0;
1010 1011
1011 /* get it and store it aside */ 1012 /* Get it and store it aside: */
1012 memset(range_state, 0, sizeof(range_state)); 1013 memset(range_state, 0, sizeof(range_state));
1013 for (i = 0; i < num_var_ranges; i++) { 1014 for (i = 0; i < num_var_ranges; i++) {
1014 mtrr_if->get(i, &base, &size, &type); 1015 mtrr_if->get(i, &base, &size, &type);
@@ -1017,7 +1018,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1017 range_state[i].type = type; 1018 range_state[i].type = type;
1018 } 1019 }
1019 1020
1020 /* Find highest cached pfn */ 1021 /* Find highest cached pfn: */
1021 for (i = 0; i < num_var_ranges; i++) { 1022 for (i = 0; i < num_var_ranges; i++) {
1022 type = range_state[i].type; 1023 type = range_state[i].type;
1023 if (type != MTRR_TYPE_WRBACK) 1024 if (type != MTRR_TYPE_WRBACK)
@@ -1028,13 +1029,13 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1028 highest_pfn = base + size; 1029 highest_pfn = base + size;
1029 } 1030 }
1030 1031
1031 /* kvm/qemu doesn't have mtrr set right, don't trim them all */ 1032 /* kvm/qemu doesn't have mtrr set right, don't trim them all: */
1032 if (!highest_pfn) { 1033 if (!highest_pfn) {
1033 printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); 1034 printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
1034 return 0; 1035 return 0;
1035 } 1036 }
1036 1037
1037 /* check entries number */ 1038 /* Check entries number: */
1038 memset(num, 0, sizeof(num)); 1039 memset(num, 0, sizeof(num));
1039 for (i = 0; i < num_var_ranges; i++) { 1040 for (i = 0; i < num_var_ranges; i++) {
1040 type = range_state[i].type; 1041 type = range_state[i].type;
@@ -1046,11 +1047,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1046 num[type]++; 1047 num[type]++;
1047 } 1048 }
1048 1049
1049 /* no entry for WB? */ 1050 /* No entry for WB? */
1050 if (!num[MTRR_TYPE_WRBACK]) 1051 if (!num[MTRR_TYPE_WRBACK])
1051 return 0; 1052 return 0;
1052 1053
1053 /* check if we only had WB and UC */ 1054 /* Check if we only had WB and UC: */
1054 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != 1055 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1055 num_var_ranges - num[MTRR_NUM_TYPES]) 1056 num_var_ranges - num[MTRR_NUM_TYPES])
1056 return 0; 1057 return 0;
@@ -1066,31 +1067,31 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1066 } 1067 }
1067 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); 1068 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
1068 1069
1070 /* Check the head: */
1069 total_trim_size = 0; 1071 total_trim_size = 0;
1070 /* check the head */
1071 if (range[0].start) 1072 if (range[0].start)
1072 total_trim_size += real_trim_memory(0, range[0].start); 1073 total_trim_size += real_trim_memory(0, range[0].start);
1073 /* check the holes */ 1074
1075 /* Check the holes: */
1074 for (i = 0; i < nr_range - 1; i++) { 1076 for (i = 0; i < nr_range - 1; i++) {
1075 if (range[i].end + 1 < range[i+1].start) 1077 if (range[i].end + 1 < range[i+1].start)
1076 total_trim_size += real_trim_memory(range[i].end + 1, 1078 total_trim_size += real_trim_memory(range[i].end + 1,
1077 range[i+1].start); 1079 range[i+1].start);
1078 } 1080 }
1079 /* check the top */ 1081
1082 /* Check the top: */
1080 i = nr_range - 1; 1083 i = nr_range - 1;
1081 if (range[i].end + 1 < end_pfn) 1084 if (range[i].end + 1 < end_pfn)
1082 total_trim_size += real_trim_memory(range[i].end + 1, 1085 total_trim_size += real_trim_memory(range[i].end + 1,
1083 end_pfn); 1086 end_pfn);
1084 1087
1085 if (total_trim_size) { 1088 if (total_trim_size) {
1086 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" 1089 pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);
1087 " all of memory, losing %lluMB of RAM.\n",
1088 total_trim_size >> 20);
1089 1090
1090 if (!changed_by_mtrr_cleanup) 1091 if (!changed_by_mtrr_cleanup)
1091 WARN_ON(1); 1092 WARN_ON(1);
1092 1093
1093 printk(KERN_INFO "update e820 for mtrr\n"); 1094 pr_info("update e820 for mtrr\n");
1094 update_e820(); 1095 update_e820();
1095 1096
1096 return 1; 1097 return 1;
@@ -1098,4 +1099,3 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1098 1099
1099 return 0; 1100 return 0;
1100} 1101}
1101
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index ff14c320040c..228d982ce09c 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -1,38 +1,40 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/io.h>
2#include <linux/mm.h> 3#include <linux/mm.h>
3#include <asm/mtrr.h> 4
4#include <asm/msr.h>
5#include <asm/io.h>
6#include <asm/processor-cyrix.h> 5#include <asm/processor-cyrix.h>
7#include <asm/processor-flags.h> 6#include <asm/processor-flags.h>
7#include <asm/mtrr.h>
8#include <asm/msr.h>
9
8#include "mtrr.h" 10#include "mtrr.h"
9 11
10static void 12static void
11cyrix_get_arr(unsigned int reg, unsigned long *base, 13cyrix_get_arr(unsigned int reg, unsigned long *base,
12 unsigned long *size, mtrr_type * type) 14 unsigned long *size, mtrr_type * type)
13{ 15{
14 unsigned long flags;
15 unsigned char arr, ccr3, rcr, shift; 16 unsigned char arr, ccr3, rcr, shift;
17 unsigned long flags;
16 18
17 arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ 19 arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
18 20
19 /* Save flags and disable interrupts */
20 local_irq_save(flags); 21 local_irq_save(flags);
21 22
22 ccr3 = getCx86(CX86_CCR3); 23 ccr3 = getCx86(CX86_CCR3);
23 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 24 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
24 ((unsigned char *) base)[3] = getCx86(arr); 25 ((unsigned char *)base)[3] = getCx86(arr);
25 ((unsigned char *) base)[2] = getCx86(arr + 1); 26 ((unsigned char *)base)[2] = getCx86(arr + 1);
26 ((unsigned char *) base)[1] = getCx86(arr + 2); 27 ((unsigned char *)base)[1] = getCx86(arr + 2);
27 rcr = getCx86(CX86_RCR_BASE + reg); 28 rcr = getCx86(CX86_RCR_BASE + reg);
28 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ 29 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
29 30
30 /* Enable interrupts if it was enabled previously */
31 local_irq_restore(flags); 31 local_irq_restore(flags);
32
32 shift = ((unsigned char *) base)[1] & 0x0f; 33 shift = ((unsigned char *) base)[1] & 0x0f;
33 *base >>= PAGE_SHIFT; 34 *base >>= PAGE_SHIFT;
34 35
35 /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 36 /*
37 * Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
36 * Note: shift==0xf means 4G, this is unsupported. 38 * Note: shift==0xf means 4G, this is unsupported.
37 */ 39 */
38 if (shift) 40 if (shift)
@@ -76,17 +78,20 @@ cyrix_get_arr(unsigned int reg, unsigned long *base,
76 } 78 }
77} 79}
78 80
81/*
82 * cyrix_get_free_region - get a free ARR.
83 *
84 * @base: the starting (base) address of the region.
85 * @size: the size (in bytes) of the region.
86 *
87 * Returns: the index of the region on success, else -1 on error.
88*/
79static int 89static int
80cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) 90cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
81/* [SUMMARY] Get a free ARR.
82 <base> The starting (base) address of the region.
83 <size> The size (in bytes) of the region.
84 [RETURNS] The index of the region on success, else -1 on error.
85*/
86{ 91{
87 int i;
88 mtrr_type ltype;
89 unsigned long lbase, lsize; 92 unsigned long lbase, lsize;
93 mtrr_type ltype;
94 int i;
90 95
91 switch (replace_reg) { 96 switch (replace_reg) {
92 case 7: 97 case 7:
@@ -107,14 +112,17 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
107 cyrix_get_arr(7, &lbase, &lsize, &ltype); 112 cyrix_get_arr(7, &lbase, &lsize, &ltype);
108 if (lsize == 0) 113 if (lsize == 0)
109 return 7; 114 return 7;
110 /* Else try ARR0-ARR6 first */ 115 /* Else try ARR0-ARR6 first */
111 } else { 116 } else {
112 for (i = 0; i < 7; i++) { 117 for (i = 0; i < 7; i++) {
113 cyrix_get_arr(i, &lbase, &lsize, &ltype); 118 cyrix_get_arr(i, &lbase, &lsize, &ltype);
114 if (lsize == 0) 119 if (lsize == 0)
115 return i; 120 return i;
116 } 121 }
117 /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */ 122 /*
123 * ARR0-ARR6 isn't free
124 * try ARR7 but its size must be at least 256K
125 */
118 cyrix_get_arr(i, &lbase, &lsize, &ltype); 126 cyrix_get_arr(i, &lbase, &lsize, &ltype);
119 if ((lsize == 0) && (size >= 0x40)) 127 if ((lsize == 0) && (size >= 0x40))
120 return i; 128 return i;
@@ -122,21 +130,22 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
122 return -ENOSPC; 130 return -ENOSPC;
123} 131}
124 132
125static u32 cr4 = 0; 133static u32 cr4, ccr3;
126static u32 ccr3;
127 134
128static void prepare_set(void) 135static void prepare_set(void)
129{ 136{
130 u32 cr0; 137 u32 cr0;
131 138
132 /* Save value of CR4 and clear Page Global Enable (bit 7) */ 139 /* Save value of CR4 and clear Page Global Enable (bit 7) */
133 if ( cpu_has_pge ) { 140 if (cpu_has_pge) {
134 cr4 = read_cr4(); 141 cr4 = read_cr4();
135 write_cr4(cr4 & ~X86_CR4_PGE); 142 write_cr4(cr4 & ~X86_CR4_PGE);
136 } 143 }
137 144
138 /* Disable and flush caches. Note that wbinvd flushes the TLBs as 145 /*
139 a side-effect */ 146 * Disable and flush caches.
147 * Note that wbinvd flushes the TLBs as a side-effect
148 */
140 cr0 = read_cr0() | X86_CR0_CD; 149 cr0 = read_cr0() | X86_CR0_CD;
141 wbinvd(); 150 wbinvd();
142 write_cr0(cr0); 151 write_cr0(cr0);
@@ -147,22 +156,21 @@ static void prepare_set(void)
147 156
148 /* Cyrix ARRs - everything else was excluded at the top */ 157 /* Cyrix ARRs - everything else was excluded at the top */
149 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); 158 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
150
151} 159}
152 160
153static void post_set(void) 161static void post_set(void)
154{ 162{
155 /* Flush caches and TLBs */ 163 /* Flush caches and TLBs */
156 wbinvd(); 164 wbinvd();
157 165
158 /* Cyrix ARRs - everything else was excluded at the top */ 166 /* Cyrix ARRs - everything else was excluded at the top */
159 setCx86(CX86_CCR3, ccr3); 167 setCx86(CX86_CCR3, ccr3);
160 168
161 /* Enable caches */ 169 /* Enable caches */
162 write_cr0(read_cr0() & 0xbfffffff); 170 write_cr0(read_cr0() & 0xbfffffff);
163 171
164 /* Restore value of CR4 */ 172 /* Restore value of CR4 */
165 if ( cpu_has_pge ) 173 if (cpu_has_pge)
166 write_cr4(cr4); 174 write_cr4(cr4);
167} 175}
168 176
@@ -178,7 +186,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
178 size >>= 6; 186 size >>= 6;
179 187
180 size &= 0x7fff; /* make sure arr_size <= 14 */ 188 size &= 0x7fff; /* make sure arr_size <= 14 */
181 for (arr_size = 0; size; arr_size++, size >>= 1) ; 189 for (arr_size = 0; size; arr_size++, size >>= 1)
190 ;
182 191
183 if (reg < 7) { 192 if (reg < 7) {
184 switch (type) { 193 switch (type) {
@@ -215,18 +224,18 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
215 prepare_set(); 224 prepare_set();
216 225
217 base <<= PAGE_SHIFT; 226 base <<= PAGE_SHIFT;
218 setCx86(arr, ((unsigned char *) &base)[3]); 227 setCx86(arr + 0, ((unsigned char *)&base)[3]);
219 setCx86(arr + 1, ((unsigned char *) &base)[2]); 228 setCx86(arr + 1, ((unsigned char *)&base)[2]);
220 setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size); 229 setCx86(arr + 2, (((unsigned char *)&base)[1]) | arr_size);
221 setCx86(CX86_RCR_BASE + reg, arr_type); 230 setCx86(CX86_RCR_BASE + reg, arr_type);
222 231
223 post_set(); 232 post_set();
224} 233}
225 234
226typedef struct { 235typedef struct {
227 unsigned long base; 236 unsigned long base;
228 unsigned long size; 237 unsigned long size;
229 mtrr_type type; 238 mtrr_type type;
230} arr_state_t; 239} arr_state_t;
231 240
232static arr_state_t arr_state[8] = { 241static arr_state_t arr_state[8] = {
@@ -247,16 +256,17 @@ static void cyrix_set_all(void)
247 setCx86(CX86_CCR0 + i, ccr_state[i]); 256 setCx86(CX86_CCR0 + i, ccr_state[i]);
248 for (; i < 7; i++) 257 for (; i < 7; i++)
249 setCx86(CX86_CCR4 + i, ccr_state[i]); 258 setCx86(CX86_CCR4 + i, ccr_state[i]);
250 for (i = 0; i < 8; i++) 259
251 cyrix_set_arr(i, arr_state[i].base, 260 for (i = 0; i < 8; i++) {
261 cyrix_set_arr(i, arr_state[i].base,
252 arr_state[i].size, arr_state[i].type); 262 arr_state[i].size, arr_state[i].type);
263 }
253 264
254 post_set(); 265 post_set();
255} 266}
256 267
257static struct mtrr_ops cyrix_mtrr_ops = { 268static struct mtrr_ops cyrix_mtrr_ops = {
258 .vendor = X86_VENDOR_CYRIX, 269 .vendor = X86_VENDOR_CYRIX,
259// .init = cyrix_arr_init,
260 .set_all = cyrix_set_all, 270 .set_all = cyrix_set_all,
261 .set = cyrix_set_arr, 271 .set = cyrix_set_arr,
262 .get = cyrix_get_arr, 272 .get = cyrix_get_arr,
@@ -270,5 +280,3 @@ int __init cyrix_init_mtrr(void)
270 set_mtrr_ops(&cyrix_mtrr_ops); 280 set_mtrr_ops(&cyrix_mtrr_ops);
271 return 0; 281 return 0;
272} 282}
273
274//arch_initcall(cyrix_init_mtrr);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0543f69f0b27..55da0c5f68dd 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -1,28 +1,34 @@
1/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong 1/*
2 because MTRRs can span upto 40 bits (36bits on most modern x86) */ 2 * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
3 * because MTRRs can span upto 40 bits (36bits on most modern x86)
4 */
5#define DEBUG
6
7#include <linux/module.h>
3#include <linux/init.h> 8#include <linux/init.h>
4#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/io.h>
5#include <linux/mm.h> 11#include <linux/mm.h>
6#include <linux/module.h> 12
7#include <asm/io.h>
8#include <asm/mtrr.h>
9#include <asm/msr.h>
10#include <asm/system.h>
11#include <asm/cpufeature.h>
12#include <asm/processor-flags.h> 13#include <asm/processor-flags.h>
14#include <asm/cpufeature.h>
13#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
16#include <asm/system.h>
17#include <asm/mtrr.h>
18#include <asm/msr.h>
14#include <asm/pat.h> 19#include <asm/pat.h>
20
15#include "mtrr.h" 21#include "mtrr.h"
16 22
17struct fixed_range_block { 23struct fixed_range_block {
18 int base_msr; /* start address of an MTRR block */ 24 int base_msr; /* start address of an MTRR block */
19 int ranges; /* number of MTRRs in this block */ 25 int ranges; /* number of MTRRs in this block */
20}; 26};
21 27
22static struct fixed_range_block fixed_range_blocks[] = { 28static struct fixed_range_block fixed_range_blocks[] = {
23 { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ 29 { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */
24 { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ 30 { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */
25 { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ 31 { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */
26 {} 32 {}
27}; 33};
28 34
@@ -30,10 +36,10 @@ static unsigned long smp_changes_mask;
30static int mtrr_state_set; 36static int mtrr_state_set;
31u64 mtrr_tom2; 37u64 mtrr_tom2;
32 38
33struct mtrr_state_type mtrr_state = {}; 39struct mtrr_state_type mtrr_state;
34EXPORT_SYMBOL_GPL(mtrr_state); 40EXPORT_SYMBOL_GPL(mtrr_state);
35 41
36/** 42/*
37 * BIOS is expected to clear MtrrFixDramModEn bit, see for example 43 * BIOS is expected to clear MtrrFixDramModEn bit, see for example
38 * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD 44 * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
39 * Opteron Processors" (26094 Rev. 3.30 February 2006), section 45 * Opteron Processors" (26094 Rev. 3.30 February 2006), section
@@ -104,9 +110,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
104 * Look of multiple ranges matching this address and pick type 110 * Look of multiple ranges matching this address and pick type
105 * as per MTRR precedence 111 * as per MTRR precedence
106 */ 112 */
107 if (!(mtrr_state.enabled & 2)) { 113 if (!(mtrr_state.enabled & 2))
108 return mtrr_state.def_type; 114 return mtrr_state.def_type;
109 }
110 115
111 prev_match = 0xFF; 116 prev_match = 0xFF;
112 for (i = 0; i < num_var_ranges; ++i) { 117 for (i = 0; i < num_var_ranges; ++i) {
@@ -125,9 +130,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
125 if (start_state != end_state) 130 if (start_state != end_state)
126 return 0xFE; 131 return 0xFE;
127 132
128 if ((start & mask) != (base & mask)) { 133 if ((start & mask) != (base & mask))
129 continue; 134 continue;
130 }
131 135
132 curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; 136 curr_match = mtrr_state.var_ranges[i].base_lo & 0xff;
133 if (prev_match == 0xFF) { 137 if (prev_match == 0xFF) {
@@ -148,9 +152,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
148 curr_match = MTRR_TYPE_WRTHROUGH; 152 curr_match = MTRR_TYPE_WRTHROUGH;
149 } 153 }
150 154
151 if (prev_match != curr_match) { 155 if (prev_match != curr_match)
152 return MTRR_TYPE_UNCACHABLE; 156 return MTRR_TYPE_UNCACHABLE;
153 }
154 } 157 }
155 158
156 if (mtrr_tom2) { 159 if (mtrr_tom2) {
@@ -164,7 +167,7 @@ u8 mtrr_type_lookup(u64 start, u64 end)
164 return mtrr_state.def_type; 167 return mtrr_state.def_type;
165} 168}
166 169
167/* Get the MSR pair relating to a var range */ 170/* Get the MSR pair relating to a var range */
168static void 171static void
169get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) 172get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
170{ 173{
@@ -172,7 +175,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
172 rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); 175 rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
173} 176}
174 177
175/* fill the MSR pair relating to a var range */ 178/* Fill the MSR pair relating to a var range */
176void fill_mtrr_var_range(unsigned int index, 179void fill_mtrr_var_range(unsigned int index,
177 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) 180 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
178{ 181{
@@ -186,10 +189,9 @@ void fill_mtrr_var_range(unsigned int index,
186 vr[index].mask_hi = mask_hi; 189 vr[index].mask_hi = mask_hi;
187} 190}
188 191
189static void 192static void get_fixed_ranges(mtrr_type *frs)
190get_fixed_ranges(mtrr_type * frs)
191{ 193{
192 unsigned int *p = (unsigned int *) frs; 194 unsigned int *p = (unsigned int *)frs;
193 int i; 195 int i;
194 196
195 k8_check_syscfg_dram_mod_en(); 197 k8_check_syscfg_dram_mod_en();
@@ -217,22 +219,22 @@ static void __init print_fixed_last(void)
217 if (!last_fixed_end) 219 if (!last_fixed_end)
218 return; 220 return;
219 221
220 printk(KERN_DEBUG " %05X-%05X %s\n", last_fixed_start, 222 pr_debug(" %05X-%05X %s\n", last_fixed_start,
221 last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); 223 last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));
222 224
223 last_fixed_end = 0; 225 last_fixed_end = 0;
224} 226}
225 227
226static void __init update_fixed_last(unsigned base, unsigned end, 228static void __init update_fixed_last(unsigned base, unsigned end,
227 mtrr_type type) 229 mtrr_type type)
228{ 230{
229 last_fixed_start = base; 231 last_fixed_start = base;
230 last_fixed_end = end; 232 last_fixed_end = end;
231 last_fixed_type = type; 233 last_fixed_type = type;
232} 234}
233 235
234static void __init print_fixed(unsigned base, unsigned step, 236static void __init
235 const mtrr_type *types) 237print_fixed(unsigned base, unsigned step, const mtrr_type *types)
236{ 238{
237 unsigned i; 239 unsigned i;
238 240
@@ -259,54 +261,55 @@ static void __init print_mtrr_state(void)
259 unsigned int i; 261 unsigned int i;
260 int high_width; 262 int high_width;
261 263
262 printk(KERN_DEBUG "MTRR default type: %s\n", 264 pr_debug("MTRR default type: %s\n",
263 mtrr_attrib_to_str(mtrr_state.def_type)); 265 mtrr_attrib_to_str(mtrr_state.def_type));
264 if (mtrr_state.have_fixed) { 266 if (mtrr_state.have_fixed) {
265 printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n", 267 pr_debug("MTRR fixed ranges %sabled:\n",
266 mtrr_state.enabled & 1 ? "en" : "dis"); 268 mtrr_state.enabled & 1 ? "en" : "dis");
267 print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); 269 print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
268 for (i = 0; i < 2; ++i) 270 for (i = 0; i < 2; ++i)
269 print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); 271 print_fixed(0x80000 + i * 0x20000, 0x04000,
272 mtrr_state.fixed_ranges + (i + 1) * 8);
270 for (i = 0; i < 8; ++i) 273 for (i = 0; i < 8; ++i)
271 print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); 274 print_fixed(0xC0000 + i * 0x08000, 0x01000,
275 mtrr_state.fixed_ranges + (i + 3) * 8);
272 276
273 /* tail */ 277 /* tail */
274 print_fixed_last(); 278 print_fixed_last();
275 } 279 }
276 printk(KERN_DEBUG "MTRR variable ranges %sabled:\n", 280 pr_debug("MTRR variable ranges %sabled:\n",
277 mtrr_state.enabled & 2 ? "en" : "dis"); 281 mtrr_state.enabled & 2 ? "en" : "dis");
278 if (size_or_mask & 0xffffffffUL) 282 if (size_or_mask & 0xffffffffUL)
279 high_width = ffs(size_or_mask & 0xffffffffUL) - 1; 283 high_width = ffs(size_or_mask & 0xffffffffUL) - 1;
280 else 284 else
281 high_width = ffs(size_or_mask>>32) + 32 - 1; 285 high_width = ffs(size_or_mask>>32) + 32 - 1;
282 high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; 286 high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4;
287
283 for (i = 0; i < num_var_ranges; ++i) { 288 for (i = 0; i < num_var_ranges; ++i) {
284 if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) 289 if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
285 printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n", 290 pr_debug(" %u base %0*X%05X000 mask %0*X%05X000 %s\n",
286 i, 291 i,
287 high_width, 292 high_width,
288 mtrr_state.var_ranges[i].base_hi, 293 mtrr_state.var_ranges[i].base_hi,
289 mtrr_state.var_ranges[i].base_lo >> 12, 294 mtrr_state.var_ranges[i].base_lo >> 12,
290 high_width, 295 high_width,
291 mtrr_state.var_ranges[i].mask_hi, 296 mtrr_state.var_ranges[i].mask_hi,
292 mtrr_state.var_ranges[i].mask_lo >> 12, 297 mtrr_state.var_ranges[i].mask_lo >> 12,
293 mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); 298 mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
294 else 299 else
295 printk(KERN_DEBUG " %u disabled\n", i); 300 pr_debug(" %u disabled\n", i);
296 }
297 if (mtrr_tom2) {
298 printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n",
299 mtrr_tom2, mtrr_tom2>>20);
300 } 301 }
302 if (mtrr_tom2)
303 pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
301} 304}
302 305
303/* Grab all of the MTRR state for this CPU into *state */ 306/* Grab all of the MTRR state for this CPU into *state */
304void __init get_mtrr_state(void) 307void __init get_mtrr_state(void)
305{ 308{
306 unsigned int i;
307 struct mtrr_var_range *vrs; 309 struct mtrr_var_range *vrs;
308 unsigned lo, dummy;
309 unsigned long flags; 310 unsigned long flags;
311 unsigned lo, dummy;
312 unsigned int i;
310 313
311 vrs = mtrr_state.var_ranges; 314 vrs = mtrr_state.var_ranges;
312 315
@@ -324,6 +327,7 @@ void __init get_mtrr_state(void)
324 327
325 if (amd_special_default_mtrr()) { 328 if (amd_special_default_mtrr()) {
326 unsigned low, high; 329 unsigned low, high;
330
327 /* TOP_MEM2 */ 331 /* TOP_MEM2 */
328 rdmsr(MSR_K8_TOP_MEM2, low, high); 332 rdmsr(MSR_K8_TOP_MEM2, low, high);
329 mtrr_tom2 = high; 333 mtrr_tom2 = high;
@@ -344,10 +348,9 @@ void __init get_mtrr_state(void)
344 348
345 post_set(); 349 post_set();
346 local_irq_restore(flags); 350 local_irq_restore(flags);
347
348} 351}
349 352
350/* Some BIOS's are fucked and don't set all MTRRs the same! */ 353/* Some BIOS's are messed up and don't set all MTRRs the same! */
351void __init mtrr_state_warn(void) 354void __init mtrr_state_warn(void)
352{ 355{
353 unsigned long mask = smp_changes_mask; 356 unsigned long mask = smp_changes_mask;
@@ -355,28 +358,33 @@ void __init mtrr_state_warn(void)
355 if (!mask) 358 if (!mask)
356 return; 359 return;
357 if (mask & MTRR_CHANGE_MASK_FIXED) 360 if (mask & MTRR_CHANGE_MASK_FIXED)
358 printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n"); 361 pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
359 if (mask & MTRR_CHANGE_MASK_VARIABLE) 362 if (mask & MTRR_CHANGE_MASK_VARIABLE)
360 printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n"); 363 pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n");
361 if (mask & MTRR_CHANGE_MASK_DEFTYPE) 364 if (mask & MTRR_CHANGE_MASK_DEFTYPE)
362 printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n"); 365 pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
366
363 printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); 367 printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
364 printk(KERN_INFO "mtrr: corrected configuration.\n"); 368 printk(KERN_INFO "mtrr: corrected configuration.\n");
365} 369}
366 370
367/* Doesn't attempt to pass an error out to MTRR users 371/*
368 because it's quite complicated in some cases and probably not 372 * Doesn't attempt to pass an error out to MTRR users
369 worth it because the best error handling is to ignore it. */ 373 * because it's quite complicated in some cases and probably not
374 * worth it because the best error handling is to ignore it.
375 */
370void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) 376void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
371{ 377{
372 if (wrmsr_safe(msr, a, b) < 0) 378 if (wrmsr_safe(msr, a, b) < 0) {
373 printk(KERN_ERR 379 printk(KERN_ERR
374 "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", 380 "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
375 smp_processor_id(), msr, a, b); 381 smp_processor_id(), msr, a, b);
382 }
376} 383}
377 384
378/** 385/**
379 * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have 386 * set_fixed_range - checks & updates a fixed-range MTRR if it
387 * differs from the value it should have
380 * @msr: MSR address of the MTTR which should be checked and updated 388 * @msr: MSR address of the MTTR which should be checked and updated
381 * @changed: pointer which indicates whether the MTRR needed to be changed 389 * @changed: pointer which indicates whether the MTRR needed to be changed
382 * @msrwords: pointer to the MSR values which the MSR should have 390 * @msrwords: pointer to the MSR values which the MSR should have
@@ -401,20 +409,23 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
401 * 409 *
402 * Returns: The index of the region on success, else negative on error. 410 * Returns: The index of the region on success, else negative on error.
403 */ 411 */
404int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) 412int
413generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
405{ 414{
406 int i, max;
407 mtrr_type ltype;
408 unsigned long lbase, lsize; 415 unsigned long lbase, lsize;
416 mtrr_type ltype;
417 int i, max;
409 418
410 max = num_var_ranges; 419 max = num_var_ranges;
411 if (replace_reg >= 0 && replace_reg < max) 420 if (replace_reg >= 0 && replace_reg < max)
412 return replace_reg; 421 return replace_reg;
422
413 for (i = 0; i < max; ++i) { 423 for (i = 0; i < max; ++i) {
414 mtrr_if->get(i, &lbase, &lsize, &ltype); 424 mtrr_if->get(i, &lbase, &lsize, &ltype);
415 if (lsize == 0) 425 if (lsize == 0)
416 return i; 426 return i;
417 } 427 }
428
418 return -ENOSPC; 429 return -ENOSPC;
419} 430}
420 431
@@ -434,7 +445,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
434 rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); 445 rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
435 446
436 if ((mask_lo & 0x800) == 0) { 447 if ((mask_lo & 0x800) == 0) {
437 /* Invalid (i.e. free) range */ 448 /* Invalid (i.e. free) range */
438 *base = 0; 449 *base = 0;
439 *size = 0; 450 *size = 0;
440 *type = 0; 451 *type = 0;
@@ -471,27 +482,31 @@ out_put_cpu:
471} 482}
472 483
473/** 484/**
474 * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set 485 * set_fixed_ranges - checks & updates the fixed-range MTRRs if they
486 * differ from the saved set
475 * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges() 487 * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges()
476 */ 488 */
477static int set_fixed_ranges(mtrr_type * frs) 489static int set_fixed_ranges(mtrr_type *frs)
478{ 490{
479 unsigned long long *saved = (unsigned long long *) frs; 491 unsigned long long *saved = (unsigned long long *)frs;
480 bool changed = false; 492 bool changed = false;
481 int block=-1, range; 493 int block = -1, range;
482 494
483 k8_check_syscfg_dram_mod_en(); 495 k8_check_syscfg_dram_mod_en();
484 496
485 while (fixed_range_blocks[++block].ranges) 497 while (fixed_range_blocks[++block].ranges) {
486 for (range=0; range < fixed_range_blocks[block].ranges; range++) 498 for (range = 0; range < fixed_range_blocks[block].ranges; range++)
487 set_fixed_range(fixed_range_blocks[block].base_msr + range, 499 set_fixed_range(fixed_range_blocks[block].base_msr + range,
488 &changed, (unsigned int *) saved++); 500 &changed, (unsigned int *)saved++);
501 }
489 502
490 return changed; 503 return changed;
491} 504}
492 505
493/* Set the MSR pair relating to a var range. Returns TRUE if 506/*
494 changes are made */ 507 * Set the MSR pair relating to a var range.
508 * Returns true if changes are made.
509 */
495static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) 510static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
496{ 511{
497 unsigned int lo, hi; 512 unsigned int lo, hi;
@@ -501,6 +516,7 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
501 if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) 516 if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
502 || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != 517 || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
503 (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { 518 (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
519
504 mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); 520 mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
505 changed = true; 521 changed = true;
506 } 522 }
@@ -526,21 +542,26 @@ static u32 deftype_lo, deftype_hi;
526 */ 542 */
527static unsigned long set_mtrr_state(void) 543static unsigned long set_mtrr_state(void)
528{ 544{
529 unsigned int i;
530 unsigned long change_mask = 0; 545 unsigned long change_mask = 0;
546 unsigned int i;
531 547
532 for (i = 0; i < num_var_ranges; i++) 548 for (i = 0; i < num_var_ranges; i++) {
533 if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) 549 if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i]))
534 change_mask |= MTRR_CHANGE_MASK_VARIABLE; 550 change_mask |= MTRR_CHANGE_MASK_VARIABLE;
551 }
535 552
536 if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) 553 if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges))
537 change_mask |= MTRR_CHANGE_MASK_FIXED; 554 change_mask |= MTRR_CHANGE_MASK_FIXED;
538 555
539 /* Set_mtrr_restore restores the old value of MTRRdefType, 556 /*
540 so to set it we fiddle with the saved value */ 557 * Set_mtrr_restore restores the old value of MTRRdefType,
558 * so to set it we fiddle with the saved value:
559 */
541 if ((deftype_lo & 0xff) != mtrr_state.def_type 560 if ((deftype_lo & 0xff) != mtrr_state.def_type
542 || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { 561 || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
543 deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10); 562
563 deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type |
564 (mtrr_state.enabled << 10);
544 change_mask |= MTRR_CHANGE_MASK_DEFTYPE; 565 change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
545 } 566 }
546 567
@@ -548,33 +569,36 @@ static unsigned long set_mtrr_state(void)
548} 569}
549 570
550 571
551static unsigned long cr4 = 0; 572static unsigned long cr4;
552static DEFINE_SPINLOCK(set_atomicity_lock); 573static DEFINE_SPINLOCK(set_atomicity_lock);
553 574
554/* 575/*
555 * Since we are disabling the cache don't allow any interrupts - they 576 * Since we are disabling the cache don't allow any interrupts,
556 * would run extremely slow and would only increase the pain. The caller must 577 * they would run extremely slow and would only increase the pain.
557 * ensure that local interrupts are disabled and are reenabled after post_set() 578 *
558 * has been called. 579 * The caller must ensure that local interrupts are disabled and
580 * are reenabled after post_set() has been called.
559 */ 581 */
560
561static void prepare_set(void) __acquires(set_atomicity_lock) 582static void prepare_set(void) __acquires(set_atomicity_lock)
562{ 583{
563 unsigned long cr0; 584 unsigned long cr0;
564 585
565 /* Note that this is not ideal, since the cache is only flushed/disabled 586 /*
566 for this CPU while the MTRRs are changed, but changing this requires 587 * Note that this is not ideal
567 more invasive changes to the way the kernel boots */ 588 * since the cache is only flushed/disabled for this CPU while the
589 * MTRRs are changed, but changing this requires more invasive
590 * changes to the way the kernel boots
591 */
568 592
569 spin_lock(&set_atomicity_lock); 593 spin_lock(&set_atomicity_lock);
570 594
571 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ 595 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
572 cr0 = read_cr0() | X86_CR0_CD; 596 cr0 = read_cr0() | X86_CR0_CD;
573 write_cr0(cr0); 597 write_cr0(cr0);
574 wbinvd(); 598 wbinvd();
575 599
576 /* Save value of CR4 and clear Page Global Enable (bit 7) */ 600 /* Save value of CR4 and clear Page Global Enable (bit 7) */
577 if ( cpu_has_pge ) { 601 if (cpu_has_pge) {
578 cr4 = read_cr4(); 602 cr4 = read_cr4();
579 write_cr4(cr4 & ~X86_CR4_PGE); 603 write_cr4(cr4 & ~X86_CR4_PGE);
580 } 604 }
@@ -582,26 +606,26 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
582 /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ 606 /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
583 __flush_tlb(); 607 __flush_tlb();
584 608
585 /* Save MTRR state */ 609 /* Save MTRR state */
586 rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); 610 rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
587 611
588 /* Disable MTRRs, and set the default type to uncached */ 612 /* Disable MTRRs, and set the default type to uncached */
589 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); 613 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
590} 614}
591 615
592static void post_set(void) __releases(set_atomicity_lock) 616static void post_set(void) __releases(set_atomicity_lock)
593{ 617{
594 /* Flush TLBs (no need to flush caches - they are disabled) */ 618 /* Flush TLBs (no need to flush caches - they are disabled) */
595 __flush_tlb(); 619 __flush_tlb();
596 620
597 /* Intel (P6) standard MTRRs */ 621 /* Intel (P6) standard MTRRs */
598 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); 622 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
599 623
600 /* Enable caches */ 624 /* Enable caches */
601 write_cr0(read_cr0() & 0xbfffffff); 625 write_cr0(read_cr0() & 0xbfffffff);
602 626
603 /* Restore value of CR4 */ 627 /* Restore value of CR4 */
604 if ( cpu_has_pge ) 628 if (cpu_has_pge)
605 write_cr4(cr4); 629 write_cr4(cr4);
606 spin_unlock(&set_atomicity_lock); 630 spin_unlock(&set_atomicity_lock);
607} 631}
@@ -623,24 +647,27 @@ static void generic_set_all(void)
623 post_set(); 647 post_set();
624 local_irq_restore(flags); 648 local_irq_restore(flags);
625 649
626 /* Use the atomic bitops to update the global mask */ 650 /* Use the atomic bitops to update the global mask */
627 for (count = 0; count < sizeof mask * 8; ++count) { 651 for (count = 0; count < sizeof mask * 8; ++count) {
628 if (mask & 0x01) 652 if (mask & 0x01)
629 set_bit(count, &smp_changes_mask); 653 set_bit(count, &smp_changes_mask);
630 mask >>= 1; 654 mask >>= 1;
631 } 655 }
632 656
633} 657}
634 658
659/**
660 * generic_set_mtrr - set variable MTRR register on the local CPU.
661 *
662 * @reg: The register to set.
663 * @base: The base address of the region.
664 * @size: The size of the region. If this is 0 the region is disabled.
665 * @type: The type of the region.
666 *
667 * Returns nothing.
668 */
635static void generic_set_mtrr(unsigned int reg, unsigned long base, 669static void generic_set_mtrr(unsigned int reg, unsigned long base,
636 unsigned long size, mtrr_type type) 670 unsigned long size, mtrr_type type)
637/* [SUMMARY] Set variable MTRR register on the local CPU.
638 <reg> The register to set.
639 <base> The base address of the region.
640 <size> The size of the region. If this is 0 the region is disabled.
641 <type> The type of the region.
642 [RETURNS] Nothing.
643*/
644{ 671{
645 unsigned long flags; 672 unsigned long flags;
646 struct mtrr_var_range *vr; 673 struct mtrr_var_range *vr;
@@ -651,8 +678,10 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
651 prepare_set(); 678 prepare_set();
652 679
653 if (size == 0) { 680 if (size == 0) {
654 /* The invalid bit is kept in the mask, so we simply clear the 681 /*
655 relevant mask register to disable a range. */ 682 * The invalid bit is kept in the mask, so we simply
683 * clear the relevant mask register to disable a range.
684 */
656 mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); 685 mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
657 memset(vr, 0, sizeof(struct mtrr_var_range)); 686 memset(vr, 0, sizeof(struct mtrr_var_range));
658 } else { 687 } else {
@@ -669,46 +698,50 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
669 local_irq_restore(flags); 698 local_irq_restore(flags);
670} 699}
671 700
672int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) 701int generic_validate_add_page(unsigned long base, unsigned long size,
702 unsigned int type)
673{ 703{
674 unsigned long lbase, last; 704 unsigned long lbase, last;
675 705
676 /* For Intel PPro stepping <= 7, must be 4 MiB aligned 706 /*
677 and not touch 0x70000000->0x7003FFFF */ 707 * For Intel PPro stepping <= 7
708 * must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF
709 */
678 if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && 710 if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
679 boot_cpu_data.x86_model == 1 && 711 boot_cpu_data.x86_model == 1 &&
680 boot_cpu_data.x86_mask <= 7) { 712 boot_cpu_data.x86_mask <= 7) {
681 if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { 713 if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
682 printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); 714 pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
683 return -EINVAL; 715 return -EINVAL;
684 } 716 }
685 if (!(base + size < 0x70000 || base > 0x7003F) && 717 if (!(base + size < 0x70000 || base > 0x7003F) &&
686 (type == MTRR_TYPE_WRCOMB 718 (type == MTRR_TYPE_WRCOMB
687 || type == MTRR_TYPE_WRBACK)) { 719 || type == MTRR_TYPE_WRBACK)) {
688 printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); 720 pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
689 return -EINVAL; 721 return -EINVAL;
690 } 722 }
691 } 723 }
692 724
693 /* Check upper bits of base and last are equal and lower bits are 0 725 /*
694 for base and 1 for last */ 726 * Check upper bits of base and last are equal and lower bits are 0
727 * for base and 1 for last
728 */
695 last = base + size - 1; 729 last = base + size - 1;
696 for (lbase = base; !(lbase & 1) && (last & 1); 730 for (lbase = base; !(lbase & 1) && (last & 1);
697 lbase = lbase >> 1, last = last >> 1) ; 731 lbase = lbase >> 1, last = last >> 1)
732 ;
698 if (lbase != last) { 733 if (lbase != last) {
699 printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", 734 pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
700 base, size);
701 return -EINVAL; 735 return -EINVAL;
702 } 736 }
703 return 0; 737 return 0;
704} 738}
705 739
706
707static int generic_have_wrcomb(void) 740static int generic_have_wrcomb(void)
708{ 741{
709 unsigned long config, dummy; 742 unsigned long config, dummy;
710 rdmsr(MSR_MTRRcap, config, dummy); 743 rdmsr(MSR_MTRRcap, config, dummy);
711 return (config & (1 << 10)); 744 return config & (1 << 10);
712} 745}
713 746
714int positive_have_wrcomb(void) 747int positive_have_wrcomb(void)
@@ -716,14 +749,15 @@ int positive_have_wrcomb(void)
716 return 1; 749 return 1;
717} 750}
718 751
719/* generic structure... 752/*
753 * Generic structure...
720 */ 754 */
721struct mtrr_ops generic_mtrr_ops = { 755struct mtrr_ops generic_mtrr_ops = {
722 .use_intel_if = 1, 756 .use_intel_if = 1,
723 .set_all = generic_set_all, 757 .set_all = generic_set_all,
724 .get = generic_get_mtrr, 758 .get = generic_get_mtrr,
725 .get_free_region = generic_get_free_region, 759 .get_free_region = generic_get_free_region,
726 .set = generic_set_mtrr, 760 .set = generic_set_mtrr,
727 .validate_add_page = generic_validate_add_page, 761 .validate_add_page = generic_validate_add_page,
728 .have_wrcomb = generic_have_wrcomb, 762 .have_wrcomb = generic_have_wrcomb,
729}; 763};
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index fb73a52913a4..08b6ea4c62b4 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -1,27 +1,28 @@
1#include <linux/init.h>
2#include <linux/proc_fs.h>
3#include <linux/capability.h> 1#include <linux/capability.h>
4#include <linux/ctype.h>
5#include <linux/module.h>
6#include <linux/seq_file.h> 2#include <linux/seq_file.h>
7#include <asm/uaccess.h> 3#include <linux/uaccess.h>
4#include <linux/proc_fs.h>
5#include <linux/module.h>
6#include <linux/ctype.h>
7#include <linux/init.h>
8 8
9#define LINE_SIZE 80 9#define LINE_SIZE 80
10 10
11#include <asm/mtrr.h> 11#include <asm/mtrr.h>
12
12#include "mtrr.h" 13#include "mtrr.h"
13 14
14#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) 15#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
15 16
16static const char *const mtrr_strings[MTRR_NUM_TYPES] = 17static const char *const mtrr_strings[MTRR_NUM_TYPES] =
17{ 18{
18 "uncachable", /* 0 */ 19 "uncachable", /* 0 */
19 "write-combining", /* 1 */ 20 "write-combining", /* 1 */
20 "?", /* 2 */ 21 "?", /* 2 */
21 "?", /* 3 */ 22 "?", /* 3 */
22 "write-through", /* 4 */ 23 "write-through", /* 4 */
23 "write-protect", /* 5 */ 24 "write-protect", /* 5 */
24 "write-back", /* 6 */ 25 "write-back", /* 6 */
25}; 26};
26 27
27const char *mtrr_attrib_to_str(int x) 28const char *mtrr_attrib_to_str(int x)
@@ -35,8 +36,8 @@ static int
35mtrr_file_add(unsigned long base, unsigned long size, 36mtrr_file_add(unsigned long base, unsigned long size,
36 unsigned int type, bool increment, struct file *file, int page) 37 unsigned int type, bool increment, struct file *file, int page)
37{ 38{
39 unsigned int *fcount = FILE_FCOUNT(file);
38 int reg, max; 40 int reg, max;
39 unsigned int *fcount = FILE_FCOUNT(file);
40 41
41 max = num_var_ranges; 42 max = num_var_ranges;
42 if (fcount == NULL) { 43 if (fcount == NULL) {
@@ -61,8 +62,8 @@ static int
61mtrr_file_del(unsigned long base, unsigned long size, 62mtrr_file_del(unsigned long base, unsigned long size,
62 struct file *file, int page) 63 struct file *file, int page)
63{ 64{
64 int reg;
65 unsigned int *fcount = FILE_FCOUNT(file); 65 unsigned int *fcount = FILE_FCOUNT(file);
66 int reg;
66 67
67 if (!page) { 68 if (!page) {
68 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) 69 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
@@ -81,13 +82,14 @@ mtrr_file_del(unsigned long base, unsigned long size,
81 return reg; 82 return reg;
82} 83}
83 84
84/* RED-PEN: seq_file can seek now. this is ignored. */ 85/*
86 * seq_file can seek but we ignore it.
87 *
88 * Format of control line:
89 * "base=%Lx size=%Lx type=%s" or "disable=%d"
90 */
85static ssize_t 91static ssize_t
86mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) 92mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
87/* Format of control line:
88 "base=%Lx size=%Lx type=%s" OR:
89 "disable=%d"
90*/
91{ 93{
92 int i, err; 94 int i, err;
93 unsigned long reg; 95 unsigned long reg;
@@ -100,15 +102,18 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
100 return -EPERM; 102 return -EPERM;
101 if (!len) 103 if (!len)
102 return -EINVAL; 104 return -EINVAL;
105
103 memset(line, 0, LINE_SIZE); 106 memset(line, 0, LINE_SIZE);
104 if (len > LINE_SIZE) 107 if (len > LINE_SIZE)
105 len = LINE_SIZE; 108 len = LINE_SIZE;
106 if (copy_from_user(line, buf, len - 1)) 109 if (copy_from_user(line, buf, len - 1))
107 return -EFAULT; 110 return -EFAULT;
111
108 linelen = strlen(line); 112 linelen = strlen(line);
109 ptr = line + linelen - 1; 113 ptr = line + linelen - 1;
110 if (linelen && *ptr == '\n') 114 if (linelen && *ptr == '\n')
111 *ptr = '\0'; 115 *ptr = '\0';
116
112 if (!strncmp(line, "disable=", 8)) { 117 if (!strncmp(line, "disable=", 8)) {
113 reg = simple_strtoul(line + 8, &ptr, 0); 118 reg = simple_strtoul(line + 8, &ptr, 0);
114 err = mtrr_del_page(reg, 0, 0); 119 err = mtrr_del_page(reg, 0, 0);
@@ -116,28 +121,35 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
116 return err; 121 return err;
117 return len; 122 return len;
118 } 123 }
124
119 if (strncmp(line, "base=", 5)) 125 if (strncmp(line, "base=", 5))
120 return -EINVAL; 126 return -EINVAL;
127
121 base = simple_strtoull(line + 5, &ptr, 0); 128 base = simple_strtoull(line + 5, &ptr, 0);
122 for (; isspace(*ptr); ++ptr) ; 129 for (; isspace(*ptr); ++ptr)
130 ;
131
123 if (strncmp(ptr, "size=", 5)) 132 if (strncmp(ptr, "size=", 5))
124 return -EINVAL; 133 return -EINVAL;
134
125 size = simple_strtoull(ptr + 5, &ptr, 0); 135 size = simple_strtoull(ptr + 5, &ptr, 0);
126 if ((base & 0xfff) || (size & 0xfff)) 136 if ((base & 0xfff) || (size & 0xfff))
127 return -EINVAL; 137 return -EINVAL;
128 for (; isspace(*ptr); ++ptr) ; 138 for (; isspace(*ptr); ++ptr)
139 ;
140
129 if (strncmp(ptr, "type=", 5)) 141 if (strncmp(ptr, "type=", 5))
130 return -EINVAL; 142 return -EINVAL;
131 ptr += 5; 143 ptr += 5;
132 for (; isspace(*ptr); ++ptr) ; 144 for (; isspace(*ptr); ++ptr)
145 ;
146
133 for (i = 0; i < MTRR_NUM_TYPES; ++i) { 147 for (i = 0; i < MTRR_NUM_TYPES; ++i) {
134 if (strcmp(ptr, mtrr_strings[i])) 148 if (strcmp(ptr, mtrr_strings[i]))
135 continue; 149 continue;
136 base >>= PAGE_SHIFT; 150 base >>= PAGE_SHIFT;
137 size >>= PAGE_SHIFT; 151 size >>= PAGE_SHIFT;
138 err = 152 err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true);
139 mtrr_add_page((unsigned long) base, (unsigned long) size, i,
140 true);
141 if (err < 0) 153 if (err < 0)
142 return err; 154 return err;
143 return len; 155 return len;
@@ -181,7 +193,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
181 case MTRRIOC32_SET_PAGE_ENTRY: 193 case MTRRIOC32_SET_PAGE_ENTRY:
182 case MTRRIOC32_DEL_PAGE_ENTRY: 194 case MTRRIOC32_DEL_PAGE_ENTRY:
183 case MTRRIOC32_KILL_PAGE_ENTRY: { 195 case MTRRIOC32_KILL_PAGE_ENTRY: {
184 struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; 196 struct mtrr_sentry32 __user *s32;
197
198 s32 = (struct mtrr_sentry32 __user *)__arg;
185 err = get_user(sentry.base, &s32->base); 199 err = get_user(sentry.base, &s32->base);
186 err |= get_user(sentry.size, &s32->size); 200 err |= get_user(sentry.size, &s32->size);
187 err |= get_user(sentry.type, &s32->type); 201 err |= get_user(sentry.type, &s32->type);
@@ -191,7 +205,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
191 } 205 }
192 case MTRRIOC32_GET_ENTRY: 206 case MTRRIOC32_GET_ENTRY:
193 case MTRRIOC32_GET_PAGE_ENTRY: { 207 case MTRRIOC32_GET_PAGE_ENTRY: {
194 struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; 208 struct mtrr_gentry32 __user *g32;
209
210 g32 = (struct mtrr_gentry32 __user *)__arg;
195 err = get_user(gentry.regnum, &g32->regnum); 211 err = get_user(gentry.regnum, &g32->regnum);
196 err |= get_user(gentry.base, &g32->base); 212 err |= get_user(gentry.base, &g32->base);
197 err |= get_user(gentry.size, &g32->size); 213 err |= get_user(gentry.size, &g32->size);
@@ -314,7 +330,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
314 if (err) 330 if (err)
315 return err; 331 return err;
316 332
317 switch(cmd) { 333 switch (cmd) {
318 case MTRRIOC_GET_ENTRY: 334 case MTRRIOC_GET_ENTRY:
319 case MTRRIOC_GET_PAGE_ENTRY: 335 case MTRRIOC_GET_PAGE_ENTRY:
320 if (copy_to_user(arg, &gentry, sizeof gentry)) 336 if (copy_to_user(arg, &gentry, sizeof gentry))
@@ -323,7 +339,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
323#ifdef CONFIG_COMPAT 339#ifdef CONFIG_COMPAT
324 case MTRRIOC32_GET_ENTRY: 340 case MTRRIOC32_GET_ENTRY:
325 case MTRRIOC32_GET_PAGE_ENTRY: { 341 case MTRRIOC32_GET_PAGE_ENTRY: {
326 struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; 342 struct mtrr_gentry32 __user *g32;
343
344 g32 = (struct mtrr_gentry32 __user *)__arg;
327 err = put_user(gentry.base, &g32->base); 345 err = put_user(gentry.base, &g32->base);
328 err |= put_user(gentry.size, &g32->size); 346 err |= put_user(gentry.size, &g32->size);
329 err |= put_user(gentry.regnum, &g32->regnum); 347 err |= put_user(gentry.regnum, &g32->regnum);
@@ -335,11 +353,10 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
335 return err; 353 return err;
336} 354}
337 355
338static int 356static int mtrr_close(struct inode *ino, struct file *file)
339mtrr_close(struct inode *ino, struct file *file)
340{ 357{
341 int i, max;
342 unsigned int *fcount = FILE_FCOUNT(file); 358 unsigned int *fcount = FILE_FCOUNT(file);
359 int i, max;
343 360
344 if (fcount != NULL) { 361 if (fcount != NULL) {
345 max = num_var_ranges; 362 max = num_var_ranges;
@@ -359,22 +376,22 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset);
359 376
360static int mtrr_open(struct inode *inode, struct file *file) 377static int mtrr_open(struct inode *inode, struct file *file)
361{ 378{
362 if (!mtrr_if) 379 if (!mtrr_if)
363 return -EIO; 380 return -EIO;
364 if (!mtrr_if->get) 381 if (!mtrr_if->get)
365 return -ENXIO; 382 return -ENXIO;
366 return single_open(file, mtrr_seq_show, NULL); 383 return single_open(file, mtrr_seq_show, NULL);
367} 384}
368 385
369static const struct file_operations mtrr_fops = { 386static const struct file_operations mtrr_fops = {
370 .owner = THIS_MODULE, 387 .owner = THIS_MODULE,
371 .open = mtrr_open, 388 .open = mtrr_open,
372 .read = seq_read, 389 .read = seq_read,
373 .llseek = seq_lseek, 390 .llseek = seq_lseek,
374 .write = mtrr_write, 391 .write = mtrr_write,
375 .unlocked_ioctl = mtrr_ioctl, 392 .unlocked_ioctl = mtrr_ioctl,
376 .compat_ioctl = mtrr_ioctl, 393 .compat_ioctl = mtrr_ioctl,
377 .release = mtrr_close, 394 .release = mtrr_close,
378}; 395};
379 396
380static int mtrr_seq_show(struct seq_file *seq, void *offset) 397static int mtrr_seq_show(struct seq_file *seq, void *offset)
@@ -388,23 +405,24 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
388 max = num_var_ranges; 405 max = num_var_ranges;
389 for (i = 0; i < max; i++) { 406 for (i = 0; i < max; i++) {
390 mtrr_if->get(i, &base, &size, &type); 407 mtrr_if->get(i, &base, &size, &type);
391 if (size == 0) 408 if (size == 0) {
392 mtrr_usage_table[i] = 0; 409 mtrr_usage_table[i] = 0;
393 else { 410 continue;
394 if (size < (0x100000 >> PAGE_SHIFT)) {
395 /* less than 1MB */
396 factor = 'K';
397 size <<= PAGE_SHIFT - 10;
398 } else {
399 factor = 'M';
400 size >>= 20 - PAGE_SHIFT;
401 }
402 /* RED-PEN: base can be > 32bit */
403 len += seq_printf(seq,
404 "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
405 i, base, base >> (20 - PAGE_SHIFT), size, factor,
406 mtrr_usage_table[i], mtrr_attrib_to_str(type));
407 } 411 }
412 if (size < (0x100000 >> PAGE_SHIFT)) {
413 /* less than 1MB */
414 factor = 'K';
415 size <<= PAGE_SHIFT - 10;
416 } else {
417 factor = 'M';
418 size >>= 20 - PAGE_SHIFT;
419 }
420 /* Base can be > 32bit */
421 len += seq_printf(seq, "reg%02i: base=0x%06lx000 "
422 "(%5luMB), size=%5lu%cB, count=%d: %s\n",
423 i, base, base >> (20 - PAGE_SHIFT), size,
424 factor, mtrr_usage_table[i],
425 mtrr_attrib_to_str(type));
408 } 426 }
409 return 0; 427 return 0;
410} 428}
@@ -422,6 +440,5 @@ static int __init mtrr_if_init(void)
422 proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); 440 proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops);
423 return 0; 441 return 0;
424} 442}
425
426arch_initcall(mtrr_if_init); 443arch_initcall(mtrr_if_init);
427#endif /* CONFIG_PROC_FS */ 444#endif /* CONFIG_PROC_FS */
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 8fc248b5aeaf..7af0f88a4163 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -25,43 +25,48 @@
25 Operating System Writer's Guide" (Intel document number 242692), 25 Operating System Writer's Guide" (Intel document number 242692),
26 section 11.11.7 26 section 11.11.7
27 27
28 This was cleaned and made readable by Patrick Mochel <mochel@osdl.org> 28 This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
29 on 6-7 March 2002. 29 on 6-7 March 2002.
30 Source: Intel Architecture Software Developers Manual, Volume 3: 30 Source: Intel Architecture Software Developers Manual, Volume 3:
31 System Programming Guide; Section 9.11. (1997 edition - PPro). 31 System Programming Guide; Section 9.11. (1997 edition - PPro).
32*/ 32*/
33 33
34#define DEBUG
35
36#include <linux/types.h> /* FIXME: kvm_para.h needs this */
37
38#include <linux/kvm_para.h>
39#include <linux/uaccess.h>
34#include <linux/module.h> 40#include <linux/module.h>
41#include <linux/mutex.h>
35#include <linux/init.h> 42#include <linux/init.h>
43#include <linux/sort.h>
44#include <linux/cpu.h>
36#include <linux/pci.h> 45#include <linux/pci.h>
37#include <linux/smp.h> 46#include <linux/smp.h>
38#include <linux/cpu.h>
39#include <linux/mutex.h>
40#include <linux/sort.h>
41 47
48#include <asm/processor.h>
42#include <asm/e820.h> 49#include <asm/e820.h>
43#include <asm/mtrr.h> 50#include <asm/mtrr.h>
44#include <asm/uaccess.h>
45#include <asm/processor.h>
46#include <asm/msr.h> 51#include <asm/msr.h>
47#include <asm/kvm_para.h> 52
48#include "mtrr.h" 53#include "mtrr.h"
49 54
50u32 num_var_ranges = 0; 55u32 num_var_ranges;
51 56
52unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; 57unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
53static DEFINE_MUTEX(mtrr_mutex); 58static DEFINE_MUTEX(mtrr_mutex);
54 59
55u64 size_or_mask, size_and_mask; 60u64 size_or_mask, size_and_mask;
56 61
57static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; 62static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
58 63
59struct mtrr_ops * mtrr_if = NULL; 64struct mtrr_ops *mtrr_if;
60 65
61static void set_mtrr(unsigned int reg, unsigned long base, 66static void set_mtrr(unsigned int reg, unsigned long base,
62 unsigned long size, mtrr_type type); 67 unsigned long size, mtrr_type type);
63 68
64void set_mtrr_ops(struct mtrr_ops * ops) 69void set_mtrr_ops(struct mtrr_ops *ops)
65{ 70{
66 if (ops->vendor && ops->vendor < X86_VENDOR_NUM) 71 if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
67 mtrr_ops[ops->vendor] = ops; 72 mtrr_ops[ops->vendor] = ops;
@@ -72,30 +77,36 @@ static int have_wrcomb(void)
72{ 77{
73 struct pci_dev *dev; 78 struct pci_dev *dev;
74 u8 rev; 79 u8 rev;
75 80
76 if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { 81 dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
77 /* ServerWorks LE chipsets < rev 6 have problems with write-combining 82 if (dev != NULL) {
78 Don't allow it and leave room for other chipsets to be tagged */ 83 /*
84 * ServerWorks LE chipsets < rev 6 have problems with
85 * write-combining. Don't allow it and leave room for other
86 * chipsets to be tagged
87 */
79 if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && 88 if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
80 dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { 89 dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
81 pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); 90 pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
82 if (rev <= 5) { 91 if (rev <= 5) {
83 printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); 92 pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
84 pci_dev_put(dev); 93 pci_dev_put(dev);
85 return 0; 94 return 0;
86 } 95 }
87 } 96 }
88 /* Intel 450NX errata # 23. Non ascending cacheline evictions to 97 /*
89 write combining memory may resulting in data corruption */ 98 * Intel 450NX errata # 23. Non ascending cacheline evictions to
99 * write combining memory may resulting in data corruption
100 */
90 if (dev->vendor == PCI_VENDOR_ID_INTEL && 101 if (dev->vendor == PCI_VENDOR_ID_INTEL &&
91 dev->device == PCI_DEVICE_ID_INTEL_82451NX) { 102 dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
92 printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); 103 pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
93 pci_dev_put(dev); 104 pci_dev_put(dev);
94 return 0; 105 return 0;
95 } 106 }
96 pci_dev_put(dev); 107 pci_dev_put(dev);
97 } 108 }
98 return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0); 109 return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
99} 110}
100 111
101/* This function returns the number of variable MTRRs */ 112/* This function returns the number of variable MTRRs */
@@ -103,12 +114,13 @@ static void __init set_num_var_ranges(void)
103{ 114{
104 unsigned long config = 0, dummy; 115 unsigned long config = 0, dummy;
105 116
106 if (use_intel()) { 117 if (use_intel())
107 rdmsr(MSR_MTRRcap, config, dummy); 118 rdmsr(MSR_MTRRcap, config, dummy);
108 } else if (is_cpu(AMD)) 119 else if (is_cpu(AMD))
109 config = 2; 120 config = 2;
110 else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) 121 else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
111 config = 8; 122 config = 8;
123
112 num_var_ranges = config & 0xff; 124 num_var_ranges = config & 0xff;
113} 125}
114 126
@@ -130,10 +142,12 @@ struct set_mtrr_data {
130 mtrr_type smp_type; 142 mtrr_type smp_type;
131}; 143};
132 144
145/**
146 * ipi_handler - Synchronisation handler. Executed by "other" CPUs.
147 *
148 * Returns nothing.
149 */
133static void ipi_handler(void *info) 150static void ipi_handler(void *info)
134/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
135 [RETURNS] Nothing.
136*/
137{ 151{
138#ifdef CONFIG_SMP 152#ifdef CONFIG_SMP
139 struct set_mtrr_data *data = info; 153 struct set_mtrr_data *data = info;
@@ -142,18 +156,19 @@ static void ipi_handler(void *info)
142 local_irq_save(flags); 156 local_irq_save(flags);
143 157
144 atomic_dec(&data->count); 158 atomic_dec(&data->count);
145 while(!atomic_read(&data->gate)) 159 while (!atomic_read(&data->gate))
146 cpu_relax(); 160 cpu_relax();
147 161
148 /* The master has cleared me to execute */ 162 /* The master has cleared me to execute */
149 if (data->smp_reg != ~0U) 163 if (data->smp_reg != ~0U) {
150 mtrr_if->set(data->smp_reg, data->smp_base, 164 mtrr_if->set(data->smp_reg, data->smp_base,
151 data->smp_size, data->smp_type); 165 data->smp_size, data->smp_type);
152 else 166 } else {
153 mtrr_if->set_all(); 167 mtrr_if->set_all();
168 }
154 169
155 atomic_dec(&data->count); 170 atomic_dec(&data->count);
156 while(atomic_read(&data->gate)) 171 while (atomic_read(&data->gate))
157 cpu_relax(); 172 cpu_relax();
158 173
159 atomic_dec(&data->count); 174 atomic_dec(&data->count);
@@ -161,7 +176,8 @@ static void ipi_handler(void *info)
161#endif 176#endif
162} 177}
163 178
164static inline int types_compatible(mtrr_type type1, mtrr_type type2) { 179static inline int types_compatible(mtrr_type type1, mtrr_type type2)
180{
165 return type1 == MTRR_TYPE_UNCACHABLE || 181 return type1 == MTRR_TYPE_UNCACHABLE ||
166 type2 == MTRR_TYPE_UNCACHABLE || 182 type2 == MTRR_TYPE_UNCACHABLE ||
167 (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || 183 (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
@@ -176,10 +192,10 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
176 * @type: mtrr type 192 * @type: mtrr type
177 * 193 *
178 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: 194 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
179 * 195 *
180 * 1. Send IPI to do the following: 196 * 1. Send IPI to do the following:
181 * 2. Disable Interrupts 197 * 2. Disable Interrupts
182 * 3. Wait for all procs to do so 198 * 3. Wait for all procs to do so
183 * 4. Enter no-fill cache mode 199 * 4. Enter no-fill cache mode
184 * 5. Flush caches 200 * 5. Flush caches
185 * 6. Clear PGE bit 201 * 6. Clear PGE bit
@@ -189,26 +205,27 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
189 * 10. Enable all range registers 205 * 10. Enable all range registers
190 * 11. Flush all TLBs and caches again 206 * 11. Flush all TLBs and caches again
191 * 12. Enter normal cache mode and reenable caching 207 * 12. Enter normal cache mode and reenable caching
192 * 13. Set PGE 208 * 13. Set PGE
193 * 14. Wait for buddies to catch up 209 * 14. Wait for buddies to catch up
194 * 15. Enable interrupts. 210 * 15. Enable interrupts.
195 * 211 *
196 * What does that mean for us? Well, first we set data.count to the number 212 * What does that mean for us? Well, first we set data.count to the number
197 * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait 213 * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
198 * until it hits 0 and proceed. We set the data.gate flag and reset data.count. 214 * until it hits 0 and proceed. We set the data.gate flag and reset data.count.
199 * Meanwhile, they are waiting for that flag to be set. Once it's set, each 215 * Meanwhile, they are waiting for that flag to be set. Once it's set, each
200 * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it 216 * CPU goes through the transition of updating MTRRs.
201 * differently, so we call mtrr_if->set() callback and let them take care of it. 217 * The CPU vendors may each do it differently,
202 * When they're done, they again decrement data->count and wait for data.gate to 218 * so we call mtrr_if->set() callback and let them take care of it.
203 * be reset. 219 * When they're done, they again decrement data->count and wait for data.gate
204 * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. 220 * to be reset.
221 * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
205 * Everyone then enables interrupts and we all continue on. 222 * Everyone then enables interrupts and we all continue on.
206 * 223 *
207 * Note that the mechanism is the same for UP systems, too; all the SMP stuff 224 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
208 * becomes nops. 225 * becomes nops.
209 */ 226 */
210static void set_mtrr(unsigned int reg, unsigned long base, 227static void
211 unsigned long size, mtrr_type type) 228set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
212{ 229{
213 struct set_mtrr_data data; 230 struct set_mtrr_data data;
214 unsigned long flags; 231 unsigned long flags;
@@ -218,121 +235,122 @@ static void set_mtrr(unsigned int reg, unsigned long base,
218 data.smp_size = size; 235 data.smp_size = size;
219 data.smp_type = type; 236 data.smp_type = type;
220 atomic_set(&data.count, num_booting_cpus() - 1); 237 atomic_set(&data.count, num_booting_cpus() - 1);
221 /* make sure data.count is visible before unleashing other CPUs */ 238
239 /* Make sure data.count is visible before unleashing other CPUs */
222 smp_wmb(); 240 smp_wmb();
223 atomic_set(&data.gate,0); 241 atomic_set(&data.gate, 0);
224 242
225 /* Start the ball rolling on other CPUs */ 243 /* Start the ball rolling on other CPUs */
226 if (smp_call_function(ipi_handler, &data, 0) != 0) 244 if (smp_call_function(ipi_handler, &data, 0) != 0)
227 panic("mtrr: timed out waiting for other CPUs\n"); 245 panic("mtrr: timed out waiting for other CPUs\n");
228 246
229 local_irq_save(flags); 247 local_irq_save(flags);
230 248
231 while(atomic_read(&data.count)) 249 while (atomic_read(&data.count))
232 cpu_relax(); 250 cpu_relax();
233 251
234 /* ok, reset count and toggle gate */ 252 /* Ok, reset count and toggle gate */
235 atomic_set(&data.count, num_booting_cpus() - 1); 253 atomic_set(&data.count, num_booting_cpus() - 1);
236 smp_wmb(); 254 smp_wmb();
237 atomic_set(&data.gate,1); 255 atomic_set(&data.gate, 1);
238 256
239 /* do our MTRR business */ 257 /* Do our MTRR business */
240 258
241 /* HACK! 259 /*
260 * HACK!
242 * We use this same function to initialize the mtrrs on boot. 261 * We use this same function to initialize the mtrrs on boot.
243 * The state of the boot cpu's mtrrs has been saved, and we want 262 * The state of the boot cpu's mtrrs has been saved, and we want
244 * to replicate across all the APs. 263 * to replicate across all the APs.
245 * If we're doing that @reg is set to something special... 264 * If we're doing that @reg is set to something special...
246 */ 265 */
247 if (reg != ~0U) 266 if (reg != ~0U)
248 mtrr_if->set(reg,base,size,type); 267 mtrr_if->set(reg, base, size, type);
249 268
250 /* wait for the others */ 269 /* Wait for the others */
251 while(atomic_read(&data.count)) 270 while (atomic_read(&data.count))
252 cpu_relax(); 271 cpu_relax();
253 272
254 atomic_set(&data.count, num_booting_cpus() - 1); 273 atomic_set(&data.count, num_booting_cpus() - 1);
255 smp_wmb(); 274 smp_wmb();
256 atomic_set(&data.gate,0); 275 atomic_set(&data.gate, 0);
257 276
258 /* 277 /*
259 * Wait here for everyone to have seen the gate change 278 * Wait here for everyone to have seen the gate change
260 * So we're the last ones to touch 'data' 279 * So we're the last ones to touch 'data'
261 */ 280 */
262 while(atomic_read(&data.count)) 281 while (atomic_read(&data.count))
263 cpu_relax(); 282 cpu_relax();
264 283
265 local_irq_restore(flags); 284 local_irq_restore(flags);
266} 285}
267 286
268/** 287/**
269 * mtrr_add_page - Add a memory type region 288 * mtrr_add_page - Add a memory type region
270 * @base: Physical base address of region in pages (in units of 4 kB!) 289 * @base: Physical base address of region in pages (in units of 4 kB!)
271 * @size: Physical size of region in pages (4 kB) 290 * @size: Physical size of region in pages (4 kB)
272 * @type: Type of MTRR desired 291 * @type: Type of MTRR desired
273 * @increment: If this is true do usage counting on the region 292 * @increment: If this is true do usage counting on the region
274 * 293 *
275 * Memory type region registers control the caching on newer Intel and 294 * Memory type region registers control the caching on newer Intel and
276 * non Intel processors. This function allows drivers to request an 295 * non Intel processors. This function allows drivers to request an
277 * MTRR is added. The details and hardware specifics of each processor's 296 * MTRR is added. The details and hardware specifics of each processor's
278 * implementation are hidden from the caller, but nevertheless the 297 * implementation are hidden from the caller, but nevertheless the
279 * caller should expect to need to provide a power of two size on an 298 * caller should expect to need to provide a power of two size on an
280 * equivalent power of two boundary. 299 * equivalent power of two boundary.
281 * 300 *
282 * If the region cannot be added either because all regions are in use 301 * If the region cannot be added either because all regions are in use
283 * or the CPU cannot support it a negative value is returned. On success 302 * or the CPU cannot support it a negative value is returned. On success
284 * the register number for this entry is returned, but should be treated 303 * the register number for this entry is returned, but should be treated
285 * as a cookie only. 304 * as a cookie only.
286 * 305 *
287 * On a multiprocessor machine the changes are made to all processors. 306 * On a multiprocessor machine the changes are made to all processors.
288 * This is required on x86 by the Intel processors. 307 * This is required on x86 by the Intel processors.
289 * 308 *
290 * The available types are 309 * The available types are
291 * 310 *
292 * %MTRR_TYPE_UNCACHABLE - No caching 311 * %MTRR_TYPE_UNCACHABLE - No caching
293 * 312 *
294 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever 313 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
295 * 314 *
296 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts 315 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
297 * 316 *
298 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes 317 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
299 * 318 *
300 * BUGS: Needs a quiet flag for the cases where drivers do not mind 319 * BUGS: Needs a quiet flag for the cases where drivers do not mind
301 * failures and do not wish system log messages to be sent. 320 * failures and do not wish system log messages to be sent.
302 */ 321 */
303 322int mtrr_add_page(unsigned long base, unsigned long size,
304int mtrr_add_page(unsigned long base, unsigned long size,
305 unsigned int type, bool increment) 323 unsigned int type, bool increment)
306{ 324{
325 unsigned long lbase, lsize;
307 int i, replace, error; 326 int i, replace, error;
308 mtrr_type ltype; 327 mtrr_type ltype;
309 unsigned long lbase, lsize;
310 328
311 if (!mtrr_if) 329 if (!mtrr_if)
312 return -ENXIO; 330 return -ENXIO;
313 331
314 if ((error = mtrr_if->validate_add_page(base,size,type))) 332 error = mtrr_if->validate_add_page(base, size, type);
333 if (error)
315 return error; 334 return error;
316 335
317 if (type >= MTRR_NUM_TYPES) { 336 if (type >= MTRR_NUM_TYPES) {
318 printk(KERN_WARNING "mtrr: type: %u invalid\n", type); 337 pr_warning("mtrr: type: %u invalid\n", type);
319 return -EINVAL; 338 return -EINVAL;
320 } 339 }
321 340
322 /* If the type is WC, check that this processor supports it */ 341 /* If the type is WC, check that this processor supports it */
323 if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { 342 if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
324 printk(KERN_WARNING 343 pr_warning("mtrr: your processor doesn't support write-combining\n");
325 "mtrr: your processor doesn't support write-combining\n");
326 return -ENOSYS; 344 return -ENOSYS;
327 } 345 }
328 346
329 if (!size) { 347 if (!size) {
330 printk(KERN_WARNING "mtrr: zero sized request\n"); 348 pr_warning("mtrr: zero sized request\n");
331 return -EINVAL; 349 return -EINVAL;
332 } 350 }
333 351
334 if (base & size_or_mask || size & size_or_mask) { 352 if (base & size_or_mask || size & size_or_mask) {
335 printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); 353 pr_warning("mtrr: base or size exceeds the MTRR width\n");
336 return -EINVAL; 354 return -EINVAL;
337 } 355 }
338 356
@@ -341,36 +359,40 @@ int mtrr_add_page(unsigned long base, unsigned long size,
341 359
342 /* No CPU hotplug when we change MTRR entries */ 360 /* No CPU hotplug when we change MTRR entries */
343 get_online_cpus(); 361 get_online_cpus();
344 /* Search for existing MTRR */ 362
363 /* Search for existing MTRR */
345 mutex_lock(&mtrr_mutex); 364 mutex_lock(&mtrr_mutex);
346 for (i = 0; i < num_var_ranges; ++i) { 365 for (i = 0; i < num_var_ranges; ++i) {
347 mtrr_if->get(i, &lbase, &lsize, &ltype); 366 mtrr_if->get(i, &lbase, &lsize, &ltype);
348 if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase) 367 if (!lsize || base > lbase + lsize - 1 ||
368 base + size - 1 < lbase)
349 continue; 369 continue;
350 /* At this point we know there is some kind of overlap/enclosure */ 370 /*
371 * At this point we know there is some kind of
372 * overlap/enclosure
373 */
351 if (base < lbase || base + size - 1 > lbase + lsize - 1) { 374 if (base < lbase || base + size - 1 > lbase + lsize - 1) {
352 if (base <= lbase && base + size - 1 >= lbase + lsize - 1) { 375 if (base <= lbase &&
376 base + size - 1 >= lbase + lsize - 1) {
353 /* New region encloses an existing region */ 377 /* New region encloses an existing region */
354 if (type == ltype) { 378 if (type == ltype) {
355 replace = replace == -1 ? i : -2; 379 replace = replace == -1 ? i : -2;
356 continue; 380 continue;
357 } 381 } else if (types_compatible(type, ltype))
358 else if (types_compatible(type, ltype))
359 continue; 382 continue;
360 } 383 }
361 printk(KERN_WARNING 384 pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
362 "mtrr: 0x%lx000,0x%lx000 overlaps existing" 385 " 0x%lx000,0x%lx000\n", base, size, lbase,
363 " 0x%lx000,0x%lx000\n", base, size, lbase, 386 lsize);
364 lsize);
365 goto out; 387 goto out;
366 } 388 }
367 /* New region is enclosed by an existing region */ 389 /* New region is enclosed by an existing region */
368 if (ltype != type) { 390 if (ltype != type) {
369 if (types_compatible(type, ltype)) 391 if (types_compatible(type, ltype))
370 continue; 392 continue;
371 printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", 393 pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
372 base, size, mtrr_attrib_to_str(ltype), 394 base, size, mtrr_attrib_to_str(ltype),
373 mtrr_attrib_to_str(type)); 395 mtrr_attrib_to_str(type));
374 goto out; 396 goto out;
375 } 397 }
376 if (increment) 398 if (increment)
@@ -378,7 +400,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
378 error = i; 400 error = i;
379 goto out; 401 goto out;
380 } 402 }
381 /* Search for an empty MTRR */ 403 /* Search for an empty MTRR */
382 i = mtrr_if->get_free_region(base, size, replace); 404 i = mtrr_if->get_free_region(base, size, replace);
383 if (i >= 0) { 405 if (i >= 0) {
384 set_mtrr(i, base, size, type); 406 set_mtrr(i, base, size, type);
@@ -393,8 +415,9 @@ int mtrr_add_page(unsigned long base, unsigned long size,
393 mtrr_usage_table[replace] = 0; 415 mtrr_usage_table[replace] = 0;
394 } 416 }
395 } 417 }
396 } else 418 } else {
397 printk(KERN_INFO "mtrr: no more MTRRs available\n"); 419 pr_info("mtrr: no more MTRRs available\n");
420 }
398 error = i; 421 error = i;
399 out: 422 out:
400 mutex_unlock(&mtrr_mutex); 423 mutex_unlock(&mtrr_mutex);
@@ -405,10 +428,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
405static int mtrr_check(unsigned long base, unsigned long size) 428static int mtrr_check(unsigned long base, unsigned long size)
406{ 429{
407 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { 430 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
408 printk(KERN_WARNING 431 pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
409 "mtrr: size and base must be multiples of 4 kiB\n"); 432 pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
410 printk(KERN_DEBUG
411 "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
412 dump_stack(); 433 dump_stack();
413 return -1; 434 return -1;
414 } 435 }
@@ -416,66 +437,64 @@ static int mtrr_check(unsigned long base, unsigned long size)
416} 437}
417 438
418/** 439/**
419 * mtrr_add - Add a memory type region 440 * mtrr_add - Add a memory type region
420 * @base: Physical base address of region 441 * @base: Physical base address of region
421 * @size: Physical size of region 442 * @size: Physical size of region
422 * @type: Type of MTRR desired 443 * @type: Type of MTRR desired
423 * @increment: If this is true do usage counting on the region 444 * @increment: If this is true do usage counting on the region
424 * 445 *
425 * Memory type region registers control the caching on newer Intel and 446 * Memory type region registers control the caching on newer Intel and
426 * non Intel processors. This function allows drivers to request an 447 * non Intel processors. This function allows drivers to request an
427 * MTRR is added. The details and hardware specifics of each processor's 448 * MTRR is added. The details and hardware specifics of each processor's
428 * implementation are hidden from the caller, but nevertheless the 449 * implementation are hidden from the caller, but nevertheless the
429 * caller should expect to need to provide a power of two size on an 450 * caller should expect to need to provide a power of two size on an
430 * equivalent power of two boundary. 451 * equivalent power of two boundary.
431 * 452 *
432 * If the region cannot be added either because all regions are in use 453 * If the region cannot be added either because all regions are in use
433 * or the CPU cannot support it a negative value is returned. On success 454 * or the CPU cannot support it a negative value is returned. On success
434 * the register number for this entry is returned, but should be treated 455 * the register number for this entry is returned, but should be treated
435 * as a cookie only. 456 * as a cookie only.
436 * 457 *
437 * On a multiprocessor machine the changes are made to all processors. 458 * On a multiprocessor machine the changes are made to all processors.
438 * This is required on x86 by the Intel processors. 459 * This is required on x86 by the Intel processors.
439 * 460 *
440 * The available types are 461 * The available types are
441 * 462 *
442 * %MTRR_TYPE_UNCACHABLE - No caching 463 * %MTRR_TYPE_UNCACHABLE - No caching
443 * 464 *
444 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever 465 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
445 * 466 *
446 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts 467 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
447 * 468 *
448 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes 469 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
449 * 470 *
450 * BUGS: Needs a quiet flag for the cases where drivers do not mind 471 * BUGS: Needs a quiet flag for the cases where drivers do not mind
451 * failures and do not wish system log messages to be sent. 472 * failures and do not wish system log messages to be sent.
452 */ 473 */
453 474int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
454int 475 bool increment)
455mtrr_add(unsigned long base, unsigned long size, unsigned int type,
456 bool increment)
457{ 476{
458 if (mtrr_check(base, size)) 477 if (mtrr_check(base, size))
459 return -EINVAL; 478 return -EINVAL;
460 return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, 479 return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
461 increment); 480 increment);
462} 481}
482EXPORT_SYMBOL(mtrr_add);
463 483
464/** 484/**
465 * mtrr_del_page - delete a memory type region 485 * mtrr_del_page - delete a memory type region
466 * @reg: Register returned by mtrr_add 486 * @reg: Register returned by mtrr_add
467 * @base: Physical base address 487 * @base: Physical base address
468 * @size: Size of region 488 * @size: Size of region
469 * 489 *
470 * If register is supplied then base and size are ignored. This is 490 * If register is supplied then base and size are ignored. This is
471 * how drivers should call it. 491 * how drivers should call it.
472 * 492 *
473 * Releases an MTRR region. If the usage count drops to zero the 493 * Releases an MTRR region. If the usage count drops to zero the
474 * register is freed and the region returns to default state. 494 * register is freed and the region returns to default state.
475 * On success the register is returned, on failure a negative error 495 * On success the register is returned, on failure a negative error
476 * code. 496 * code.
477 */ 497 */
478
479int mtrr_del_page(int reg, unsigned long base, unsigned long size) 498int mtrr_del_page(int reg, unsigned long base, unsigned long size)
480{ 499{
481 int i, max; 500 int i, max;
@@ -500,22 +519,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
500 } 519 }
501 } 520 }
502 if (reg < 0) { 521 if (reg < 0) {
503 printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, 522 pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
504 size); 523 base, size);
505 goto out; 524 goto out;
506 } 525 }
507 } 526 }
508 if (reg >= max) { 527 if (reg >= max) {
509 printk(KERN_WARNING "mtrr: register: %d too big\n", reg); 528 pr_warning("mtrr: register: %d too big\n", reg);
510 goto out; 529 goto out;
511 } 530 }
512 mtrr_if->get(reg, &lbase, &lsize, &ltype); 531 mtrr_if->get(reg, &lbase, &lsize, &ltype);
513 if (lsize < 1) { 532 if (lsize < 1) {
514 printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); 533 pr_warning("mtrr: MTRR %d not used\n", reg);
515 goto out; 534 goto out;
516 } 535 }
517 if (mtrr_usage_table[reg] < 1) { 536 if (mtrr_usage_table[reg] < 1) {
518 printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); 537 pr_warning("mtrr: reg: %d has count=0\n", reg);
519 goto out; 538 goto out;
520 } 539 }
521 if (--mtrr_usage_table[reg] < 1) 540 if (--mtrr_usage_table[reg] < 1)
@@ -526,33 +545,31 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
526 put_online_cpus(); 545 put_online_cpus();
527 return error; 546 return error;
528} 547}
548
529/** 549/**
530 * mtrr_del - delete a memory type region 550 * mtrr_del - delete a memory type region
531 * @reg: Register returned by mtrr_add 551 * @reg: Register returned by mtrr_add
532 * @base: Physical base address 552 * @base: Physical base address
533 * @size: Size of region 553 * @size: Size of region
534 * 554 *
535 * If register is supplied then base and size are ignored. This is 555 * If register is supplied then base and size are ignored. This is
536 * how drivers should call it. 556 * how drivers should call it.
537 * 557 *
538 * Releases an MTRR region. If the usage count drops to zero the 558 * Releases an MTRR region. If the usage count drops to zero the
539 * register is freed and the region returns to default state. 559 * register is freed and the region returns to default state.
540 * On success the register is returned, on failure a negative error 560 * On success the register is returned, on failure a negative error
541 * code. 561 * code.
542 */ 562 */
543 563int mtrr_del(int reg, unsigned long base, unsigned long size)
544int
545mtrr_del(int reg, unsigned long base, unsigned long size)
546{ 564{
547 if (mtrr_check(base, size)) 565 if (mtrr_check(base, size))
548 return -EINVAL; 566 return -EINVAL;
549 return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); 567 return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
550} 568}
551
552EXPORT_SYMBOL(mtrr_add);
553EXPORT_SYMBOL(mtrr_del); 569EXPORT_SYMBOL(mtrr_del);
554 570
555/* HACK ALERT! 571/*
572 * HACK ALERT!
556 * These should be called implicitly, but we can't yet until all the initcall 573 * These should be called implicitly, but we can't yet until all the initcall
557 * stuff is done... 574 * stuff is done...
558 */ 575 */
@@ -576,29 +593,28 @@ struct mtrr_value {
576 593
577static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; 594static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
578 595
579static int mtrr_save(struct sys_device * sysdev, pm_message_t state) 596static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
580{ 597{
581 int i; 598 int i;
582 599
583 for (i = 0; i < num_var_ranges; i++) { 600 for (i = 0; i < num_var_ranges; i++) {
584 mtrr_if->get(i, 601 mtrr_if->get(i, &mtrr_value[i].lbase,
585 &mtrr_value[i].lbase, 602 &mtrr_value[i].lsize,
586 &mtrr_value[i].lsize, 603 &mtrr_value[i].ltype);
587 &mtrr_value[i].ltype);
588 } 604 }
589 return 0; 605 return 0;
590} 606}
591 607
592static int mtrr_restore(struct sys_device * sysdev) 608static int mtrr_restore(struct sys_device *sysdev)
593{ 609{
594 int i; 610 int i;
595 611
596 for (i = 0; i < num_var_ranges; i++) { 612 for (i = 0; i < num_var_ranges; i++) {
597 if (mtrr_value[i].lsize) 613 if (mtrr_value[i].lsize) {
598 set_mtrr(i, 614 set_mtrr(i, mtrr_value[i].lbase,
599 mtrr_value[i].lbase, 615 mtrr_value[i].lsize,
600 mtrr_value[i].lsize, 616 mtrr_value[i].ltype);
601 mtrr_value[i].ltype); 617 }
602 } 618 }
603 return 0; 619 return 0;
604} 620}
@@ -615,26 +631,29 @@ int __initdata changed_by_mtrr_cleanup;
615/** 631/**
616 * mtrr_bp_init - initialize mtrrs on the boot CPU 632 * mtrr_bp_init - initialize mtrrs on the boot CPU
617 * 633 *
618 * This needs to be called early; before any of the other CPUs are 634 * This needs to be called early; before any of the other CPUs are
619 * initialized (i.e. before smp_init()). 635 * initialized (i.e. before smp_init()).
620 * 636 *
621 */ 637 */
622void __init mtrr_bp_init(void) 638void __init mtrr_bp_init(void)
623{ 639{
624 u32 phys_addr; 640 u32 phys_addr;
641
625 init_ifs(); 642 init_ifs();
626 643
627 phys_addr = 32; 644 phys_addr = 32;
628 645
629 if (cpu_has_mtrr) { 646 if (cpu_has_mtrr) {
630 mtrr_if = &generic_mtrr_ops; 647 mtrr_if = &generic_mtrr_ops;
631 size_or_mask = 0xff000000; /* 36 bits */ 648 size_or_mask = 0xff000000; /* 36 bits */
632 size_and_mask = 0x00f00000; 649 size_and_mask = 0x00f00000;
633 phys_addr = 36; 650 phys_addr = 36;
634 651
635 /* This is an AMD specific MSR, but we assume(hope?) that 652 /*
636 Intel will implement it to when they extend the address 653 * This is an AMD specific MSR, but we assume(hope?) that
637 bus of the Xeon. */ 654 * Intel will implement it to when they extend the address
655 * bus of the Xeon.
656 */
638 if (cpuid_eax(0x80000000) >= 0x80000008) { 657 if (cpuid_eax(0x80000000) >= 0x80000008) {
639 phys_addr = cpuid_eax(0x80000008) & 0xff; 658 phys_addr = cpuid_eax(0x80000008) & 0xff;
640 /* CPUID workaround for Intel 0F33/0F34 CPU */ 659 /* CPUID workaround for Intel 0F33/0F34 CPU */
@@ -649,9 +668,11 @@ void __init mtrr_bp_init(void)
649 size_and_mask = ~size_or_mask & 0xfffff00000ULL; 668 size_and_mask = ~size_or_mask & 0xfffff00000ULL;
650 } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && 669 } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
651 boot_cpu_data.x86 == 6) { 670 boot_cpu_data.x86 == 6) {
652 /* VIA C* family have Intel style MTRRs, but 671 /*
653 don't support PAE */ 672 * VIA C* family have Intel style MTRRs,
654 size_or_mask = 0xfff00000; /* 32 bits */ 673 * but don't support PAE
674 */
675 size_or_mask = 0xfff00000; /* 32 bits */
655 size_and_mask = 0; 676 size_and_mask = 0;
656 phys_addr = 32; 677 phys_addr = 32;
657 } 678 }
@@ -694,7 +715,6 @@ void __init mtrr_bp_init(void)
694 changed_by_mtrr_cleanup = 1; 715 changed_by_mtrr_cleanup = 1;
695 mtrr_if->set_all(); 716 mtrr_if->set_all();
696 } 717 }
697
698 } 718 }
699 } 719 }
700} 720}
@@ -706,12 +726,17 @@ void mtrr_ap_init(void)
706 if (!mtrr_if || !use_intel()) 726 if (!mtrr_if || !use_intel())
707 return; 727 return;
708 /* 728 /*
709 * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, 729 * Ideally we should hold mtrr_mutex here to avoid mtrr entries
710 * but this routine will be called in cpu boot time, holding the lock 730 * changed, but this routine will be called in cpu boot time,
711 * breaks it. This routine is called in two cases: 1.very earily time 731 * holding the lock breaks it.
712 * of software resume, when there absolutely isn't mtrr entry changes; 732 *
713 * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to 733 * This routine is called in two cases:
714 * prevent mtrr entry changes 734 *
735 * 1. very earily time of software resume, when there absolutely
736 * isn't mtrr entry changes;
737 *
738 * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
739 * lock to prevent mtrr entry changes
715 */ 740 */
716 local_irq_save(flags); 741 local_irq_save(flags);
717 742
@@ -732,19 +757,23 @@ static int __init mtrr_init_finialize(void)
732{ 757{
733 if (!mtrr_if) 758 if (!mtrr_if)
734 return 0; 759 return 0;
760
735 if (use_intel()) { 761 if (use_intel()) {
736 if (!changed_by_mtrr_cleanup) 762 if (!changed_by_mtrr_cleanup)
737 mtrr_state_warn(); 763 mtrr_state_warn();
738 } else { 764 return 0;
739 /* The CPUs haven't MTRR and seem to not support SMP. They have
740 * specific drivers, we use a tricky method to support
741 * suspend/resume for them.
742 * TBD: is there any system with such CPU which supports
743 * suspend/resume? if no, we should remove the code.
744 */
745 sysdev_driver_register(&cpu_sysdev_class,
746 &mtrr_sysdev_driver);
747 } 765 }
766
767 /*
768 * The CPU has no MTRR and seems to not support SMP. They have
769 * specific drivers, we use a tricky method to support
770 * suspend/resume for them.
771 *
772 * TBD: is there any system with such CPU which supports
773 * suspend/resume? If no, we should remove the code.
774 */
775 sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver);
776
748 return 0; 777 return 0;
749} 778}
750subsys_initcall(mtrr_init_finialize); 779subsys_initcall(mtrr_init_finialize);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 7538b767f206..a501dee9a87a 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * local mtrr defines. 2 * local MTRR defines.
3 */ 3 */
4 4
5#include <linux/types.h> 5#include <linux/types.h>
@@ -14,13 +14,12 @@ extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
14struct mtrr_ops { 14struct mtrr_ops {
15 u32 vendor; 15 u32 vendor;
16 u32 use_intel_if; 16 u32 use_intel_if;
17// void (*init)(void);
18 void (*set)(unsigned int reg, unsigned long base, 17 void (*set)(unsigned int reg, unsigned long base,
19 unsigned long size, mtrr_type type); 18 unsigned long size, mtrr_type type);
20 void (*set_all)(void); 19 void (*set_all)(void);
21 20
22 void (*get)(unsigned int reg, unsigned long *base, 21 void (*get)(unsigned int reg, unsigned long *base,
23 unsigned long *size, mtrr_type * type); 22 unsigned long *size, mtrr_type *type);
24 int (*get_free_region)(unsigned long base, unsigned long size, 23 int (*get_free_region)(unsigned long base, unsigned long size,
25 int replace_reg); 24 int replace_reg);
26 int (*validate_add_page)(unsigned long base, unsigned long size, 25 int (*validate_add_page)(unsigned long base, unsigned long size,
@@ -39,11 +38,11 @@ extern int positive_have_wrcomb(void);
39 38
40/* library functions for processor-specific routines */ 39/* library functions for processor-specific routines */
41struct set_mtrr_context { 40struct set_mtrr_context {
42 unsigned long flags; 41 unsigned long flags;
43 unsigned long cr4val; 42 unsigned long cr4val;
44 u32 deftype_lo; 43 u32 deftype_lo;
45 u32 deftype_hi; 44 u32 deftype_hi;
46 u32 ccr3; 45 u32 ccr3;
47}; 46};
48 47
49void set_mtrr_done(struct set_mtrr_context *ctxt); 48void set_mtrr_done(struct set_mtrr_context *ctxt);
@@ -54,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index,
54 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); 53 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
55void get_mtrr_state(void); 54void get_mtrr_state(void);
56 55
57extern void set_mtrr_ops(struct mtrr_ops * ops); 56extern void set_mtrr_ops(struct mtrr_ops *ops);
58 57
59extern u64 size_or_mask, size_and_mask; 58extern u64 size_or_mask, size_and_mask;
60extern struct mtrr_ops * mtrr_if; 59extern struct mtrr_ops *mtrr_if;
61 60
62#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) 61#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
63#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) 62#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
index 1f5fb1588d1f..dfc80b4e6b0d 100644
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ b/arch/x86/kernel/cpu/mtrr/state.c
@@ -1,24 +1,25 @@
1#include <linux/mm.h>
2#include <linux/init.h> 1#include <linux/init.h>
3#include <asm/io.h> 2#include <linux/io.h>
4#include <asm/mtrr.h> 3#include <linux/mm.h>
5#include <asm/msr.h> 4
6#include <asm/processor-cyrix.h> 5#include <asm/processor-cyrix.h>
7#include <asm/processor-flags.h> 6#include <asm/processor-flags.h>
8#include "mtrr.h" 7#include <asm/mtrr.h>
8#include <asm/msr.h>
9 9
10#include "mtrr.h"
10 11
11/* Put the processor into a state where MTRRs can be safely set */ 12/* Put the processor into a state where MTRRs can be safely set */
12void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) 13void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
13{ 14{
14 unsigned int cr0; 15 unsigned int cr0;
15 16
16 /* Disable interrupts locally */ 17 /* Disable interrupts locally */
17 local_irq_save(ctxt->flags); 18 local_irq_save(ctxt->flags);
18 19
19 if (use_intel() || is_cpu(CYRIX)) { 20 if (use_intel() || is_cpu(CYRIX)) {
20 21
21 /* Save value of CR4 and clear Page Global Enable (bit 7) */ 22 /* Save value of CR4 and clear Page Global Enable (bit 7) */
22 if (cpu_has_pge) { 23 if (cpu_has_pge) {
23 ctxt->cr4val = read_cr4(); 24 ctxt->cr4val = read_cr4();
24 write_cr4(ctxt->cr4val & ~X86_CR4_PGE); 25 write_cr4(ctxt->cr4val & ~X86_CR4_PGE);
@@ -33,50 +34,61 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
33 write_cr0(cr0); 34 write_cr0(cr0);
34 wbinvd(); 35 wbinvd();
35 36
36 if (use_intel()) 37 if (use_intel()) {
37 /* Save MTRR state */ 38 /* Save MTRR state */
38 rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); 39 rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
39 else 40 } else {
40 /* Cyrix ARRs - everything else were excluded at the top */ 41 /*
42 * Cyrix ARRs -
43 * everything else were excluded at the top
44 */
41 ctxt->ccr3 = getCx86(CX86_CCR3); 45 ctxt->ccr3 = getCx86(CX86_CCR3);
46 }
42 } 47 }
43} 48}
44 49
45void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) 50void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
46{ 51{
47 if (use_intel()) 52 if (use_intel()) {
48 /* Disable MTRRs, and set the default type to uncached */ 53 /* Disable MTRRs, and set the default type to uncached */
49 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, 54 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
50 ctxt->deftype_hi); 55 ctxt->deftype_hi);
51 else if (is_cpu(CYRIX)) 56 } else {
52 /* Cyrix ARRs - everything else were excluded at the top */ 57 if (is_cpu(CYRIX)) {
53 setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); 58 /* Cyrix ARRs - everything else were excluded at the top */
59 setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
60 }
61 }
54} 62}
55 63
56/* Restore the processor after a set_mtrr_prepare */ 64/* Restore the processor after a set_mtrr_prepare */
57void set_mtrr_done(struct set_mtrr_context *ctxt) 65void set_mtrr_done(struct set_mtrr_context *ctxt)
58{ 66{
59 if (use_intel() || is_cpu(CYRIX)) { 67 if (use_intel() || is_cpu(CYRIX)) {
60 68
61 /* Flush caches and TLBs */ 69 /* Flush caches and TLBs */
62 wbinvd(); 70 wbinvd();
63 71
64 /* Restore MTRRdefType */ 72 /* Restore MTRRdefType */
65 if (use_intel()) 73 if (use_intel()) {
66 /* Intel (P6) standard MTRRs */ 74 /* Intel (P6) standard MTRRs */
67 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); 75 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo,
68 else 76 ctxt->deftype_hi);
69 /* Cyrix ARRs - everything else was excluded at the top */ 77 } else {
78 /*
79 * Cyrix ARRs -
80 * everything else was excluded at the top
81 */
70 setCx86(CX86_CCR3, ctxt->ccr3); 82 setCx86(CX86_CCR3, ctxt->ccr3);
83 }
71 84
72 /* Enable caches */ 85 /* Enable caches */
73 write_cr0(read_cr0() & 0xbfffffff); 86 write_cr0(read_cr0() & 0xbfffffff);
74 87
75 /* Restore value of CR4 */ 88 /* Restore value of CR4 */
76 if (cpu_has_pge) 89 if (cpu_has_pge)
77 write_cr4(ctxt->cr4val); 90 write_cr4(ctxt->cr4val);
78 } 91 }
79 /* Re-enable interrupts locally (if enabled previously) */ 92 /* Re-enable interrupts locally (if enabled previously) */
80 local_irq_restore(ctxt->flags); 93 local_irq_restore(ctxt->flags);
81} 94}
82
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index e60ed740d2b3..392bea43b890 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -68,16 +68,16 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
68 /* returns the bit offset of the performance counter register */ 68 /* returns the bit offset of the performance counter register */
69 switch (boot_cpu_data.x86_vendor) { 69 switch (boot_cpu_data.x86_vendor) {
70 case X86_VENDOR_AMD: 70 case X86_VENDOR_AMD:
71 return (msr - MSR_K7_PERFCTR0); 71 return msr - MSR_K7_PERFCTR0;
72 case X86_VENDOR_INTEL: 72 case X86_VENDOR_INTEL:
73 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 73 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
74 return (msr - MSR_ARCH_PERFMON_PERFCTR0); 74 return msr - MSR_ARCH_PERFMON_PERFCTR0;
75 75
76 switch (boot_cpu_data.x86) { 76 switch (boot_cpu_data.x86) {
77 case 6: 77 case 6:
78 return (msr - MSR_P6_PERFCTR0); 78 return msr - MSR_P6_PERFCTR0;
79 case 15: 79 case 15:
80 return (msr - MSR_P4_BPU_PERFCTR0); 80 return msr - MSR_P4_BPU_PERFCTR0;
81 } 81 }
82 } 82 }
83 return 0; 83 return 0;
@@ -92,16 +92,16 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
92 /* returns the bit offset of the event selection register */ 92 /* returns the bit offset of the event selection register */
93 switch (boot_cpu_data.x86_vendor) { 93 switch (boot_cpu_data.x86_vendor) {
94 case X86_VENDOR_AMD: 94 case X86_VENDOR_AMD:
95 return (msr - MSR_K7_EVNTSEL0); 95 return msr - MSR_K7_EVNTSEL0;
96 case X86_VENDOR_INTEL: 96 case X86_VENDOR_INTEL:
97 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 97 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
98 return (msr - MSR_ARCH_PERFMON_EVENTSEL0); 98 return msr - MSR_ARCH_PERFMON_EVENTSEL0;
99 99
100 switch (boot_cpu_data.x86) { 100 switch (boot_cpu_data.x86) {
101 case 6: 101 case 6:
102 return (msr - MSR_P6_EVNTSEL0); 102 return msr - MSR_P6_EVNTSEL0;
103 case 15: 103 case 15:
104 return (msr - MSR_P4_BSU_ESCR0); 104 return msr - MSR_P4_BSU_ESCR0;
105 } 105 }
106 } 106 }
107 return 0; 107 return 0;
@@ -113,7 +113,7 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
113{ 113{
114 BUG_ON(counter > NMI_MAX_COUNTER_BITS); 114 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
115 115
116 return (!test_bit(counter, perfctr_nmi_owner)); 116 return !test_bit(counter, perfctr_nmi_owner);
117} 117}
118 118
119/* checks the an msr for availability */ 119/* checks the an msr for availability */
@@ -124,7 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr)
124 counter = nmi_perfctr_msr_to_bit(msr); 124 counter = nmi_perfctr_msr_to_bit(msr);
125 BUG_ON(counter > NMI_MAX_COUNTER_BITS); 125 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
126 126
127 return (!test_bit(counter, perfctr_nmi_owner)); 127 return !test_bit(counter, perfctr_nmi_owner);
128} 128}
129EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); 129EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
130 130
@@ -237,7 +237,7 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz)
237 */ 237 */
238 counter_val = (u64)cpu_khz * 1000; 238 counter_val = (u64)cpu_khz * 1000;
239 do_div(counter_val, retval); 239 do_div(counter_val, retval);
240 if (counter_val > 0x7fffffffULL) { 240 if (counter_val > 0x7fffffffULL) {
241 u64 count = (u64)cpu_khz * 1000; 241 u64 count = (u64)cpu_khz * 1000;
242 do_div(count, 0x7fffffffUL); 242 do_div(count, 0x7fffffffUL);
243 retval = count + 1; 243 retval = count + 1;
@@ -251,7 +251,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr,
251 u64 count = (u64)cpu_khz * 1000; 251 u64 count = (u64)cpu_khz * 1000;
252 252
253 do_div(count, nmi_hz); 253 do_div(count, nmi_hz);
254 if(descr) 254 if (descr)
255 pr_debug("setting %s to -0x%08Lx\n", descr, count); 255 pr_debug("setting %s to -0x%08Lx\n", descr, count);
256 wrmsrl(perfctr_msr, 0 - count); 256 wrmsrl(perfctr_msr, 0 - count);
257} 257}
@@ -262,7 +262,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr,
262 u64 count = (u64)cpu_khz * 1000; 262 u64 count = (u64)cpu_khz * 1000;
263 263
264 do_div(count, nmi_hz); 264 do_div(count, nmi_hz);
265 if(descr) 265 if (descr)
266 pr_debug("setting %s to -0x%08Lx\n", descr, count); 266 pr_debug("setting %s to -0x%08Lx\n", descr, count);
267 wrmsr(perfctr_msr, (u32)(-count), 0); 267 wrmsr(perfctr_msr, (u32)(-count), 0);
268} 268}
@@ -296,7 +296,7 @@ static int setup_k7_watchdog(unsigned nmi_hz)
296 296
297 /* setup the timer */ 297 /* setup the timer */
298 wrmsr(evntsel_msr, evntsel, 0); 298 wrmsr(evntsel_msr, evntsel, 0);
299 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); 299 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
300 300
301 /* initialize the wd struct before enabling */ 301 /* initialize the wd struct before enabling */
302 wd->perfctr_msr = perfctr_msr; 302 wd->perfctr_msr = perfctr_msr;
@@ -387,7 +387,7 @@ static int setup_p6_watchdog(unsigned nmi_hz)
387 /* setup the timer */ 387 /* setup the timer */
388 wrmsr(evntsel_msr, evntsel, 0); 388 wrmsr(evntsel_msr, evntsel, 0);
389 nmi_hz = adjust_for_32bit_ctr(nmi_hz); 389 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
390 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); 390 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
391 391
392 /* initialize the wd struct before enabling */ 392 /* initialize the wd struct before enabling */
393 wd->perfctr_msr = perfctr_msr; 393 wd->perfctr_msr = perfctr_msr;
@@ -415,7 +415,7 @@ static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
415 apic_write(APIC_LVTPC, APIC_DM_NMI); 415 apic_write(APIC_LVTPC, APIC_DM_NMI);
416 416
417 /* P6/ARCH_PERFMON has 32 bit counter write */ 417 /* P6/ARCH_PERFMON has 32 bit counter write */
418 write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); 418 write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
419} 419}
420 420
421static const struct wd_ops p6_wd_ops = { 421static const struct wd_ops p6_wd_ops = {
@@ -490,9 +490,9 @@ static int setup_p4_watchdog(unsigned nmi_hz)
490 if (smp_num_siblings == 2) { 490 if (smp_num_siblings == 2) {
491 unsigned int ebx, apicid; 491 unsigned int ebx, apicid;
492 492
493 ebx = cpuid_ebx(1); 493 ebx = cpuid_ebx(1);
494 apicid = (ebx >> 24) & 0xff; 494 apicid = (ebx >> 24) & 0xff;
495 ht_num = apicid & 1; 495 ht_num = apicid & 1;
496 } else 496 } else
497#endif 497#endif
498 ht_num = 0; 498 ht_num = 0;
@@ -544,7 +544,7 @@ static int setup_p4_watchdog(unsigned nmi_hz)
544 } 544 }
545 545
546 evntsel = P4_ESCR_EVENT_SELECT(0x3F) 546 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
547 | P4_ESCR_OS 547 | P4_ESCR_OS
548 | P4_ESCR_USR; 548 | P4_ESCR_USR;
549 549
550 cccr_val |= P4_CCCR_THRESHOLD(15) 550 cccr_val |= P4_CCCR_THRESHOLD(15)
@@ -612,7 +612,7 @@ static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
612{ 612{
613 unsigned dummy; 613 unsigned dummy;
614 /* 614 /*
615 * P4 quirks: 615 * P4 quirks:
616 * - An overflown perfctr will assert its interrupt 616 * - An overflown perfctr will assert its interrupt
617 * until the OVF flag in its CCCR is cleared. 617 * until the OVF flag in its CCCR is cleared.
618 * - LVTPC is masked on interrupt and must be 618 * - LVTPC is masked on interrupt and must be
@@ -662,7 +662,8 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
662 * NOTE: Corresponding bit = 0 in ebx indicates event present. 662 * NOTE: Corresponding bit = 0 in ebx indicates event present.
663 */ 663 */
664 cpuid(10, &(eax.full), &ebx, &unused, &unused); 664 cpuid(10, &(eax.full), &ebx, &unused, &unused);
665 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || 665 if ((eax.split.mask_length <
666 (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
666 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) 667 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
667 return 0; 668 return 0;
668 669
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index d5e30397246b..62ac8cb6ba27 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -116,11 +116,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
116 seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); 116 seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
117#endif 117#endif
118 seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); 118 seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size);
119#ifdef CONFIG_X86_64
120 seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); 119 seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
121 seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", 120 seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
122 c->x86_phys_bits, c->x86_virt_bits); 121 c->x86_phys_bits, c->x86_virt_bits);
123#endif
124 122
125 seq_printf(m, "power management:"); 123 seq_printf(m, "power management:");
126 for (i = 0; i < 32; i++) { 124 for (i = 0; i < 32; i++) {
@@ -128,7 +126,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
128 if (i < ARRAY_SIZE(x86_power_flags) && 126 if (i < ARRAY_SIZE(x86_power_flags) &&
129 x86_power_flags[i]) 127 x86_power_flags[i])
130 seq_printf(m, "%s%s", 128 seq_printf(m, "%s%s",
131 x86_power_flags[i][0]?" ":"", 129 x86_power_flags[i][0] ? " " : "",
132 x86_power_flags[i]); 130 x86_power_flags[i]);
133 else 131 else
134 seq_printf(m, " [%d]", i); 132 seq_printf(m, " [%d]", i);
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 284c399e3234..bc24f514ec93 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -49,17 +49,17 @@ static inline int __vmware_platform(void)
49 49
50static unsigned long __vmware_get_tsc_khz(void) 50static unsigned long __vmware_get_tsc_khz(void)
51{ 51{
52 uint64_t tsc_hz; 52 uint64_t tsc_hz;
53 uint32_t eax, ebx, ecx, edx; 53 uint32_t eax, ebx, ecx, edx;
54 54
55 VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); 55 VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
56 56
57 if (ebx == UINT_MAX) 57 if (ebx == UINT_MAX)
58 return 0; 58 return 0;
59 tsc_hz = eax | (((uint64_t)ebx) << 32); 59 tsc_hz = eax | (((uint64_t)ebx) << 32);
60 do_div(tsc_hz, 1000); 60 do_div(tsc_hz, 1000);
61 BUG_ON(tsc_hz >> 32); 61 BUG_ON(tsc_hz >> 32);
62 return tsc_hz; 62 return tsc_hz;
63} 63}
64 64
65/* 65/*
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index b4f14c6c09d9..37250fe490b1 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -27,9 +27,7 @@ static void doublefault_fn(void)
27 27
28 if (ptr_ok(gdt)) { 28 if (ptr_ok(gdt)) {
29 gdt += GDT_ENTRY_TSS << 3; 29 gdt += GDT_ENTRY_TSS << 3;
30 tss = *(u16 *)(gdt+2); 30 tss = get_desc_base((struct desc_struct *)gdt);
31 tss += *(u8 *)(gdt+4) << 16;
32 tss += *(u8 *)(gdt+7) << 24;
33 printk(KERN_EMERG "double fault, tss at %08lx\n", tss); 31 printk(KERN_EMERG "double fault, tss at %08lx\n", tss);
34 32
35 if (ptr_ok(tss)) { 33 if (ptr_ok(tss)) {
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 48bfe1386038..ef42a038f1a6 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -509,15 +509,15 @@ enum bts_field {
509 bts_escape = ((unsigned long)-1 & ~bts_qual_mask) 509 bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
510}; 510};
511 511
512static inline unsigned long bts_get(const char *base, enum bts_field field) 512static inline unsigned long bts_get(const char *base, unsigned long field)
513{ 513{
514 base += (ds_cfg.sizeof_ptr_field * field); 514 base += (ds_cfg.sizeof_ptr_field * field);
515 return *(unsigned long *)base; 515 return *(unsigned long *)base;
516} 516}
517 517
518static inline void bts_set(char *base, enum bts_field field, unsigned long val) 518static inline void bts_set(char *base, unsigned long field, unsigned long val)
519{ 519{
520 base += (ds_cfg.sizeof_ptr_field * field);; 520 base += (ds_cfg.sizeof_ptr_field * field);
521 (*(unsigned long *)base) = val; 521 (*(unsigned long *)base) = val;
522} 522}
523 523
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index c8405718a4c3..2d8a371d4339 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -15,7 +15,6 @@
15#include <linux/bug.h> 15#include <linux/bug.h>
16#include <linux/nmi.h> 16#include <linux/nmi.h>
17#include <linux/sysfs.h> 17#include <linux/sysfs.h>
18#include <linux/ftrace.h>
19 18
20#include <asm/stacktrace.h> 19#include <asm/stacktrace.h>
21 20
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 5cb5725b2bae..147005a1cc3c 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -115,7 +115,7 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
115{ 115{
116 int x = e820x->nr_map; 116 int x = e820x->nr_map;
117 117
118 if (x == ARRAY_SIZE(e820x->map)) { 118 if (x >= ARRAY_SIZE(e820x->map)) {
119 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); 119 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
120 return; 120 return;
121 } 121 }
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index cc827ac9e8d3..7ffec6b3b331 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -439,7 +439,6 @@ is386: movl $2,%ecx # set MP
439 jne 1f 439 jne 1f
440 movl $per_cpu__gdt_page,%eax 440 movl $per_cpu__gdt_page,%eax
441 movl $per_cpu__stack_canary,%ecx 441 movl $per_cpu__stack_canary,%ecx
442 subl $20, %ecx
443 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) 442 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
444 shrl $16, %ecx 443 shrl $16, %ecx
445 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) 444 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 3b09634a5153..7d35d0fe2329 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -218,7 +218,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
218void fixup_irqs(void) 218void fixup_irqs(void)
219{ 219{
220 unsigned int irq; 220 unsigned int irq;
221 static int warned;
222 struct irq_desc *desc; 221 struct irq_desc *desc;
223 222
224 for_each_irq_desc(irq, desc) { 223 for_each_irq_desc(irq, desc) {
@@ -236,8 +235,8 @@ void fixup_irqs(void)
236 } 235 }
237 if (desc->chip->set_affinity) 236 if (desc->chip->set_affinity)
238 desc->chip->set_affinity(irq, affinity); 237 desc->chip->set_affinity(irq, affinity);
239 else if (desc->action && !(warned++)) 238 else if (desc->action)
240 printk("Cannot set affinity for irq %i\n", irq); 239 printk_once("Cannot set affinity for irq %i\n", irq);
241 } 240 }
242 241
243#if 0 242#if 0
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 651c93b28862..fcd513bf2846 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -482,11 +482,11 @@ static void __init construct_ioapic_table(int mpc_default_type)
482 MP_bus_info(&bus); 482 MP_bus_info(&bus);
483 } 483 }
484 484
485 ioapic.type = MP_IOAPIC; 485 ioapic.type = MP_IOAPIC;
486 ioapic.apicid = 2; 486 ioapic.apicid = 2;
487 ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; 487 ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
488 ioapic.flags = MPC_APIC_USABLE; 488 ioapic.flags = MPC_APIC_USABLE;
489 ioapic.apicaddr = 0xFEC00000; 489 ioapic.apicaddr = IO_APIC_DEFAULT_PHYS_BASE;
490 MP_ioapic_info(&ioapic); 490 MP_ioapic_info(&ioapic);
491 491
492 /* 492 /*
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 98fd6cd4e3a4..7dd950094178 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -1,6 +1,7 @@
1/* ----------------------------------------------------------------------- * 1/* ----------------------------------------------------------------------- *
2 * 2 *
3 * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved 3 * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
4 * Copyright 2009 Intel Corporation; author: H. Peter Anvin
4 * 5 *
5 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -80,11 +81,8 @@ static ssize_t msr_read(struct file *file, char __user *buf,
80 81
81 for (; count; count -= 8) { 82 for (; count; count -= 8) {
82 err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); 83 err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]);
83 if (err) { 84 if (err)
84 if (err == -EFAULT) /* Fix idiotic error code */
85 err = -EIO;
86 break; 85 break;
87 }
88 if (copy_to_user(tmp, &data, 8)) { 86 if (copy_to_user(tmp, &data, 8)) {
89 err = -EFAULT; 87 err = -EFAULT;
90 break; 88 break;
@@ -115,11 +113,8 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
115 break; 113 break;
116 } 114 }
117 err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); 115 err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]);
118 if (err) { 116 if (err)
119 if (err == -EFAULT) /* Fix idiotic error code */
120 err = -EIO;
121 break; 117 break;
122 }
123 tmp += 2; 118 tmp += 2;
124 bytes += 8; 119 bytes += 8;
125 } 120 }
@@ -127,6 +122,54 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
127 return bytes ? bytes : err; 122 return bytes ? bytes : err;
128} 123}
129 124
125static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
126{
127 u32 __user *uregs = (u32 __user *)arg;
128 u32 regs[8];
129 int cpu = iminor(file->f_path.dentry->d_inode);
130 int err;
131
132 switch (ioc) {
133 case X86_IOC_RDMSR_REGS:
134 if (!(file->f_mode & FMODE_READ)) {
135 err = -EBADF;
136 break;
137 }
138 if (copy_from_user(&regs, uregs, sizeof regs)) {
139 err = -EFAULT;
140 break;
141 }
142 err = rdmsr_safe_regs_on_cpu(cpu, regs);
143 if (err)
144 break;
145 if (copy_to_user(uregs, &regs, sizeof regs))
146 err = -EFAULT;
147 break;
148
149 case X86_IOC_WRMSR_REGS:
150 if (!(file->f_mode & FMODE_WRITE)) {
151 err = -EBADF;
152 break;
153 }
154 if (copy_from_user(&regs, uregs, sizeof regs)) {
155 err = -EFAULT;
156 break;
157 }
158 err = wrmsr_safe_regs_on_cpu(cpu, regs);
159 if (err)
160 break;
161 if (copy_to_user(uregs, &regs, sizeof regs))
162 err = -EFAULT;
163 break;
164
165 default:
166 err = -ENOTTY;
167 break;
168 }
169
170 return err;
171}
172
130static int msr_open(struct inode *inode, struct file *file) 173static int msr_open(struct inode *inode, struct file *file)
131{ 174{
132 unsigned int cpu = iminor(file->f_path.dentry->d_inode); 175 unsigned int cpu = iminor(file->f_path.dentry->d_inode);
@@ -157,6 +200,8 @@ static const struct file_operations msr_fops = {
157 .read = msr_read, 200 .read = msr_read,
158 .write = msr_write, 201 .write = msr_write,
159 .open = msr_open, 202 .open = msr_open,
203 .unlocked_ioctl = msr_ioctl,
204 .compat_ioctl = msr_ioctl,
160}; 205};
161 206
162static int __cpuinit msr_device_create(int cpu) 207static int __cpuinit msr_device_create(int cpu)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 70ec9b951d76..f5b0b4a01fb2 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -362,8 +362,9 @@ struct pv_cpu_ops pv_cpu_ops = {
362#endif 362#endif
363 .wbinvd = native_wbinvd, 363 .wbinvd = native_wbinvd,
364 .read_msr = native_read_msr_safe, 364 .read_msr = native_read_msr_safe,
365 .read_msr_amd = native_read_msr_amd_safe, 365 .rdmsr_regs = native_rdmsr_safe_regs,
366 .write_msr = native_write_msr_safe, 366 .write_msr = native_write_msr_safe,
367 .wrmsr_regs = native_wrmsr_safe_regs,
367 .read_tsc = native_read_tsc, 368 .read_tsc = native_read_tsc,
368 .read_pmc = native_read_pmc, 369 .read_pmc = native_read_pmc,
369 .read_tscp = native_read_tscp, 370 .read_tscp = native_read_tscp,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 59f4524984af..4cf79567cdab 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -61,9 +61,6 @@
61 61
62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
63 63
64DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
65EXPORT_PER_CPU_SYMBOL(current_task);
66
67/* 64/*
68 * Return saved PC of a blocked thread. 65 * Return saved PC of a blocked thread.
69 */ 66 */
@@ -350,14 +347,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
350 *next = &next_p->thread; 347 *next = &next_p->thread;
351 int cpu = smp_processor_id(); 348 int cpu = smp_processor_id();
352 struct tss_struct *tss = &per_cpu(init_tss, cpu); 349 struct tss_struct *tss = &per_cpu(init_tss, cpu);
350 bool preload_fpu;
353 351
354 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ 352 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
355 353
356 __unlazy_fpu(prev_p); 354 /*
355 * If the task has used fpu the last 5 timeslices, just do a full
356 * restore of the math state immediately to avoid the trap; the
357 * chances of needing FPU soon are obviously high now
358 */
359 preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
357 360
361 __unlazy_fpu(prev_p);
358 362
359 /* we're going to use this soon, after a few expensive things */ 363 /* we're going to use this soon, after a few expensive things */
360 if (next_p->fpu_counter > 5) 364 if (preload_fpu)
361 prefetch(next->xstate); 365 prefetch(next->xstate);
362 366
363 /* 367 /*
@@ -398,6 +402,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
398 task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) 402 task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
399 __switch_to_xtra(prev_p, next_p, tss); 403 __switch_to_xtra(prev_p, next_p, tss);
400 404
405 /* If we're going to preload the fpu context, make sure clts
406 is run while we're batching the cpu state updates. */
407 if (preload_fpu)
408 clts();
409
401 /* 410 /*
402 * Leave lazy mode, flushing any hypercalls made here. 411 * Leave lazy mode, flushing any hypercalls made here.
403 * This must be done before restoring TLS segments so 412 * This must be done before restoring TLS segments so
@@ -407,15 +416,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
407 */ 416 */
408 arch_end_context_switch(next_p); 417 arch_end_context_switch(next_p);
409 418
410 /* If the task has used fpu the last 5 timeslices, just do a full 419 if (preload_fpu)
411 * restore of the math state immediately to avoid the trap; the 420 __math_state_restore();
412 * chances of needing FPU soon are obviously high now
413 *
414 * tsk_used_math() checks prevent calling math_state_restore(),
415 * which can sleep in the case of !tsk_used_math()
416 */
417 if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
418 math_state_restore();
419 421
420 /* 422 /*
421 * Restore %gs if needed (which is common) 423 * Restore %gs if needed (which is common)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ebefb5407b9d..ad535b683170 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -55,9 +55,6 @@
55 55
56asmlinkage extern void ret_from_fork(void); 56asmlinkage extern void ret_from_fork(void);
57 57
58DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
59EXPORT_PER_CPU_SYMBOL(current_task);
60
61DEFINE_PER_CPU(unsigned long, old_rsp); 58DEFINE_PER_CPU(unsigned long, old_rsp);
62static DEFINE_PER_CPU(unsigned char, is_idle); 59static DEFINE_PER_CPU(unsigned char, is_idle);
63 60
@@ -386,9 +383,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
386 int cpu = smp_processor_id(); 383 int cpu = smp_processor_id();
387 struct tss_struct *tss = &per_cpu(init_tss, cpu); 384 struct tss_struct *tss = &per_cpu(init_tss, cpu);
388 unsigned fsindex, gsindex; 385 unsigned fsindex, gsindex;
386 bool preload_fpu;
387
388 /*
389 * If the task has used fpu the last 5 timeslices, just do a full
390 * restore of the math state immediately to avoid the trap; the
391 * chances of needing FPU soon are obviously high now
392 */
393 preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
389 394
390 /* we're going to use this soon, after a few expensive things */ 395 /* we're going to use this soon, after a few expensive things */
391 if (next_p->fpu_counter > 5) 396 if (preload_fpu)
392 prefetch(next->xstate); 397 prefetch(next->xstate);
393 398
394 /* 399 /*
@@ -419,6 +424,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
419 424
420 load_TLS(next, cpu); 425 load_TLS(next, cpu);
421 426
427 /* Must be after DS reload */
428 unlazy_fpu(prev_p);
429
430 /* Make sure cpu is ready for new context */
431 if (preload_fpu)
432 clts();
433
422 /* 434 /*
423 * Leave lazy mode, flushing any hypercalls made here. 435 * Leave lazy mode, flushing any hypercalls made here.
424 * This must be done before restoring TLS segments so 436 * This must be done before restoring TLS segments so
@@ -459,9 +471,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
459 wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 471 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
460 prev->gsindex = gsindex; 472 prev->gsindex = gsindex;
461 473
462 /* Must be after DS reload */
463 unlazy_fpu(prev_p);
464
465 /* 474 /*
466 * Switch the PDA and FPU contexts. 475 * Switch the PDA and FPU contexts.
467 */ 476 */
@@ -480,15 +489,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
480 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) 489 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
481 __switch_to_xtra(prev_p, next_p, tss); 490 __switch_to_xtra(prev_p, next_p, tss);
482 491
483 /* If the task has used fpu the last 5 timeslices, just do a full 492 /*
484 * restore of the math state immediately to avoid the trap; the 493 * Preload the FPU context, now that we've determined that the
485 * chances of needing FPU soon are obviously high now 494 * task is likely to be using it.
486 *
487 * tsk_used_math() checks prevent calling math_state_restore(),
488 * which can sleep in the case of !tsk_used_math()
489 */ 495 */
490 if (tsk_used_math(next_p) && next_p->fpu_counter > 5) 496 if (preload_fpu)
491 math_state_restore(); 497 __math_state_restore();
492 return prev_p; 498 return prev_p;
493} 499}
494 500
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2fecda69ee64..c36cc1452cdc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -434,7 +434,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
434 * For perf, we return last level cache shared map. 434 * For perf, we return last level cache shared map.
435 * And for power savings, we return cpu_core_map 435 * And for power savings, we return cpu_core_map
436 */ 436 */
437 if (sched_mc_power_savings || sched_smt_power_savings) 437 if ((sched_mc_power_savings || sched_smt_power_savings) &&
438 !(cpu_has(c, X86_FEATURE_AMD_DCM)))
438 return cpu_core_mask(cpu); 439 return cpu_core_mask(cpu);
439 else 440 else
440 return c->llc_shared_map; 441 return c->llc_shared_map;
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index e8b9863ef8c4..3149032ff107 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -4,6 +4,7 @@
4#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/mm.h> 5#include <linux/mm.h>
6#include <linux/ptrace.h> 6#include <linux/ptrace.h>
7#include <asm/desc.h>
7 8
8unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) 9unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
9{ 10{
@@ -23,7 +24,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
23 * and APM bios ones we just ignore here. 24 * and APM bios ones we just ignore here.
24 */ 25 */
25 if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { 26 if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) {
26 u32 *desc; 27 struct desc_struct *desc;
27 unsigned long base; 28 unsigned long base;
28 29
29 seg &= ~7UL; 30 seg &= ~7UL;
@@ -33,12 +34,10 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
33 addr = -1L; /* bogus selector, access would fault */ 34 addr = -1L; /* bogus selector, access would fault */
34 else { 35 else {
35 desc = child->mm->context.ldt + seg; 36 desc = child->mm->context.ldt + seg;
36 base = ((desc[0] >> 16) | 37 base = get_desc_base(desc);
37 ((desc[1] & 0xff) << 16) |
38 (desc[1] & 0xff000000));
39 38
40 /* 16-bit code segment? */ 39 /* 16-bit code segment? */
41 if (!((desc[1] >> 22) & 1)) 40 if (!desc->d)
42 addr &= 0xffff; 41 addr &= 0xffff;
43 addr += base; 42 addr += base;
44 } 43 }
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 77b9689f8edb..503c1f2e8835 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -640,13 +640,13 @@ static int __init uv_ptc_init(void)
640 if (!is_uv_system()) 640 if (!is_uv_system())
641 return 0; 641 return 0;
642 642
643 proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); 643 proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
644 &proc_uv_ptc_operations);
644 if (!proc_uv_ptc) { 645 if (!proc_uv_ptc) {
645 printk(KERN_ERR "unable to create %s proc entry\n", 646 printk(KERN_ERR "unable to create %s proc entry\n",
646 UV_PTC_BASENAME); 647 UV_PTC_BASENAME);
647 return -EINVAL; 648 return -EINVAL;
648 } 649 }
649 proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
650 return 0; 650 return 0;
651} 651}
652 652
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5204332f475d..83264922a878 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -76,7 +76,7 @@ char ignore_fpu_irq;
76 * F0 0F bug workaround.. We have a special link segment 76 * F0 0F bug workaround.. We have a special link segment
77 * for this. 77 * for this.
78 */ 78 */
79gate_desc idt_table[256] 79gate_desc idt_table[NR_VECTORS]
80 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; 80 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
81#endif 81#endif
82 82
@@ -786,33 +786,34 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
786#endif 786#endif
787} 787}
788 788
789#ifdef CONFIG_X86_32 789asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
790unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
791{ 790{
792 struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
793 unsigned long base = (kesp - uesp) & -THREAD_SIZE;
794 unsigned long new_kesp = kesp - base;
795 unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
796 __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
797
798 /* Set up base for espfix segment */
799 desc &= 0x00f0ff0000000000ULL;
800 desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
801 ((((__u64)base) << 32) & 0xff00000000000000ULL) |
802 ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
803 (lim_pages & 0xffff);
804 *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
805
806 return new_kesp;
807} 791}
808#endif
809 792
810asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) 793asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
811{ 794{
812} 795}
813 796
814asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) 797/*
798 * __math_state_restore assumes that cr0.TS is already clear and the
799 * fpu state is all ready for use. Used during context switch.
800 */
801void __math_state_restore(void)
815{ 802{
803 struct thread_info *thread = current_thread_info();
804 struct task_struct *tsk = thread->task;
805
806 /*
807 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
808 */
809 if (unlikely(restore_fpu_checking(tsk))) {
810 stts();
811 force_sig(SIGSEGV, tsk);
812 return;
813 }
814
815 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
816 tsk->fpu_counter++;
816} 817}
817 818
818/* 819/*
@@ -846,17 +847,8 @@ asmlinkage void math_state_restore(void)
846 } 847 }
847 848
848 clts(); /* Allow maths ops (or we recurse) */ 849 clts(); /* Allow maths ops (or we recurse) */
849 /*
850 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
851 */
852 if (unlikely(restore_fpu_checking(tsk))) {
853 stts();
854 force_sig(SIGSEGV, tsk);
855 return;
856 }
857 850
858 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ 851 __math_state_restore();
859 tsk->fpu_counter++;
860} 852}
861EXPORT_SYMBOL_GPL(math_state_restore); 853EXPORT_SYMBOL_GPL(math_state_restore);
862 854
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3d4529011828..633ccc7400a4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2297,12 +2297,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
2297 unsigned int bytes, 2297 unsigned int bytes,
2298 struct kvm_vcpu *vcpu) 2298 struct kvm_vcpu *vcpu)
2299{ 2299{
2300 static int reported; 2300 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
2301
2302 if (!reported) {
2303 reported = 1;
2304 printk(KERN_WARNING "kvm: emulating exchange as write\n");
2305 }
2306#ifndef CONFIG_X86_64 2301#ifndef CONFIG_X86_64
2307 /* guests cmpxchg8b have to be emulated atomically */ 2302 /* guests cmpxchg8b have to be emulated atomically */
2308 if (bytes == 8) { 2303 if (bytes == 8) {
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 07c31899c9c2..9e609206fac9 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -9,6 +9,8 @@ lib-y += thunk_$(BITS).o
9lib-y += usercopy_$(BITS).o getuser.o putuser.o 9lib-y += usercopy_$(BITS).o getuser.o putuser.o
10lib-y += memcpy_$(BITS).o 10lib-y += memcpy_$(BITS).o
11 11
12obj-y += msr-reg.o msr-reg-export.o
13
12ifeq ($(CONFIG_X86_32),y) 14ifeq ($(CONFIG_X86_32),y)
13 obj-y += atomic64_32.o 15 obj-y += atomic64_32.o
14 lib-y += checksum_32.o 16 lib-y += checksum_32.o
diff --git a/arch/x86/lib/msr-reg-export.c b/arch/x86/lib/msr-reg-export.c
new file mode 100644
index 000000000000..a311cc59b65d
--- /dev/null
+++ b/arch/x86/lib/msr-reg-export.c
@@ -0,0 +1,5 @@
1#include <linux/module.h>
2#include <asm/msr.h>
3
4EXPORT_SYMBOL(native_rdmsr_safe_regs);
5EXPORT_SYMBOL(native_wrmsr_safe_regs);
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
new file mode 100644
index 000000000000..69fa10623f21
--- /dev/null
+++ b/arch/x86/lib/msr-reg.S
@@ -0,0 +1,102 @@
1#include <linux/linkage.h>
2#include <linux/errno.h>
3#include <asm/dwarf2.h>
4#include <asm/asm.h>
5#include <asm/msr.h>
6
7#ifdef CONFIG_X86_64
8/*
9 * int native_{rdmsr,wrmsr}_safe_regs(u32 gprs[8]);
10 *
11 * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi]
12 *
13 */
14.macro op_safe_regs op
15ENTRY(native_\op\()_safe_regs)
16 CFI_STARTPROC
17 pushq_cfi %rbx
18 pushq_cfi %rbp
19 movq %rdi, %r10 /* Save pointer */
20 xorl %r11d, %r11d /* Return value */
21 movl (%rdi), %eax
22 movl 4(%rdi), %ecx
23 movl 8(%rdi), %edx
24 movl 12(%rdi), %ebx
25 movl 20(%rdi), %ebp
26 movl 24(%rdi), %esi
27 movl 28(%rdi), %edi
28 CFI_REMEMBER_STATE
291: \op
302: movl %eax, (%r10)
31 movl %r11d, %eax /* Return value */
32 movl %ecx, 4(%r10)
33 movl %edx, 8(%r10)
34 movl %ebx, 12(%r10)
35 movl %ebp, 20(%r10)
36 movl %esi, 24(%r10)
37 movl %edi, 28(%r10)
38 popq_cfi %rbp
39 popq_cfi %rbx
40 ret
413:
42 CFI_RESTORE_STATE
43 movl $-EIO, %r11d
44 jmp 2b
45
46 _ASM_EXTABLE(1b, 3b)
47 CFI_ENDPROC
48ENDPROC(native_\op\()_safe_regs)
49.endm
50
51#else /* X86_32 */
52
53.macro op_safe_regs op
54ENTRY(native_\op\()_safe_regs)
55 CFI_STARTPROC
56 pushl_cfi %ebx
57 pushl_cfi %ebp
58 pushl_cfi %esi
59 pushl_cfi %edi
60 pushl_cfi $0 /* Return value */
61 pushl_cfi %eax
62 movl 4(%eax), %ecx
63 movl 8(%eax), %edx
64 movl 12(%eax), %ebx
65 movl 20(%eax), %ebp
66 movl 24(%eax), %esi
67 movl 28(%eax), %edi
68 movl (%eax), %eax
69 CFI_REMEMBER_STATE
701: \op
712: pushl_cfi %eax
72 movl 4(%esp), %eax
73 popl_cfi (%eax)
74 addl $4, %esp
75 CFI_ADJUST_CFA_OFFSET -4
76 movl %ecx, 4(%eax)
77 movl %edx, 8(%eax)
78 movl %ebx, 12(%eax)
79 movl %ebp, 20(%eax)
80 movl %esi, 24(%eax)
81 movl %edi, 28(%eax)
82 popl_cfi %eax
83 popl_cfi %edi
84 popl_cfi %esi
85 popl_cfi %ebp
86 popl_cfi %ebx
87 ret
883:
89 CFI_RESTORE_STATE
90 movl $-EIO, 4(%esp)
91 jmp 2b
92
93 _ASM_EXTABLE(1b, 3b)
94 CFI_ENDPROC
95ENDPROC(native_\op\()_safe_regs)
96.endm
97
98#endif
99
100op_safe_regs rdmsr
101op_safe_regs wrmsr
102
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index caa24aca8115..33a1e3ca22d8 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -175,3 +175,52 @@ int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
175 return err ? err : rv.err; 175 return err ? err : rv.err;
176} 176}
177EXPORT_SYMBOL(wrmsr_safe_on_cpu); 177EXPORT_SYMBOL(wrmsr_safe_on_cpu);
178
179/*
180 * These variants are significantly slower, but allows control over
181 * the entire 32-bit GPR set.
182 */
183struct msr_regs_info {
184 u32 *regs;
185 int err;
186};
187
188static void __rdmsr_safe_regs_on_cpu(void *info)
189{
190 struct msr_regs_info *rv = info;
191
192 rv->err = rdmsr_safe_regs(rv->regs);
193}
194
195static void __wrmsr_safe_regs_on_cpu(void *info)
196{
197 struct msr_regs_info *rv = info;
198
199 rv->err = wrmsr_safe_regs(rv->regs);
200}
201
202int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
203{
204 int err;
205 struct msr_regs_info rv;
206
207 rv.regs = regs;
208 rv.err = -EIO;
209 err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1);
210
211 return err ? err : rv.err;
212}
213EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu);
214
215int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
216{
217 int err;
218 struct msr_regs_info rv;
219
220 rv.regs = regs;
221 rv.err = -EIO;
222 err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1);
223
224 return err ? err : rv.err;
225}
226EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index eefdeee8a871..9b5a9f59a478 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,9 @@
1obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 1obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2 pat.o pgtable.o gup.o 2 pat.o pgtable.o physaddr.o gup.o
3
4# Make sure __phys_addr has no stackprotector
5nostackp := $(call cc-option, -fno-stack-protector)
6CFLAGS_physaddr.o := $(nostackp)
3 7
4obj-$(CONFIG_SMP) += tlb.o 8obj-$(CONFIG_SMP) += tlb.o
5 9
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index bfae139182ff..775a020990a5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -285,26 +285,25 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address,
285 tsk->thread.screen_bitmap |= 1 << bit; 285 tsk->thread.screen_bitmap |= 1 << bit;
286} 286}
287 287
288static void dump_pagetable(unsigned long address) 288static bool low_pfn(unsigned long pfn)
289{ 289{
290 __typeof__(pte_val(__pte(0))) page; 290 return pfn < max_low_pfn;
291}
291 292
292 page = read_cr3(); 293static void dump_pagetable(unsigned long address)
293 page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; 294{
295 pgd_t *base = __va(read_cr3());
296 pgd_t *pgd = &base[pgd_index(address)];
297 pmd_t *pmd;
298 pte_t *pte;
294 299
295#ifdef CONFIG_X86_PAE 300#ifdef CONFIG_X86_PAE
296 printk("*pdpt = %016Lx ", page); 301 printk("*pdpt = %016Lx ", pgd_val(*pgd));
297 if ((page >> PAGE_SHIFT) < max_low_pfn 302 if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
298 && page & _PAGE_PRESENT) { 303 goto out;
299 page &= PAGE_MASK;
300 page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
301 & (PTRS_PER_PMD - 1)];
302 printk(KERN_CONT "*pde = %016Lx ", page);
303 page &= ~_PAGE_NX;
304 }
305#else
306 printk("*pde = %08lx ", page);
307#endif 304#endif
305 pmd = pmd_offset(pud_offset(pgd, address), address);
306 printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
308 307
309 /* 308 /*
310 * We must not directly access the pte in the highpte 309 * We must not directly access the pte in the highpte
@@ -312,16 +311,12 @@ static void dump_pagetable(unsigned long address)
312 * And let's rather not kmap-atomic the pte, just in case 311 * And let's rather not kmap-atomic the pte, just in case
313 * it's allocated already: 312 * it's allocated already:
314 */ 313 */
315 if ((page >> PAGE_SHIFT) < max_low_pfn 314 if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
316 && (page & _PAGE_PRESENT) 315 goto out;
317 && !(page & _PAGE_PSE)) {
318
319 page &= PAGE_MASK;
320 page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
321 & (PTRS_PER_PTE - 1)];
322 printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
323 }
324 316
317 pte = pte_offset_kernel(pmd, address);
318 printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
319out:
325 printk("\n"); 320 printk("\n");
326} 321}
327 322
@@ -450,16 +445,12 @@ static int bad_address(void *p)
450 445
451static void dump_pagetable(unsigned long address) 446static void dump_pagetable(unsigned long address)
452{ 447{
453 pgd_t *pgd; 448 pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
449 pgd_t *pgd = base + pgd_index(address);
454 pud_t *pud; 450 pud_t *pud;
455 pmd_t *pmd; 451 pmd_t *pmd;
456 pte_t *pte; 452 pte_t *pte;
457 453
458 pgd = (pgd_t *)read_cr3();
459
460 pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
461
462 pgd += pgd_index(address);
463 if (bad_address(pgd)) 454 if (bad_address(pgd))
464 goto bad; 455 goto bad;
465 456
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 2112ed55e7ea..1617958a3805 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -24,7 +24,7 @@ void kunmap(struct page *page)
24 * no global lock is needed and because the kmap code must perform a global TLB 24 * no global lock is needed and because the kmap code must perform a global TLB
25 * invalidation when the kmap pool wraps. 25 * invalidation when the kmap pool wraps.
26 * 26 *
27 * However when holding an atomic kmap is is not legal to sleep, so atomic 27 * However when holding an atomic kmap it is not legal to sleep, so atomic
28 * kmaps are appropriate for short, tight code paths only. 28 * kmaps are appropriate for short, tight code paths only.
29 */ 29 */
30void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) 30void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 8a450930834f..04e1ad60c63a 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -22,77 +22,7 @@
22#include <asm/pgalloc.h> 22#include <asm/pgalloc.h>
23#include <asm/pat.h> 23#include <asm/pat.h>
24 24
25static inline int phys_addr_valid(resource_size_t addr) 25#include "physaddr.h"
26{
27#ifdef CONFIG_PHYS_ADDR_T_64BIT
28 return !(addr >> boot_cpu_data.x86_phys_bits);
29#else
30 return 1;
31#endif
32}
33
34#ifdef CONFIG_X86_64
35
36unsigned long __phys_addr(unsigned long x)
37{
38 if (x >= __START_KERNEL_map) {
39 x -= __START_KERNEL_map;
40 VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
41 x += phys_base;
42 } else {
43 VIRTUAL_BUG_ON(x < PAGE_OFFSET);
44 x -= PAGE_OFFSET;
45 VIRTUAL_BUG_ON(!phys_addr_valid(x));
46 }
47 return x;
48}
49EXPORT_SYMBOL(__phys_addr);
50
51bool __virt_addr_valid(unsigned long x)
52{
53 if (x >= __START_KERNEL_map) {
54 x -= __START_KERNEL_map;
55 if (x >= KERNEL_IMAGE_SIZE)
56 return false;
57 x += phys_base;
58 } else {
59 if (x < PAGE_OFFSET)
60 return false;
61 x -= PAGE_OFFSET;
62 if (!phys_addr_valid(x))
63 return false;
64 }
65
66 return pfn_valid(x >> PAGE_SHIFT);
67}
68EXPORT_SYMBOL(__virt_addr_valid);
69
70#else
71
72#ifdef CONFIG_DEBUG_VIRTUAL
73unsigned long __phys_addr(unsigned long x)
74{
75 /* VMALLOC_* aren't constants */
76 VIRTUAL_BUG_ON(x < PAGE_OFFSET);
77 VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
78 return x - PAGE_OFFSET;
79}
80EXPORT_SYMBOL(__phys_addr);
81#endif
82
83bool __virt_addr_valid(unsigned long x)
84{
85 if (x < PAGE_OFFSET)
86 return false;
87 if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
88 return false;
89 if (x >= FIXADDR_START)
90 return false;
91 return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
92}
93EXPORT_SYMBOL(__virt_addr_valid);
94
95#endif
96 26
97int page_is_ram(unsigned long pagenr) 27int page_is_ram(unsigned long pagenr)
98{ 28{
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 352aa9e927e2..b2f7d3e59b86 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -827,7 +827,7 @@ static int memtype_seq_show(struct seq_file *seq, void *v)
827 return 0; 827 return 0;
828} 828}
829 829
830static struct seq_operations memtype_seq_ops = { 830static const struct seq_operations memtype_seq_ops = {
831 .start = memtype_seq_start, 831 .start = memtype_seq_start,
832 .next = memtype_seq_next, 832 .next = memtype_seq_next,
833 .stop = memtype_seq_stop, 833 .stop = memtype_seq_stop,
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
new file mode 100644
index 000000000000..d2e2735327b4
--- /dev/null
+++ b/arch/x86/mm/physaddr.c
@@ -0,0 +1,70 @@
1#include <linux/mmdebug.h>
2#include <linux/module.h>
3#include <linux/mm.h>
4
5#include <asm/page.h>
6
7#include "physaddr.h"
8
9#ifdef CONFIG_X86_64
10
11unsigned long __phys_addr(unsigned long x)
12{
13 if (x >= __START_KERNEL_map) {
14 x -= __START_KERNEL_map;
15 VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
16 x += phys_base;
17 } else {
18 VIRTUAL_BUG_ON(x < PAGE_OFFSET);
19 x -= PAGE_OFFSET;
20 VIRTUAL_BUG_ON(!phys_addr_valid(x));
21 }
22 return x;
23}
24EXPORT_SYMBOL(__phys_addr);
25
26bool __virt_addr_valid(unsigned long x)
27{
28 if (x >= __START_KERNEL_map) {
29 x -= __START_KERNEL_map;
30 if (x >= KERNEL_IMAGE_SIZE)
31 return false;
32 x += phys_base;
33 } else {
34 if (x < PAGE_OFFSET)
35 return false;
36 x -= PAGE_OFFSET;
37 if (!phys_addr_valid(x))
38 return false;
39 }
40
41 return pfn_valid(x >> PAGE_SHIFT);
42}
43EXPORT_SYMBOL(__virt_addr_valid);
44
45#else
46
47#ifdef CONFIG_DEBUG_VIRTUAL
48unsigned long __phys_addr(unsigned long x)
49{
50 /* VMALLOC_* aren't constants */
51 VIRTUAL_BUG_ON(x < PAGE_OFFSET);
52 VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
53 return x - PAGE_OFFSET;
54}
55EXPORT_SYMBOL(__phys_addr);
56#endif
57
58bool __virt_addr_valid(unsigned long x)
59{
60 if (x < PAGE_OFFSET)
61 return false;
62 if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
63 return false;
64 if (x >= FIXADDR_START)
65 return false;
66 return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
67}
68EXPORT_SYMBOL(__virt_addr_valid);
69
70#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/mm/physaddr.h b/arch/x86/mm/physaddr.h
new file mode 100644
index 000000000000..a3cd5a0c97b3
--- /dev/null
+++ b/arch/x86/mm/physaddr.h
@@ -0,0 +1,10 @@
1#include <asm/processor.h>
2
3static inline int phys_addr_valid(resource_size_t addr)
4{
5#ifdef CONFIG_PHYS_ADDR_T_64BIT
6 return !(addr >> boot_cpu_data.x86_phys_bits);
7#else
8 return 1;
9#endif
10}
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 29a0e37114f8..6f8aa33031c7 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -215,7 +215,7 @@ int __init get_memcfg_from_srat(void)
215 goto out_fail; 215 goto out_fail;
216 216
217 if (num_memory_chunks == 0) { 217 if (num_memory_chunks == 0) {
218 printk(KERN_WARNING 218 printk(KERN_DEBUG
219 "could not find any ACPI SRAT memory areas.\n"); 219 "could not find any ACPI SRAT memory areas.\n");
220 goto out_fail; 220 goto out_fail;
221 } 221 }
@@ -277,7 +277,7 @@ int __init get_memcfg_from_srat(void)
277 } 277 }
278 return 1; 278 return 1;
279out_fail: 279out_fail:
280 printk(KERN_ERR "failed to get NUMA memory information from SRAT" 280 printk(KERN_DEBUG "failed to get NUMA memory information from SRAT"
281 " table\n"); 281 " table\n");
282 return 0; 282 return 0;
283} 283}
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 7410640db173..3bb4fc21f4f2 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -8,6 +8,7 @@ endif
8# Make sure early boot has no stackprotector 8# Make sure early boot has no stackprotector
9nostackp := $(call cc-option, -fno-stack-protector) 9nostackp := $(call cc-option, -fno-stack-protector)
10CFLAGS_enlighten.o := $(nostackp) 10CFLAGS_enlighten.o := $(nostackp)
11CFLAGS_mmu.o := $(nostackp)
11 12
12obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ 13obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
13 time.o xen-asm.o xen-asm_$(BITS).o \ 14 time.o xen-asm.o xen-asm_$(BITS).o \
@@ -16,3 +17,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
16obj-$(CONFIG_SMP) += smp.o 17obj-$(CONFIG_SMP) += smp.o
17obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o 18obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
18obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o 19obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
20
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index eb33aaa8415d..0dd0c2c6cae0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -51,6 +51,7 @@
51#include <asm/pgtable.h> 51#include <asm/pgtable.h>
52#include <asm/tlbflush.h> 52#include <asm/tlbflush.h>
53#include <asm/reboot.h> 53#include <asm/reboot.h>
54#include <asm/stackprotector.h>
54 55
55#include "xen-ops.h" 56#include "xen-ops.h"
56#include "mmu.h" 57#include "mmu.h"
@@ -330,18 +331,28 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
330 unsigned long frames[pages]; 331 unsigned long frames[pages];
331 int f; 332 int f;
332 333
333 /* A GDT can be up to 64k in size, which corresponds to 8192 334 /*
334 8-byte entries, or 16 4k pages.. */ 335 * A GDT can be up to 64k in size, which corresponds to 8192
336 * 8-byte entries, or 16 4k pages..
337 */
335 338
336 BUG_ON(size > 65536); 339 BUG_ON(size > 65536);
337 BUG_ON(va & ~PAGE_MASK); 340 BUG_ON(va & ~PAGE_MASK);
338 341
339 for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { 342 for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
340 int level; 343 int level;
341 pte_t *ptep = lookup_address(va, &level); 344 pte_t *ptep;
342 unsigned long pfn, mfn; 345 unsigned long pfn, mfn;
343 void *virt; 346 void *virt;
344 347
348 /*
349 * The GDT is per-cpu and is in the percpu data area.
350 * That can be virtually mapped, so we need to do a
351 * page-walk to get the underlying MFN for the
352 * hypercall. The page can also be in the kernel's
353 * linear range, so we need to RO that mapping too.
354 */
355 ptep = lookup_address(va, &level);
345 BUG_ON(ptep == NULL); 356 BUG_ON(ptep == NULL);
346 357
347 pfn = pte_pfn(*ptep); 358 pfn = pte_pfn(*ptep);
@@ -358,6 +369,44 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
358 BUG(); 369 BUG();
359} 370}
360 371
372/*
373 * load_gdt for early boot, when the gdt is only mapped once
374 */
375static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
376{
377 unsigned long va = dtr->address;
378 unsigned int size = dtr->size + 1;
379 unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
380 unsigned long frames[pages];
381 int f;
382
383 /*
384 * A GDT can be up to 64k in size, which corresponds to 8192
385 * 8-byte entries, or 16 4k pages..
386 */
387
388 BUG_ON(size > 65536);
389 BUG_ON(va & ~PAGE_MASK);
390
391 for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
392 pte_t pte;
393 unsigned long pfn, mfn;
394
395 pfn = virt_to_pfn(va);
396 mfn = pfn_to_mfn(pfn);
397
398 pte = pfn_pte(pfn, PAGE_KERNEL_RO);
399
400 if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
401 BUG();
402
403 frames[f] = mfn;
404 }
405
406 if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
407 BUG();
408}
409
361static void load_TLS_descriptor(struct thread_struct *t, 410static void load_TLS_descriptor(struct thread_struct *t,
362 unsigned int cpu, unsigned int i) 411 unsigned int cpu, unsigned int i)
363{ 412{
@@ -581,6 +630,29 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
581 preempt_enable(); 630 preempt_enable();
582} 631}
583 632
633/*
634 * Version of write_gdt_entry for use at early boot-time needed to
635 * update an entry as simply as possible.
636 */
637static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
638 const void *desc, int type)
639{
640 switch (type) {
641 case DESC_LDT:
642 case DESC_TSS:
643 /* ignore */
644 break;
645
646 default: {
647 xmaddr_t maddr = virt_to_machine(&dt[entry]);
648
649 if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
650 dt[entry] = *(struct desc_struct *)desc;
651 }
652
653 }
654}
655
584static void xen_load_sp0(struct tss_struct *tss, 656static void xen_load_sp0(struct tss_struct *tss,
585 struct thread_struct *thread) 657 struct thread_struct *thread)
586{ 658{
@@ -714,7 +786,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
714 set: 786 set:
715 base = ((u64)high << 32) | low; 787 base = ((u64)high << 32) | low;
716 if (HYPERVISOR_set_segment_base(which, base) != 0) 788 if (HYPERVISOR_set_segment_base(which, base) != 0)
717 ret = -EFAULT; 789 ret = -EIO;
718 break; 790 break;
719#endif 791#endif
720 792
@@ -965,6 +1037,23 @@ static const struct machine_ops __initdata xen_machine_ops = {
965 .emergency_restart = xen_emergency_restart, 1037 .emergency_restart = xen_emergency_restart,
966}; 1038};
967 1039
1040/*
1041 * Set up the GDT and segment registers for -fstack-protector. Until
1042 * we do this, we have to be careful not to call any stack-protected
1043 * function, which is most of the kernel.
1044 */
1045static void __init xen_setup_stackprotector(void)
1046{
1047 pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
1048 pv_cpu_ops.load_gdt = xen_load_gdt_boot;
1049
1050 setup_stack_canary_segment(0);
1051 switch_to_new_gdt(0);
1052
1053 pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry;
1054 pv_cpu_ops.load_gdt = xen_load_gdt;
1055}
1056
968/* First C function to be called on Xen boot */ 1057/* First C function to be called on Xen boot */
969asmlinkage void __init xen_start_kernel(void) 1058asmlinkage void __init xen_start_kernel(void)
970{ 1059{
@@ -983,13 +1072,28 @@ asmlinkage void __init xen_start_kernel(void)
983 pv_apic_ops = xen_apic_ops; 1072 pv_apic_ops = xen_apic_ops;
984 pv_mmu_ops = xen_mmu_ops; 1073 pv_mmu_ops = xen_mmu_ops;
985 1074
986#ifdef CONFIG_X86_64
987 /* 1075 /*
988 * Setup percpu state. We only need to do this for 64-bit 1076 * Set up some pagetable state before starting to set any ptes.
989 * because 32-bit already has %fs set properly.
990 */ 1077 */
991 load_percpu_segment(0); 1078
992#endif 1079 /* Prevent unwanted bits from being set in PTEs. */
1080 __supported_pte_mask &= ~_PAGE_GLOBAL;
1081 if (!xen_initial_domain())
1082 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
1083
1084 __supported_pte_mask |= _PAGE_IOMAP;
1085
1086 xen_setup_features();
1087
1088 /* Get mfn list */
1089 if (!xen_feature(XENFEAT_auto_translated_physmap))
1090 xen_build_dynamic_phys_to_machine();
1091
1092 /*
1093 * Set up kernel GDT and segment registers, mainly so that
1094 * -fstack-protector code can be executed.
1095 */
1096 xen_setup_stackprotector();
993 1097
994 xen_init_irq_ops(); 1098 xen_init_irq_ops();
995 xen_init_cpuid_mask(); 1099 xen_init_cpuid_mask();
@@ -1001,8 +1105,6 @@ asmlinkage void __init xen_start_kernel(void)
1001 set_xen_basic_apic_ops(); 1105 set_xen_basic_apic_ops();
1002#endif 1106#endif
1003 1107
1004 xen_setup_features();
1005
1006 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { 1108 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1007 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; 1109 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1008 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; 1110 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
@@ -1019,17 +1121,8 @@ asmlinkage void __init xen_start_kernel(void)
1019 1121
1020 xen_smp_init(); 1122 xen_smp_init();
1021 1123
1022 /* Get mfn list */
1023 if (!xen_feature(XENFEAT_auto_translated_physmap))
1024 xen_build_dynamic_phys_to_machine();
1025
1026 pgd = (pgd_t *)xen_start_info->pt_base; 1124 pgd = (pgd_t *)xen_start_info->pt_base;
1027 1125
1028 /* Prevent unwanted bits from being set in PTEs. */
1029 __supported_pte_mask &= ~_PAGE_GLOBAL;
1030 if (!xen_initial_domain())
1031 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
1032
1033#ifdef CONFIG_X86_64 1126#ifdef CONFIG_X86_64
1034 /* Work out if we support NX */ 1127 /* Work out if we support NX */
1035 check_efer(); 1128 check_efer();
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 429834ec1687..fe03eeed7b48 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -236,6 +236,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
236 ctxt->user_regs.ss = __KERNEL_DS; 236 ctxt->user_regs.ss = __KERNEL_DS;
237#ifdef CONFIG_X86_32 237#ifdef CONFIG_X86_32
238 ctxt->user_regs.fs = __KERNEL_PERCPU; 238 ctxt->user_regs.fs = __KERNEL_PERCPU;
239 ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
239#else 240#else
240 ctxt->gs_base_kernel = per_cpu_offset(cpu); 241 ctxt->gs_base_kernel = per_cpu_offset(cpu);
241#endif 242#endif
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 5601506f2dd9..36a5141108df 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -187,7 +187,6 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
187 struct xen_spinlock *prev; 187 struct xen_spinlock *prev;
188 int irq = __get_cpu_var(lock_kicker_irq); 188 int irq = __get_cpu_var(lock_kicker_irq);
189 int ret; 189 int ret;
190 unsigned long flags;
191 u64 start; 190 u64 start;
192 191
193 /* If kicker interrupts not initialized yet, just spin */ 192 /* If kicker interrupts not initialized yet, just spin */
@@ -199,16 +198,12 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
199 /* announce we're spinning */ 198 /* announce we're spinning */
200 prev = spinning_lock(xl); 199 prev = spinning_lock(xl);
201 200
202 flags = __raw_local_save_flags();
203 if (irq_enable) {
204 ADD_STATS(taken_slow_irqenable, 1);
205 raw_local_irq_enable();
206 }
207
208 ADD_STATS(taken_slow, 1); 201 ADD_STATS(taken_slow, 1);
209 ADD_STATS(taken_slow_nested, prev != NULL); 202 ADD_STATS(taken_slow_nested, prev != NULL);
210 203
211 do { 204 do {
205 unsigned long flags;
206
212 /* clear pending */ 207 /* clear pending */
213 xen_clear_irq_pending(irq); 208 xen_clear_irq_pending(irq);
214 209
@@ -228,6 +223,12 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
228 goto out; 223 goto out;
229 } 224 }
230 225
226 flags = __raw_local_save_flags();
227 if (irq_enable) {
228 ADD_STATS(taken_slow_irqenable, 1);
229 raw_local_irq_enable();
230 }
231
231 /* 232 /*
232 * Block until irq becomes pending. If we're 233 * Block until irq becomes pending. If we're
233 * interrupted at this point (after the trylock but 234 * interrupted at this point (after the trylock but
@@ -238,13 +239,15 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
238 * pending. 239 * pending.
239 */ 240 */
240 xen_poll_irq(irq); 241 xen_poll_irq(irq);
242
243 raw_local_irq_restore(flags);
244
241 ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); 245 ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
242 } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ 246 } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
243 247
244 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); 248 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
245 249
246out: 250out:
247 raw_local_irq_restore(flags);
248 unspinning_lock(xl, prev); 251 unspinning_lock(xl, prev);
249 spin_time_accum_blocked(start); 252 spin_time_accum_blocked(start);
250 253
@@ -323,8 +326,13 @@ static void xen_spin_unlock(struct raw_spinlock *lock)
323 smp_wmb(); /* make sure no writes get moved after unlock */ 326 smp_wmb(); /* make sure no writes get moved after unlock */
324 xl->lock = 0; /* release lock */ 327 xl->lock = 0; /* release lock */
325 328
326 /* make sure unlock happens before kick */ 329 /*
327 barrier(); 330 * Make sure unlock happens before checking for waiting
331 * spinners. We need a strong barrier to enforce the
332 * write-read ordering to different memory locations, as the
333 * CPU makes no implied guarantees about their ordering.
334 */
335 mb();
328 336
329 if (unlikely(xl->spinners)) 337 if (unlikely(xl->spinners))
330 xen_spin_unlock_slow(xl); 338 xen_spin_unlock_slow(xl);
diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h
index dd1a7a4a1cea..beb3a6bdb61d 100644
--- a/arch/xtensa/include/asm/socket.h
+++ b/arch/xtensa/include/asm/socket.h
@@ -68,4 +68,7 @@
68#define SO_TIMESTAMPING 37 68#define SO_TIMESTAMPING 37
69#define SCM_TIMESTAMPING SO_TIMESTAMPING 69#define SCM_TIMESTAMPING SO_TIMESTAMPING
70 70
71#define SO_PROTOCOL 38
72#define SO_DOMAIN 39
73
71#endif /* _XTENSA_SOCKET_H */ 74#endif /* _XTENSA_SOCKET_H */
diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
index edad4156d89a..2f0b86b37cf9 100644
--- a/arch/xtensa/platforms/iss/network.c
+++ b/arch/xtensa/platforms/iss/network.c
@@ -545,7 +545,7 @@ static int iss_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
545 spin_unlock_irqrestore(&lp->lock, flags); 545 spin_unlock_irqrestore(&lp->lock, flags);
546 546
547 dev_kfree_skb(skb); 547 dev_kfree_skb(skb);
548 return 0; 548 return NETDEV_TX_OK;
549} 549}
550 550
551 551