aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/acpi/boot.c4
-rw-r--r--arch/x86/kernel/apic_32.c41
-rw-r--r--arch/x86/kernel/apic_64.c237
-rw-r--r--arch/x86/kernel/asm-offsets_64.c2
-rw-r--r--arch/x86/kernel/cpu/common.c13
-rw-r--r--arch/x86/kernel/cpu/common_64.c2
-rw-r--r--arch/x86/kernel/genapic_64.c88
-rw-r--r--arch/x86/kernel/genapic_flat_64.c62
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c164
-rw-r--r--arch/x86/kernel/genx2apic_phys.c159
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c69
-rw-r--r--arch/x86/kernel/i387.c154
-rw-r--r--arch/x86/kernel/i8259.c24
-rw-r--r--arch/x86/kernel/io_apic_32.c10
-rw-r--r--arch/x86/kernel/io_apic_64.c608
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/numaq_32.c7
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/sigframe.h14
-rw-r--r--arch/x86/kernel/signal_32.c45
-rw-r--r--arch/x86/kernel/signal_64.c95
-rw-r--r--arch/x86/kernel/smpboot.c38
-rw-r--r--arch/x86/kernel/summit_32.c2
-rw-r--r--arch/x86/kernel/syscall_64.c4
-rw-r--r--arch/x86/kernel/traps_32.c1
-rw-r--r--arch/x86/kernel/traps_64.c6
-rw-r--r--arch/x86/kernel/vmi_32.c4
-rw-r--r--arch/x86/kernel/xsave.c316
30 files changed, 1896 insertions, 283 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec5..d6ea91abaebc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -38,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o
38 38
39obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 39obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
40obj-y += process.o 40obj-y += process.o
41obj-y += i387.o 41obj-y += i387.o xsave.o
42obj-y += ptrace.o 42obj-y += ptrace.o
43obj-y += ds.o 43obj-y += ds.o
44obj-$(CONFIG_X86_32) += tls.o 44obj-$(CONFIG_X86_32) += tls.o
@@ -104,6 +104,8 @@ obj-$(CONFIG_OLPC) += olpc.o
104ifeq ($(CONFIG_X86_64),y) 104ifeq ($(CONFIG_X86_64),y)
105 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 105 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
106 obj-y += bios_uv.o 106 obj-y += bios_uv.o
107 obj-y += genx2apic_cluster.o
108 obj-y += genx2apic_phys.o
107 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 109 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
108 obj-$(CONFIG_AUDIT) += audit_64.o 110 obj-$(CONFIG_AUDIT) += audit_64.o
109 111
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index bfd10fd211cd..27ef365e757d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -775,7 +775,7 @@ static void __init acpi_register_lapic_address(unsigned long address)
775 775
776 set_fixmap_nocache(FIX_APIC_BASE, address); 776 set_fixmap_nocache(FIX_APIC_BASE, address);
777 if (boot_cpu_physical_apicid == -1U) { 777 if (boot_cpu_physical_apicid == -1U) {
778 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 778 boot_cpu_physical_apicid = read_apic_id();
779#ifdef CONFIG_X86_32 779#ifdef CONFIG_X86_32
780 apic_version[boot_cpu_physical_apicid] = 780 apic_version[boot_cpu_physical_apicid] =
781 GET_APIC_VERSION(apic_read(APIC_LVR)); 781 GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -1351,7 +1351,9 @@ static void __init acpi_process_madt(void)
1351 acpi_ioapic = 1; 1351 acpi_ioapic = 1;
1352 1352
1353 smp_found_config = 1; 1353 smp_found_config = 1;
1354#ifdef CONFIG_X86_32
1354 setup_apic_routing(); 1355 setup_apic_routing();
1356#endif
1355 } 1357 }
1356 } 1358 }
1357 if (error == -EINVAL) { 1359 if (error == -EINVAL) {
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index f88bd0d982b0..44cae65e32ef 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -145,13 +145,18 @@ static int modern_apic(void)
145 return lapic_get_version() >= 0x14; 145 return lapic_get_version() >= 0x14;
146} 146}
147 147
148void apic_wait_icr_idle(void) 148/*
149 * Paravirt kernels also might be using these below ops. So we still
150 * use generic apic_read()/apic_write(), which might be pointing to different
151 * ops in PARAVIRT case.
152 */
153void xapic_wait_icr_idle(void)
149{ 154{
150 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 155 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
151 cpu_relax(); 156 cpu_relax();
152} 157}
153 158
154u32 safe_apic_wait_icr_idle(void) 159u32 safe_xapic_wait_icr_idle(void)
155{ 160{
156 u32 send_status; 161 u32 send_status;
157 int timeout; 162 int timeout;
@@ -167,6 +172,34 @@ u32 safe_apic_wait_icr_idle(void)
167 return send_status; 172 return send_status;
168} 173}
169 174
175void xapic_icr_write(u32 low, u32 id)
176{
177 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
178 apic_write(APIC_ICR, low);
179}
180
181u64 xapic_icr_read(void)
182{
183 u32 icr1, icr2;
184
185 icr2 = apic_read(APIC_ICR2);
186 icr1 = apic_read(APIC_ICR);
187
188 return icr1 | ((u64)icr2 << 32);
189}
190
191static struct apic_ops xapic_ops = {
192 .read = native_apic_mem_read,
193 .write = native_apic_mem_write,
194 .icr_read = xapic_icr_read,
195 .icr_write = xapic_icr_write,
196 .wait_icr_idle = xapic_wait_icr_idle,
197 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
198};
199
200struct apic_ops __read_mostly *apic_ops = &xapic_ops;
201EXPORT_SYMBOL_GPL(apic_ops);
202
170/** 203/**
171 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 204 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
172 */ 205 */
@@ -1205,7 +1238,7 @@ void __init init_apic_mappings(void)
1205 * default configuration (or the MP table is broken). 1238 * default configuration (or the MP table is broken).
1206 */ 1239 */
1207 if (boot_cpu_physical_apicid == -1U) 1240 if (boot_cpu_physical_apicid == -1U)
1208 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1241 boot_cpu_physical_apicid = read_apic_id();
1209 1242
1210} 1243}
1211 1244
@@ -1242,7 +1275,7 @@ int __init APIC_init_uniprocessor(void)
1242 * might be zero if read from MP tables. Get it from LAPIC. 1275 * might be zero if read from MP tables. Get it from LAPIC.
1243 */ 1276 */
1244#ifdef CONFIG_CRASH_DUMP 1277#ifdef CONFIG_CRASH_DUMP
1245 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1278 boot_cpu_physical_apicid = read_apic_id();
1246#endif 1279#endif
1247 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 1280 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1248 1281
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 446c062e831c..b6256587f99e 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -27,6 +27,7 @@
27#include <linux/clockchips.h> 27#include <linux/clockchips.h>
28#include <linux/acpi_pmtmr.h> 28#include <linux/acpi_pmtmr.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/dmar.h>
30 31
31#include <asm/atomic.h> 32#include <asm/atomic.h>
32#include <asm/smp.h> 33#include <asm/smp.h>
@@ -39,6 +40,7 @@
39#include <asm/proto.h> 40#include <asm/proto.h>
40#include <asm/timex.h> 41#include <asm/timex.h>
41#include <asm/apic.h> 42#include <asm/apic.h>
43#include <asm/i8259.h>
42 44
43#include <mach_ipi.h> 45#include <mach_ipi.h>
44#include <mach_apic.h> 46#include <mach_apic.h>
@@ -46,6 +48,11 @@
46static int disable_apic_timer __cpuinitdata; 48static int disable_apic_timer __cpuinitdata;
47static int apic_calibrate_pmtmr __initdata; 49static int apic_calibrate_pmtmr __initdata;
48int disable_apic; 50int disable_apic;
51int disable_x2apic;
52int x2apic;
53
54/* x2apic enabled before OS handover */
55int x2apic_preenabled;
49 56
50/* Local APIC timer works in C2 */ 57/* Local APIC timer works in C2 */
51int local_apic_timer_c2_ok; 58int local_apic_timer_c2_ok;
@@ -118,13 +125,13 @@ static int modern_apic(void)
118 return lapic_get_version() >= 0x14; 125 return lapic_get_version() >= 0x14;
119} 126}
120 127
121void apic_wait_icr_idle(void) 128void xapic_wait_icr_idle(void)
122{ 129{
123 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 130 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
124 cpu_relax(); 131 cpu_relax();
125} 132}
126 133
127u32 safe_apic_wait_icr_idle(void) 134u32 safe_xapic_wait_icr_idle(void)
128{ 135{
129 u32 send_status; 136 u32 send_status;
130 int timeout; 137 int timeout;
@@ -140,6 +147,69 @@ u32 safe_apic_wait_icr_idle(void)
140 return send_status; 147 return send_status;
141} 148}
142 149
150void xapic_icr_write(u32 low, u32 id)
151{
152 apic_write(APIC_ICR2, id << 24);
153 apic_write(APIC_ICR, low);
154}
155
156u64 xapic_icr_read(void)
157{
158 u32 icr1, icr2;
159
160 icr2 = apic_read(APIC_ICR2);
161 icr1 = apic_read(APIC_ICR);
162
163 return (icr1 | ((u64)icr2 << 32));
164}
165
166static struct apic_ops xapic_ops = {
167 .read = native_apic_mem_read,
168 .write = native_apic_mem_write,
169 .icr_read = xapic_icr_read,
170 .icr_write = xapic_icr_write,
171 .wait_icr_idle = xapic_wait_icr_idle,
172 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
173};
174
175struct apic_ops __read_mostly *apic_ops = &xapic_ops;
176
177EXPORT_SYMBOL_GPL(apic_ops);
178
179static void x2apic_wait_icr_idle(void)
180{
181 /* no need to wait for icr idle in x2apic */
182 return;
183}
184
185static u32 safe_x2apic_wait_icr_idle(void)
186{
187 /* no need to wait for icr idle in x2apic */
188 return 0;
189}
190
191void x2apic_icr_write(u32 low, u32 id)
192{
193 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
194}
195
196u64 x2apic_icr_read(void)
197{
198 unsigned long val;
199
200 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
201 return val;
202}
203
204static struct apic_ops x2apic_ops = {
205 .read = native_apic_msr_read,
206 .write = native_apic_msr_write,
207 .icr_read = x2apic_icr_read,
208 .icr_write = x2apic_icr_write,
209 .wait_icr_idle = x2apic_wait_icr_idle,
210 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
211};
212
143/** 213/**
144 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 214 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
145 */ 215 */
@@ -629,10 +699,10 @@ int __init verify_local_APIC(void)
629 /* 699 /*
630 * The ID register is read/write in a real APIC. 700 * The ID register is read/write in a real APIC.
631 */ 701 */
632 reg0 = read_apic_id(); 702 reg0 = apic_read(APIC_ID);
633 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 703 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
634 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); 704 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
635 reg1 = read_apic_id(); 705 reg1 = apic_read(APIC_ID);
636 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); 706 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
637 apic_write(APIC_ID, reg0); 707 apic_write(APIC_ID, reg0);
638 if (reg1 != (reg0 ^ APIC_ID_MASK)) 708 if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -833,6 +903,125 @@ void __cpuinit end_local_APIC_setup(void)
833 apic_pm_activate(); 903 apic_pm_activate();
834} 904}
835 905
906void check_x2apic(void)
907{
908 int msr, msr2;
909
910 rdmsr(MSR_IA32_APICBASE, msr, msr2);
911
912 if (msr & X2APIC_ENABLE) {
913 printk("x2apic enabled by BIOS, switching to x2apic ops\n");
914 x2apic_preenabled = x2apic = 1;
915 apic_ops = &x2apic_ops;
916 }
917}
918
919void enable_x2apic(void)
920{
921 int msr, msr2;
922
923 rdmsr(MSR_IA32_APICBASE, msr, msr2);
924 if (!(msr & X2APIC_ENABLE)) {
925 printk("Enabling x2apic\n");
926 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
927 }
928}
929
930void enable_IR_x2apic(void)
931{
932#ifdef CONFIG_INTR_REMAP
933 int ret;
934 unsigned long flags;
935
936 if (!cpu_has_x2apic)
937 return;
938
939 if (!x2apic_preenabled && disable_x2apic) {
940 printk(KERN_INFO
941 "Skipped enabling x2apic and Interrupt-remapping "
942 "because of nox2apic\n");
943 return;
944 }
945
946 if (x2apic_preenabled && disable_x2apic)
947 panic("Bios already enabled x2apic, can't enforce nox2apic");
948
949 if (!x2apic_preenabled && skip_ioapic_setup) {
950 printk(KERN_INFO
951 "Skipped enabling x2apic and Interrupt-remapping "
952 "because of skipping io-apic setup\n");
953 return;
954 }
955
956 ret = dmar_table_init();
957 if (ret) {
958 printk(KERN_INFO
959 "dmar_table_init() failed with %d:\n", ret);
960
961 if (x2apic_preenabled)
962 panic("x2apic enabled by bios. But IR enabling failed");
963 else
964 printk(KERN_INFO
965 "Not enabling x2apic,Intr-remapping\n");
966 return;
967 }
968
969 local_irq_save(flags);
970 mask_8259A();
971 save_mask_IO_APIC_setup();
972
973 ret = enable_intr_remapping(1);
974
975 if (ret && x2apic_preenabled) {
976 local_irq_restore(flags);
977 panic("x2apic enabled by bios. But IR enabling failed");
978 }
979
980 if (ret)
981 goto end;
982
983 if (!x2apic) {
984 x2apic = 1;
985 apic_ops = &x2apic_ops;
986 enable_x2apic();
987 }
988end:
989 if (ret)
990 /*
991 * IR enabling failed
992 */
993 restore_IO_APIC_setup();
994 else
995 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
996
997 unmask_8259A();
998 local_irq_restore(flags);
999
1000 if (!ret) {
1001 if (!x2apic_preenabled)
1002 printk(KERN_INFO
1003 "Enabled x2apic and interrupt-remapping\n");
1004 else
1005 printk(KERN_INFO
1006 "Enabled Interrupt-remapping\n");
1007 } else
1008 printk(KERN_ERR
1009 "Failed to enable Interrupt-remapping and x2apic\n");
1010#else
1011 if (!cpu_has_x2apic)
1012 return;
1013
1014 if (x2apic_preenabled)
1015 panic("x2apic enabled prior OS handover,"
1016 " enable CONFIG_INTR_REMAP");
1017
1018 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1019 " and x2apic\n");
1020#endif
1021
1022 return;
1023}
1024
836/* 1025/*
837 * Detect and enable local APICs on non-SMP boards. 1026 * Detect and enable local APICs on non-SMP boards.
838 * Original code written by Keir Fraser. 1027 * Original code written by Keir Fraser.
@@ -872,7 +1061,7 @@ void __init early_init_lapic_mapping(void)
872 * Fetch the APIC ID of the BSP in case we have a 1061 * Fetch the APIC ID of the BSP in case we have a
873 * default configuration (or the MP table is broken). 1062 * default configuration (or the MP table is broken).
874 */ 1063 */
875 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1064 boot_cpu_physical_apicid = read_apic_id();
876} 1065}
877 1066
878/** 1067/**
@@ -880,6 +1069,11 @@ void __init early_init_lapic_mapping(void)
880 */ 1069 */
881void __init init_apic_mappings(void) 1070void __init init_apic_mappings(void)
882{ 1071{
1072 if (x2apic) {
1073 boot_cpu_physical_apicid = read_apic_id();
1074 return;
1075 }
1076
883 /* 1077 /*
884 * If no local APIC can be found then set up a fake all 1078 * If no local APIC can be found then set up a fake all
885 * zeroes page to simulate the local APIC and another 1079 * zeroes page to simulate the local APIC and another
@@ -899,7 +1093,7 @@ void __init init_apic_mappings(void)
899 * Fetch the APIC ID of the BSP in case we have a 1093 * Fetch the APIC ID of the BSP in case we have a
900 * default configuration (or the MP table is broken). 1094 * default configuration (or the MP table is broken).
901 */ 1095 */
902 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1096 boot_cpu_physical_apicid = read_apic_id();
903} 1097}
904 1098
905/* 1099/*
@@ -918,6 +1112,9 @@ int __init APIC_init_uniprocessor(void)
918 return -1; 1112 return -1;
919 } 1113 }
920 1114
1115 enable_IR_x2apic();
1116 setup_apic_routing();
1117
921 verify_local_APIC(); 1118 verify_local_APIC();
922 1119
923 connect_bsp_APIC(); 1120 connect_bsp_APIC();
@@ -1093,6 +1290,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
1093 cpu_set(cpu, cpu_present_map); 1290 cpu_set(cpu, cpu_present_map);
1094} 1291}
1095 1292
1293int hard_smp_processor_id(void)
1294{
1295 return read_apic_id();
1296}
1297
1096/* 1298/*
1097 * Power management 1299 * Power management
1098 */ 1300 */
@@ -1129,7 +1331,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1129 1331
1130 maxlvt = lapic_get_maxlvt(); 1332 maxlvt = lapic_get_maxlvt();
1131 1333
1132 apic_pm_state.apic_id = read_apic_id(); 1334 apic_pm_state.apic_id = apic_read(APIC_ID);
1133 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); 1335 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
1134 apic_pm_state.apic_ldr = apic_read(APIC_LDR); 1336 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
1135 apic_pm_state.apic_dfr = apic_read(APIC_DFR); 1337 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1164,10 +1366,14 @@ static int lapic_resume(struct sys_device *dev)
1164 maxlvt = lapic_get_maxlvt(); 1366 maxlvt = lapic_get_maxlvt();
1165 1367
1166 local_irq_save(flags); 1368 local_irq_save(flags);
1167 rdmsr(MSR_IA32_APICBASE, l, h); 1369 if (!x2apic) {
1168 l &= ~MSR_IA32_APICBASE_BASE; 1370 rdmsr(MSR_IA32_APICBASE, l, h);
1169 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 1371 l &= ~MSR_IA32_APICBASE_BASE;
1170 wrmsr(MSR_IA32_APICBASE, l, h); 1372 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1373 wrmsr(MSR_IA32_APICBASE, l, h);
1374 } else
1375 enable_x2apic();
1376
1171 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 1377 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1172 apic_write(APIC_ID, apic_pm_state.apic_id); 1378 apic_write(APIC_ID, apic_pm_state.apic_id);
1173 apic_write(APIC_DFR, apic_pm_state.apic_dfr); 1379 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1307,6 +1513,15 @@ __cpuinit int apic_is_clustered_box(void)
1307 return (clusters > 2); 1513 return (clusters > 2);
1308} 1514}
1309 1515
1516static __init int setup_nox2apic(char *str)
1517{
1518 disable_x2apic = 1;
1519 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
1520 return 0;
1521}
1522early_param("nox2apic", setup_nox2apic);
1523
1524
1310/* 1525/*
1311 * APIC command line parameters 1526 * APIC command line parameters
1312 */ 1527 */
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index aa89387006fe..505543a75a56 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -22,7 +22,7 @@
22 22
23#define __NO_STUBS 1 23#define __NO_STUBS 1
24#undef __SYSCALL 24#undef __SYSCALL
25#undef _ASM_X86_64_UNISTD_H_ 25#undef ASM_X86__UNISTD_64_H
26#define __SYSCALL(nr, sym) [nr] = 1, 26#define __SYSCALL(nr, sym) [nr] = 1,
27static char syscalls[] = { 27static char syscalls[] = {
28#include <asm/unistd.h> 28#include <asm/unistd.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0785b3c8d043..c63ec65f484c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -739,9 +739,20 @@ void __cpuinit cpu_init(void)
739 /* 739 /*
740 * Force FPU initialization: 740 * Force FPU initialization:
741 */ 741 */
742 current_thread_info()->status = 0; 742 if (cpu_has_xsave)
743 current_thread_info()->status = TS_XSAVE;
744 else
745 current_thread_info()->status = 0;
743 clear_used_math(); 746 clear_used_math();
744 mxcsr_feature_mask_init(); 747 mxcsr_feature_mask_init();
748
749 /*
750 * Boot processor to setup the FP and extended state context info.
751 */
752 if (!smp_processor_id())
753 init_thread_xstate();
754
755 xsave_init();
745} 756}
746 757
747#ifdef CONFIG_HOTPLUG_CPU 758#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index c3afba5a81a7..af569a964e74 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -633,6 +633,8 @@ void __cpuinit cpu_init(void)
633 barrier(); 633 barrier();
634 634
635 check_efer(); 635 check_efer();
636 if (cpu != 0 && x2apic)
637 enable_x2apic();
636 638
637 /* 639 /*
638 * set up and load the per-CPU TSS 640 * set up and load the per-CPU TSS
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index eaff0bbb1444..6c9bfc9e1e95 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -16,87 +16,63 @@
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h> 18#include <linux/hardirq.h>
19#include <linux/dmar.h>
19 20
20#include <asm/smp.h> 21#include <asm/smp.h>
21#include <asm/ipi.h> 22#include <asm/ipi.h>
22#include <asm/genapic.h> 23#include <asm/genapic.h>
23 24
24#ifdef CONFIG_ACPI 25extern struct genapic apic_flat;
25#include <acpi/acpi_bus.h> 26extern struct genapic apic_physflat;
26#endif 27extern struct genapic apic_x2xpic_uv_x;
27 28extern struct genapic apic_x2apic_phys;
28DEFINE_PER_CPU(int, x2apic_extra_bits); 29extern struct genapic apic_x2apic_cluster;
29 30
30struct genapic __read_mostly *genapic = &apic_flat; 31struct genapic __read_mostly *genapic = &apic_flat;
31 32
32static enum uv_system_type uv_system_type; 33static struct genapic *apic_probe[] __initdata = {
34 &apic_x2apic_uv_x,
35 &apic_x2apic_phys,
36 &apic_x2apic_cluster,
37 &apic_physflat,
38 NULL,
39};
33 40
34/* 41/*
35 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 42 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
36 */ 43 */
37void __init setup_apic_routing(void) 44void __init setup_apic_routing(void)
38{ 45{
39 if (uv_system_type == UV_NON_UNIQUE_APIC) 46 if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) {
40 genapic = &apic_x2apic_uv_x; 47 if (!intr_remapping_enabled)
41 else 48 genapic = &apic_flat;
42#ifdef CONFIG_ACPI 49 }
43 /*
44 * Quirk: some x86_64 machines can only use physical APIC mode
45 * regardless of how many processors are present (x86_64 ES7000
46 * is an example).
47 */
48 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
49 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
50 genapic = &apic_physflat;
51 else
52#endif
53
54 if (max_physical_apicid < 8)
55 genapic = &apic_flat;
56 else
57 genapic = &apic_physflat;
58 50
59 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 51 if (genapic == &apic_flat) {
52 if (max_physical_apicid >= 8)
53 genapic = &apic_physflat;
54 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
55 }
60} 56}
61 57
62/* Same for both flat and physical. */ 58/* Same for both flat and physical. */
63 59
64void send_IPI_self(int vector) 60void apic_send_IPI_self(int vector)
65{ 61{
66 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); 62 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
67} 63}
68 64
69int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) 65int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
70{ 66{
71 if (!strcmp(oem_id, "SGI")) { 67 int i;
72 if (!strcmp(oem_table_id, "UVL")) 68
73 uv_system_type = UV_LEGACY_APIC; 69 for (i = 0; apic_probe[i]; ++i) {
74 else if (!strcmp(oem_table_id, "UVX")) 70 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
75 uv_system_type = UV_X2APIC; 71 genapic = apic_probe[i];
76 else if (!strcmp(oem_table_id, "UVH")) 72 printk(KERN_INFO "Setting APIC routing to %s.\n",
77 uv_system_type = UV_NON_UNIQUE_APIC; 73 genapic->name);
74 return 1;
75 }
78 } 76 }
79 return 0; 77 return 0;
80} 78}
81
82unsigned int read_apic_id(void)
83{
84 unsigned int id;
85
86 WARN_ON(preemptible() && num_online_cpus() > 1);
87 id = apic_read(APIC_ID);
88 if (uv_system_type >= UV_X2APIC)
89 id |= __get_cpu_var(x2apic_extra_bits);
90 return id;
91}
92
93enum uv_system_type get_uv_system_type(void)
94{
95 return uv_system_type;
96}
97
98int is_uv_system(void)
99{
100 return uv_system_type != UV_NONE;
101}
102EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 786548a62d38..9eca5ba7a6b1 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -15,9 +15,20 @@
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h>
18#include <asm/smp.h> 19#include <asm/smp.h>
19#include <asm/ipi.h> 20#include <asm/ipi.h>
20#include <asm/genapic.h> 21#include <asm/genapic.h>
22#include <mach_apicdef.h>
23
24#ifdef CONFIG_ACPI
25#include <acpi/acpi_bus.h>
26#endif
27
28static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
29{
30 return 1;
31}
21 32
22static cpumask_t flat_target_cpus(void) 33static cpumask_t flat_target_cpus(void)
23{ 34{
@@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector)
95 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 106 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
96} 107}
97 108
109static unsigned int get_apic_id(unsigned long x)
110{
111 unsigned int id;
112
113 id = (((x)>>24) & 0xFFu);
114 return id;
115}
116
117static unsigned long set_apic_id(unsigned int id)
118{
119 unsigned long x;
120
121 x = ((id & 0xFFu)<<24);
122 return x;
123}
124
125static unsigned int read_xapic_id(void)
126{
127 unsigned int id;
128
129 id = get_apic_id(apic_read(APIC_ID));
130 return id;
131}
132
98static int flat_apic_id_registered(void) 133static int flat_apic_id_registered(void)
99{ 134{
100 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); 135 return physid_isset(read_xapic_id(), phys_cpu_present_map);
101} 136}
102 137
103static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) 138static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb)
112 147
113struct genapic apic_flat = { 148struct genapic apic_flat = {
114 .name = "flat", 149 .name = "flat",
150 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
115 .int_delivery_mode = dest_LowestPrio, 151 .int_delivery_mode = dest_LowestPrio,
116 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 152 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
117 .target_cpus = flat_target_cpus, 153 .target_cpus = flat_target_cpus,
@@ -121,8 +157,12 @@ struct genapic apic_flat = {
121 .send_IPI_all = flat_send_IPI_all, 157 .send_IPI_all = flat_send_IPI_all,
122 .send_IPI_allbutself = flat_send_IPI_allbutself, 158 .send_IPI_allbutself = flat_send_IPI_allbutself,
123 .send_IPI_mask = flat_send_IPI_mask, 159 .send_IPI_mask = flat_send_IPI_mask,
160 .send_IPI_self = apic_send_IPI_self,
124 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 161 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
125 .phys_pkg_id = phys_pkg_id, 162 .phys_pkg_id = phys_pkg_id,
163 .get_apic_id = get_apic_id,
164 .set_apic_id = set_apic_id,
165 .apic_id_mask = (0xFFu<<24),
126}; 166};
127 167
128/* 168/*
@@ -130,6 +170,21 @@ struct genapic apic_flat = {
130 * We cannot use logical delivery in this case because the mask 170 * We cannot use logical delivery in this case because the mask
131 * overflows, so use physical mode. 171 * overflows, so use physical mode.
132 */ 172 */
173static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
174{
175#ifdef CONFIG_ACPI
176 /*
177 * Quirk: some x86_64 machines can only use physical APIC mode
178 * regardless of how many processors are present (x86_64 ES7000
179 * is an example).
180 */
181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
183 return 1;
184#endif
185
186 return 0;
187}
133 188
134static cpumask_t physflat_target_cpus(void) 189static cpumask_t physflat_target_cpus(void)
135{ 190{
@@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
176 231
177struct genapic apic_physflat = { 232struct genapic apic_physflat = {
178 .name = "physical flat", 233 .name = "physical flat",
234 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
179 .int_delivery_mode = dest_Fixed, 235 .int_delivery_mode = dest_Fixed,
180 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 236 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
181 .target_cpus = physflat_target_cpus, 237 .target_cpus = physflat_target_cpus,
@@ -185,6 +241,10 @@ struct genapic apic_physflat = {
185 .send_IPI_all = physflat_send_IPI_all, 241 .send_IPI_all = physflat_send_IPI_all,
186 .send_IPI_allbutself = physflat_send_IPI_allbutself, 242 .send_IPI_allbutself = physflat_send_IPI_allbutself,
187 .send_IPI_mask = physflat_send_IPI_mask, 243 .send_IPI_mask = physflat_send_IPI_mask,
244 .send_IPI_self = apic_send_IPI_self,
188 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 245 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
189 .phys_pkg_id = phys_pkg_id, 246 .phys_pkg_id = phys_pkg_id,
247 .get_apic_id = get_apic_id,
248 .set_apic_id = set_apic_id,
249 .apic_id_mask = (0xFFu<<24),
190}; 250};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
new file mode 100644
index 000000000000..fed9f68efd66
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -0,0 +1,164 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
14
15static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
16{
17 if (cpu_has_x2apic)
18 return 1;
19
20 return 0;
21}
22
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
24
25static cpumask_t x2apic_target_cpus(void)
26{
27 return cpumask_of_cpu(0);
28}
29
30/*
31 * for now each logical cpu is in its own vector allocation domain.
32 */
33static cpumask_t x2apic_vector_allocation_domain(int cpu)
34{
35 cpumask_t domain = CPU_MASK_NONE;
36 cpu_set(cpu, domain);
37 return domain;
38}
39
40static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
41 unsigned int dest)
42{
43 unsigned long cfg;
44
45 cfg = __prepare_ICR(0, vector, dest);
46
47 /*
48 * send the IPI.
49 */
50 x2apic_icr_write(cfg, apicid);
51}
52
53/*
54 * for now, we send the IPI's one by one in the cpumask.
55 * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
56 * at once. We have 16 cpu's in a cluster. This will minimize IPI register
57 * writes.
58 */
59static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
60{
61 unsigned long flags;
62 unsigned long query_cpu;
63
64 local_irq_save(flags);
65 for_each_cpu_mask(query_cpu, mask) {
66 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
67 vector, APIC_DEST_LOGICAL);
68 }
69 local_irq_restore(flags);
70}
71
72static void x2apic_send_IPI_allbutself(int vector)
73{
74 cpumask_t mask = cpu_online_map;
75
76 cpu_clear(smp_processor_id(), mask);
77
78 if (!cpus_empty(mask))
79 x2apic_send_IPI_mask(mask, vector);
80}
81
82static void x2apic_send_IPI_all(int vector)
83{
84 x2apic_send_IPI_mask(cpu_online_map, vector);
85}
86
87static int x2apic_apic_id_registered(void)
88{
89 return 1;
90}
91
92static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
93{
94 int cpu;
95
96 /*
97 * We're using fixed IRQ delivery, can only return one phys APIC ID.
98 * May as well be the first.
99 */
100 cpu = first_cpu(cpumask);
101 if ((unsigned)cpu < NR_CPUS)
102 return per_cpu(x86_cpu_to_logical_apicid, cpu);
103 else
104 return BAD_APICID;
105}
106
107static unsigned int get_apic_id(unsigned long x)
108{
109 unsigned int id;
110
111 id = x;
112 return id;
113}
114
115static unsigned long set_apic_id(unsigned int id)
116{
117 unsigned long x;
118
119 x = id;
120 return x;
121}
122
123static unsigned int x2apic_read_id(void)
124{
125 return apic_read(APIC_ID);
126}
127
128static unsigned int phys_pkg_id(int index_msb)
129{
130 return x2apic_read_id() >> index_msb;
131}
132
133static void x2apic_send_IPI_self(int vector)
134{
135 apic_write(APIC_SELF_IPI, vector);
136}
137
138static void init_x2apic_ldr(void)
139{
140 int cpu = smp_processor_id();
141
142 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
143 return;
144}
145
146struct genapic apic_x2apic_cluster = {
147 .name = "cluster x2apic",
148 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
149 .int_delivery_mode = dest_LowestPrio,
150 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
151 .target_cpus = x2apic_target_cpus,
152 .vector_allocation_domain = x2apic_vector_allocation_domain,
153 .apic_id_registered = x2apic_apic_id_registered,
154 .init_apic_ldr = init_x2apic_ldr,
155 .send_IPI_all = x2apic_send_IPI_all,
156 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
157 .send_IPI_mask = x2apic_send_IPI_mask,
158 .send_IPI_self = x2apic_send_IPI_self,
159 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
160 .phys_pkg_id = phys_pkg_id,
161 .get_apic_id = get_apic_id,
162 .set_apic_id = set_apic_id,
163 .apic_id_mask = (0xFFFFFFFFu),
164};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
new file mode 100644
index 000000000000..958d537b4cc9
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -0,0 +1,159 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13static int x2apic_phys;
14
15static int set_x2apic_phys_mode(char *arg)
16{
17 x2apic_phys = 1;
18 return 0;
19}
20early_param("x2apic_phys", set_x2apic_phys_mode);
21
22static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
23{
24 if (cpu_has_x2apic && x2apic_phys)
25 return 1;
26
27 return 0;
28}
29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
31
32static cpumask_t x2apic_target_cpus(void)
33{
34 return cpumask_of_cpu(0);
35}
36
37static cpumask_t x2apic_vector_allocation_domain(int cpu)
38{
39 cpumask_t domain = CPU_MASK_NONE;
40 cpu_set(cpu, domain);
41 return domain;
42}
43
44static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
45 unsigned int dest)
46{
47 unsigned long cfg;
48
49 cfg = __prepare_ICR(0, vector, dest);
50
51 /*
52 * send the IPI.
53 */
54 x2apic_icr_write(cfg, apicid);
55}
56
57static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
58{
59 unsigned long flags;
60 unsigned long query_cpu;
61
62 local_irq_save(flags);
63 for_each_cpu_mask(query_cpu, mask) {
64 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
65 vector, APIC_DEST_PHYSICAL);
66 }
67 local_irq_restore(flags);
68}
69
70static void x2apic_send_IPI_allbutself(int vector)
71{
72 cpumask_t mask = cpu_online_map;
73
74 cpu_clear(smp_processor_id(), mask);
75
76 if (!cpus_empty(mask))
77 x2apic_send_IPI_mask(mask, vector);
78}
79
80static void x2apic_send_IPI_all(int vector)
81{
82 x2apic_send_IPI_mask(cpu_online_map, vector);
83}
84
85static int x2apic_apic_id_registered(void)
86{
87 return 1;
88}
89
90static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
91{
92 int cpu;
93
94 /*
95 * We're using fixed IRQ delivery, can only return one phys APIC ID.
96 * May as well be the first.
97 */
98 cpu = first_cpu(cpumask);
99 if ((unsigned)cpu < NR_CPUS)
100 return per_cpu(x86_cpu_to_apicid, cpu);
101 else
102 return BAD_APICID;
103}
104
105static unsigned int get_apic_id(unsigned long x)
106{
107 unsigned int id;
108
109 id = x;
110 return id;
111}
112
113static unsigned long set_apic_id(unsigned int id)
114{
115 unsigned long x;
116
117 x = id;
118 return x;
119}
120
121static unsigned int x2apic_read_id(void)
122{
123 return apic_read(APIC_ID);
124}
125
126static unsigned int phys_pkg_id(int index_msb)
127{
128 return x2apic_read_id() >> index_msb;
129}
130
131void x2apic_send_IPI_self(int vector)
132{
133 apic_write(APIC_SELF_IPI, vector);
134}
135
136void init_x2apic_ldr(void)
137{
138 return;
139}
140
141struct genapic apic_x2apic_phys = {
142 .name = "physical x2apic",
143 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
144 .int_delivery_mode = dest_Fixed,
145 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
146 .target_cpus = x2apic_target_cpus,
147 .vector_allocation_domain = x2apic_vector_allocation_domain,
148 .apic_id_registered = x2apic_apic_id_registered,
149 .init_apic_ldr = init_x2apic_ldr,
150 .send_IPI_all = x2apic_send_IPI_all,
151 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
152 .send_IPI_mask = x2apic_send_IPI_mask,
153 .send_IPI_self = x2apic_send_IPI_self,
154 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
155 .phys_pkg_id = phys_pkg_id,
156 .get_apic_id = get_apic_id,
157 .set_apic_id = set_apic_id,
158 .apic_id_mask = (0xFFFFFFFFu),
159};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index bfa837cb16be..99269beabc7c 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -12,12 +12,12 @@
12#include <linux/threads.h> 12#include <linux/threads.h>
13#include <linux/cpumask.h> 13#include <linux/cpumask.h>
14#include <linux/string.h> 14#include <linux/string.h>
15#include <linux/kernel.h>
16#include <linux/ctype.h> 15#include <linux/ctype.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/sched.h> 17#include <linux/sched.h>
19#include <linux/bootmem.h> 18#include <linux/bootmem.h>
20#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/hardirq.h>
21#include <asm/smp.h> 21#include <asm/smp.h>
22#include <asm/ipi.h> 22#include <asm/ipi.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
@@ -26,6 +26,35 @@
26#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
27#include <asm/uv/bios.h> 27#include <asm/uv/bios.h>
28 28
29DEFINE_PER_CPU(int, x2apic_extra_bits);
30
31static enum uv_system_type uv_system_type;
32
33static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
34{
35 if (!strcmp(oem_id, "SGI")) {
36 if (!strcmp(oem_table_id, "UVL"))
37 uv_system_type = UV_LEGACY_APIC;
38 else if (!strcmp(oem_table_id, "UVX"))
39 uv_system_type = UV_X2APIC;
40 else if (!strcmp(oem_table_id, "UVH")) {
41 uv_system_type = UV_NON_UNIQUE_APIC;
42 return 1;
43 }
44 }
45 return 0;
46}
47
48enum uv_system_type get_uv_system_type(void)
49{
50 return uv_system_type;
51}
52
53int is_uv_system(void)
54{
55 return uv_system_type != UV_NONE;
56}
57
29DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); 58DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
30EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); 59EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
31 60
@@ -123,6 +152,10 @@ static int uv_apic_id_registered(void)
123 return 1; 152 return 1;
124} 153}
125 154
155static void uv_init_apic_ldr(void)
156{
157}
158
126static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) 159static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
127{ 160{
128 int cpu; 161 int cpu;
@@ -138,9 +171,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
138 return BAD_APICID; 171 return BAD_APICID;
139} 172}
140 173
174static unsigned int get_apic_id(unsigned long x)
175{
176 unsigned int id;
177
178 WARN_ON(preemptible() && num_online_cpus() > 1);
179 id = x | __get_cpu_var(x2apic_extra_bits);
180
181 return id;
182}
183
184static unsigned long set_apic_id(unsigned int id)
185{
186 unsigned long x;
187
188 /* maskout x2apic_extra_bits ? */
189 x = id;
190 return x;
191}
192
193static unsigned int uv_read_apic_id(void)
194{
195
196 return get_apic_id(apic_read(APIC_ID));
197}
198
141static unsigned int phys_pkg_id(int index_msb) 199static unsigned int phys_pkg_id(int index_msb)
142{ 200{
143 return GET_APIC_ID(read_apic_id()) >> index_msb; 201 return uv_read_apic_id() >> index_msb;
144} 202}
145 203
146#ifdef ZZZ /* Needs x2apic patch */ 204#ifdef ZZZ /* Needs x2apic patch */
@@ -152,17 +210,22 @@ static void uv_send_IPI_self(int vector)
152 210
153struct genapic apic_x2apic_uv_x = { 211struct genapic apic_x2apic_uv_x = {
154 .name = "UV large system", 212 .name = "UV large system",
213 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
155 .int_delivery_mode = dest_Fixed, 214 .int_delivery_mode = dest_Fixed,
156 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 215 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
157 .target_cpus = uv_target_cpus, 216 .target_cpus = uv_target_cpus,
158 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ 217 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
159 .apic_id_registered = uv_apic_id_registered, 218 .apic_id_registered = uv_apic_id_registered,
219 .init_apic_ldr = uv_init_apic_ldr,
160 .send_IPI_all = uv_send_IPI_all, 220 .send_IPI_all = uv_send_IPI_all,
161 .send_IPI_allbutself = uv_send_IPI_allbutself, 221 .send_IPI_allbutself = uv_send_IPI_allbutself,
162 .send_IPI_mask = uv_send_IPI_mask, 222 .send_IPI_mask = uv_send_IPI_mask,
163 /* ZZZ.send_IPI_self = uv_send_IPI_self, */ 223 /* ZZZ.send_IPI_self = uv_send_IPI_self, */
164 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 224 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
165 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ 225 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
226 .get_apic_id = get_apic_id,
227 .set_apic_id = set_apic_id,
228 .apic_id_mask = (0xFFFFFFFFu),
166}; 229};
167 230
168static __cpuinit void set_x2apic_extra_bits(int pnode) 231static __cpuinit void set_x2apic_extra_bits(int pnode)
@@ -401,3 +464,5 @@ void __cpuinit uv_cpu_init(void)
401 if (get_uv_system_type() == UV_NON_UNIQUE_APIC) 464 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
402 set_x2apic_extra_bits(uv_hub_info->pnode); 465 set_x2apic_extra_bits(uv_hub_info->pnode);
403} 466}
467
468
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index eb9ddd8efb82..45723f1fe198 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -21,9 +21,12 @@
21# include <asm/sigcontext32.h> 21# include <asm/sigcontext32.h>
22# include <asm/user32.h> 22# include <asm/user32.h>
23#else 23#else
24# define save_i387_ia32 save_i387 24# define save_i387_xstate_ia32 save_i387_xstate
25# define restore_i387_ia32 restore_i387 25# define restore_i387_xstate_ia32 restore_i387_xstate
26# define _fpstate_ia32 _fpstate 26# define _fpstate_ia32 _fpstate
27# define _xstate_ia32 _xstate
28# define sig_xstate_ia32_size sig_xstate_size
29# define fx_sw_reserved_ia32 fx_sw_reserved
27# define user_i387_ia32_struct user_i387_struct 30# define user_i387_ia32_struct user_i387_struct
28# define user32_fxsr_struct user_fxsr_struct 31# define user32_fxsr_struct user_fxsr_struct
29#endif 32#endif
@@ -36,6 +39,7 @@
36 39
37static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; 40static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
38unsigned int xstate_size; 41unsigned int xstate_size;
42unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
39static struct i387_fxsave_struct fx_scratch __cpuinitdata; 43static struct i387_fxsave_struct fx_scratch __cpuinitdata;
40 44
41void __cpuinit mxcsr_feature_mask_init(void) 45void __cpuinit mxcsr_feature_mask_init(void)
@@ -61,6 +65,11 @@ void __init init_thread_xstate(void)
61 return; 65 return;
62 } 66 }
63 67
68 if (cpu_has_xsave) {
69 xsave_cntxt_init();
70 return;
71 }
72
64 if (cpu_has_fxsr) 73 if (cpu_has_fxsr)
65 xstate_size = sizeof(struct i387_fxsave_struct); 74 xstate_size = sizeof(struct i387_fxsave_struct);
66#ifdef CONFIG_X86_32 75#ifdef CONFIG_X86_32
@@ -83,9 +92,19 @@ void __cpuinit fpu_init(void)
83 92
84 write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ 93 write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
85 94
95 /*
96 * Boot processor to setup the FP and extended state context info.
97 */
98 if (!smp_processor_id())
99 init_thread_xstate();
100 xsave_init();
101
86 mxcsr_feature_mask_init(); 102 mxcsr_feature_mask_init();
87 /* clean state in init */ 103 /* clean state in init */
88 current_thread_info()->status = 0; 104 if (cpu_has_xsave)
105 current_thread_info()->status = TS_XSAVE;
106 else
107 current_thread_info()->status = 0;
89 clear_used_math(); 108 clear_used_math();
90} 109}
91#endif /* CONFIG_X86_64 */ 110#endif /* CONFIG_X86_64 */
@@ -195,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
195 */ 214 */
196 target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; 215 target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
197 216
217 /*
218 * update the header bits in the xsave header, indicating the
219 * presence of FP and SSE state.
220 */
221 if (cpu_has_xsave)
222 target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
223
198 return ret; 224 return ret;
199} 225}
200 226
@@ -395,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
395 if (!ret) 421 if (!ret)
396 convert_to_fxsr(target, &env); 422 convert_to_fxsr(target, &env);
397 423
424 /*
425 * update the header bit in the xsave header, indicating the
426 * presence of FP.
427 */
428 if (cpu_has_xsave)
429 target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
398 return ret; 430 return ret;
399} 431}
400 432
@@ -407,7 +439,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
407 struct task_struct *tsk = current; 439 struct task_struct *tsk = current;
408 struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; 440 struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
409 441
410 unlazy_fpu(tsk);
411 fp->status = fp->swd; 442 fp->status = fp->swd;
412 if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) 443 if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
413 return -1; 444 return -1;
@@ -421,8 +452,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
421 struct user_i387_ia32_struct env; 452 struct user_i387_ia32_struct env;
422 int err = 0; 453 int err = 0;
423 454
424 unlazy_fpu(tsk);
425
426 convert_from_fxsr(&env, tsk); 455 convert_from_fxsr(&env, tsk);
427 if (__copy_to_user(buf, &env, sizeof(env))) 456 if (__copy_to_user(buf, &env, sizeof(env)))
428 return -1; 457 return -1;
@@ -432,16 +461,40 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
432 if (err) 461 if (err)
433 return -1; 462 return -1;
434 463
435 if (__copy_to_user(&buf->_fxsr_env[0], fx, 464 if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
436 sizeof(struct i387_fxsave_struct)))
437 return -1; 465 return -1;
438 return 1; 466 return 1;
439} 467}
440 468
441int save_i387_ia32(struct _fpstate_ia32 __user *buf) 469static int save_i387_xsave(void __user *buf)
470{
471 struct _fpstate_ia32 __user *fx = buf;
472 int err = 0;
473
474 if (save_i387_fxsave(fx) < 0)
475 return -1;
476
477 err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
478 sizeof(struct _fpx_sw_bytes));
479 err |= __put_user(FP_XSTATE_MAGIC2,
480 (__u32 __user *) (buf + sig_xstate_ia32_size
481 - FP_XSTATE_MAGIC2_SIZE));
482 if (err)
483 return -1;
484
485 return 1;
486}
487
488int save_i387_xstate_ia32(void __user *buf)
442{ 489{
490 struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
491 struct task_struct *tsk = current;
492
443 if (!used_math()) 493 if (!used_math())
444 return 0; 494 return 0;
495
496 if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
497 return -EACCES;
445 /* 498 /*
446 * This will cause a "finit" to be triggered by the next 499 * This will cause a "finit" to be triggered by the next
447 * attempted FPU operation by the 'current' process. 500 * attempted FPU operation by the 'current' process.
@@ -451,13 +504,17 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf)
451 if (!HAVE_HWFP) { 504 if (!HAVE_HWFP) {
452 return fpregs_soft_get(current, NULL, 505 return fpregs_soft_get(current, NULL,
453 0, sizeof(struct user_i387_ia32_struct), 506 0, sizeof(struct user_i387_ia32_struct),
454 NULL, buf) ? -1 : 1; 507 NULL, fp) ? -1 : 1;
455 } 508 }
456 509
510 unlazy_fpu(tsk);
511
512 if (cpu_has_xsave)
513 return save_i387_xsave(fp);
457 if (cpu_has_fxsr) 514 if (cpu_has_fxsr)
458 return save_i387_fxsave(buf); 515 return save_i387_fxsave(fp);
459 else 516 else
460 return save_i387_fsave(buf); 517 return save_i387_fsave(fp);
461} 518}
462 519
463static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) 520static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
@@ -468,14 +525,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
468 sizeof(struct i387_fsave_struct)); 525 sizeof(struct i387_fsave_struct));
469} 526}
470 527
471static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) 528static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
529 unsigned int size)
472{ 530{
473 struct task_struct *tsk = current; 531 struct task_struct *tsk = current;
474 struct user_i387_ia32_struct env; 532 struct user_i387_ia32_struct env;
475 int err; 533 int err;
476 534
477 err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], 535 err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
478 sizeof(struct i387_fxsave_struct)); 536 size);
479 /* mxcsr reserved bits must be masked to zero for security reasons */ 537 /* mxcsr reserved bits must be masked to zero for security reasons */
480 tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; 538 tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
481 if (err || __copy_from_user(&env, buf, sizeof(env))) 539 if (err || __copy_from_user(&env, buf, sizeof(env)))
@@ -485,14 +543,69 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
485 return 0; 543 return 0;
486} 544}
487 545
488int restore_i387_ia32(struct _fpstate_ia32 __user *buf) 546static int restore_i387_xsave(void __user *buf)
547{
548 struct _fpx_sw_bytes fx_sw_user;
549 struct _fpstate_ia32 __user *fx_user =
550 ((struct _fpstate_ia32 __user *) buf);
551 struct i387_fxsave_struct __user *fx =
552 (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
553 struct xsave_hdr_struct *xsave_hdr =
554 &current->thread.xstate->xsave.xsave_hdr;
555 u64 mask;
556 int err;
557
558 if (check_for_xstate(fx, buf, &fx_sw_user))
559 goto fx_only;
560
561 mask = fx_sw_user.xstate_bv;
562
563 err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
564
565 xsave_hdr->xstate_bv &= pcntxt_mask;
566 /*
567 * These bits must be zero.
568 */
569 xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
570
571 /*
572 * Init the state that is not present in the memory layout
573 * and enabled by the OS.
574 */
575 mask = ~(pcntxt_mask & ~mask);
576 xsave_hdr->xstate_bv &= mask;
577
578 return err;
579fx_only:
580 /*
581 * Couldn't find the extended state information in the memory
582 * layout. Restore the FP/SSE and init the other extended state
583 * enabled by the OS.
584 */
585 xsave_hdr->xstate_bv = XSTATE_FPSSE;
586 return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
587}
588
589int restore_i387_xstate_ia32(void __user *buf)
489{ 590{
490 int err; 591 int err;
491 struct task_struct *tsk = current; 592 struct task_struct *tsk = current;
593 struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
492 594
493 if (HAVE_HWFP) 595 if (HAVE_HWFP)
494 clear_fpu(tsk); 596 clear_fpu(tsk);
495 597
598 if (!buf) {
599 if (used_math()) {
600 clear_fpu(tsk);
601 clear_used_math();
602 }
603
604 return 0;
605 } else
606 if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
607 return -EACCES;
608
496 if (!used_math()) { 609 if (!used_math()) {
497 err = init_fpu(tsk); 610 err = init_fpu(tsk);
498 if (err) 611 if (err)
@@ -500,14 +613,17 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
500 } 613 }
501 614
502 if (HAVE_HWFP) { 615 if (HAVE_HWFP) {
503 if (cpu_has_fxsr) 616 if (cpu_has_xsave)
504 err = restore_i387_fxsave(buf); 617 err = restore_i387_xsave(buf);
618 else if (cpu_has_fxsr)
619 err = restore_i387_fxsave(fp, sizeof(struct
620 i387_fxsave_struct));
505 else 621 else
506 err = restore_i387_fsave(buf); 622 err = restore_i387_fsave(fp);
507 } else { 623 } else {
508 err = fpregs_soft_set(current, NULL, 624 err = fpregs_soft_set(current, NULL,
509 0, sizeof(struct user_i387_ia32_struct), 625 0, sizeof(struct user_i387_ia32_struct),
510 NULL, buf) != 0; 626 NULL, fp) != 0;
511 } 627 }
512 set_used_math(); 628 set_used_math();
513 629
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index dc92b49d9204..4b8a53d841f7 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
282 282
283device_initcall(i8259A_init_sysfs); 283device_initcall(i8259A_init_sysfs);
284 284
285void mask_8259A(void)
286{
287 unsigned long flags;
288
289 spin_lock_irqsave(&i8259A_lock, flags);
290
291 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
292 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
293
294 spin_unlock_irqrestore(&i8259A_lock, flags);
295}
296
297void unmask_8259A(void)
298{
299 unsigned long flags;
300
301 spin_lock_irqsave(&i8259A_lock, flags);
302
303 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
304 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
305
306 spin_unlock_irqrestore(&i8259A_lock, flags);
307}
308
285void init_8259A(int auto_eoi) 309void init_8259A(int auto_eoi)
286{ 310{
287 unsigned long flags; 311 unsigned long flags;
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 09cddb57bec4..26ea3ea3fb30 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -46,6 +46,7 @@
46#include <asm/nmi.h> 46#include <asm/nmi.h>
47#include <asm/msidef.h> 47#include <asm/msidef.h>
48#include <asm/hypertransport.h> 48#include <asm/hypertransport.h>
49#include <asm/setup.h>
49 50
50#include <mach_apic.h> 51#include <mach_apic.h>
51#include <mach_apicdef.h> 52#include <mach_apicdef.h>
@@ -1490,7 +1491,7 @@ void /*__init*/ print_local_APIC(void *dummy)
1490 smp_processor_id(), hard_smp_processor_id()); 1491 smp_processor_id(), hard_smp_processor_id());
1491 v = apic_read(APIC_ID); 1492 v = apic_read(APIC_ID);
1492 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, 1493 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
1493 GET_APIC_ID(read_apic_id())); 1494 GET_APIC_ID(v));
1494 v = apic_read(APIC_LVR); 1495 v = apic_read(APIC_LVR);
1495 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1496 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1496 ver = GET_APIC_VERSION(v); 1497 ver = GET_APIC_VERSION(v);
@@ -1698,8 +1699,7 @@ void disable_IO_APIC(void)
1698 entry.dest_mode = 0; /* Physical */ 1699 entry.dest_mode = 0; /* Physical */
1699 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1700 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1700 entry.vector = 0; 1701 entry.vector = 0;
1701 entry.dest.physical.physical_dest = 1702 entry.dest.physical.physical_dest = read_apic_id();
1702 GET_APIC_ID(read_apic_id());
1703 1703
1704 /* 1704 /*
1705 * Add it to the IO-APIC irq-routing table: 1705 * Add it to the IO-APIC irq-routing table:
@@ -1725,10 +1725,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
1725 unsigned char old_id; 1725 unsigned char old_id;
1726 unsigned long flags; 1726 unsigned long flags;
1727 1727
1728#ifdef CONFIG_X86_NUMAQ 1728 if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
1729 if (found_numaq)
1730 return; 1729 return;
1731#endif
1732 1730
1733 /* 1731 /*
1734 * Don't check I/O APIC IDs for xAPIC systems. They have 1732 * Don't check I/O APIC IDs for xAPIC systems. They have
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 61a83b70c18f..e63282e78864 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
37#include <acpi/acpi_bus.h> 37#include <acpi/acpi_bus.h>
38#endif 38#endif
39#include <linux/bootmem.h> 39#include <linux/bootmem.h>
40#include <linux/dmar.h>
40 41
41#include <asm/idle.h> 42#include <asm/idle.h>
42#include <asm/io.h> 43#include <asm/io.h>
@@ -49,6 +50,7 @@
49#include <asm/nmi.h> 50#include <asm/nmi.h>
50#include <asm/msidef.h> 51#include <asm/msidef.h>
51#include <asm/hypertransport.h> 52#include <asm/hypertransport.h>
53#include <asm/irq_remapping.h>
52 54
53#include <mach_ipi.h> 55#include <mach_ipi.h>
54#include <mach_apic.h> 56#include <mach_apic.h>
@@ -108,6 +110,9 @@ static DEFINE_SPINLOCK(vector_lock);
108 */ 110 */
109int nr_ioapic_registers[MAX_IO_APICS]; 111int nr_ioapic_registers[MAX_IO_APICS];
110 112
113/* I/O APIC RTE contents at the OS boot up */
114struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
115
111/* I/O APIC entries */ 116/* I/O APIC entries */
112struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 117struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
113int nr_ioapics; 118int nr_ioapics;
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
303 pin = entry->pin; 308 pin = entry->pin;
304 if (pin == -1) 309 if (pin == -1)
305 break; 310 break;
306 io_apic_write(apic, 0x11 + pin*2, dest); 311 /*
312 * With interrupt-remapping, destination information comes
313 * from interrupt-remapping table entry.
314 */
315 if (!irq_remapped(irq))
316 io_apic_write(apic, 0x11 + pin*2, dest);
307 reg = io_apic_read(apic, 0x10 + pin*2); 317 reg = io_apic_read(apic, 0x10 + pin*2);
308 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 318 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
309 reg |= vector; 319 reg |= vector;
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void)
440 clear_IO_APIC_pin(apic, pin); 450 clear_IO_APIC_pin(apic, pin);
441} 451}
442 452
453/*
454 * Saves and masks all the unmasked IO-APIC RTE's
455 */
456int save_mask_IO_APIC_setup(void)
457{
458 union IO_APIC_reg_01 reg_01;
459 unsigned long flags;
460 int apic, pin;
461
462 /*
463 * The number of IO-APIC IRQ registers (== #pins):
464 */
465 for (apic = 0; apic < nr_ioapics; apic++) {
466 spin_lock_irqsave(&ioapic_lock, flags);
467 reg_01.raw = io_apic_read(apic, 1);
468 spin_unlock_irqrestore(&ioapic_lock, flags);
469 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
470 }
471
472 for (apic = 0; apic < nr_ioapics; apic++) {
473 early_ioapic_entries[apic] =
474 kzalloc(sizeof(struct IO_APIC_route_entry) *
475 nr_ioapic_registers[apic], GFP_KERNEL);
476 if (!early_ioapic_entries[apic])
477 return -ENOMEM;
478 }
479
480 for (apic = 0; apic < nr_ioapics; apic++)
481 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
482 struct IO_APIC_route_entry entry;
483
484 entry = early_ioapic_entries[apic][pin] =
485 ioapic_read_entry(apic, pin);
486 if (!entry.mask) {
487 entry.mask = 1;
488 ioapic_write_entry(apic, pin, entry);
489 }
490 }
491 return 0;
492}
493
494void restore_IO_APIC_setup(void)
495{
496 int apic, pin;
497
498 for (apic = 0; apic < nr_ioapics; apic++)
499 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
500 ioapic_write_entry(apic, pin,
501 early_ioapic_entries[apic][pin]);
502}
503
504void reinit_intr_remapped_IO_APIC(int intr_remapping)
505{
506 /*
507 * for now plain restore of previous settings.
508 * TBD: In the case of OS enabling interrupt-remapping,
509 * IO-APIC RTE's need to be setup to point to interrupt-remapping
510 * table entries. for now, do a plain restore, and wait for
511 * the setup_IO_APIC_irqs() to do proper initialization.
512 */
513 restore_IO_APIC_setup();
514}
515
443int skip_ioapic_setup; 516int skip_ioapic_setup;
444int ioapic_force; 517int ioapic_force;
445 518
@@ -839,18 +912,98 @@ void __setup_vector_irq(int cpu)
839} 912}
840 913
841static struct irq_chip ioapic_chip; 914static struct irq_chip ioapic_chip;
915#ifdef CONFIG_INTR_REMAP
916static struct irq_chip ir_ioapic_chip;
917#endif
842 918
843static void ioapic_register_intr(int irq, unsigned long trigger) 919static void ioapic_register_intr(int irq, unsigned long trigger)
844{ 920{
845 if (trigger) { 921 if (trigger)
846 irq_desc[irq].status |= IRQ_LEVEL; 922 irq_desc[irq].status |= IRQ_LEVEL;
847 set_irq_chip_and_handler_name(irq, &ioapic_chip, 923 else
848 handle_fasteoi_irq, "fasteoi");
849 } else {
850 irq_desc[irq].status &= ~IRQ_LEVEL; 924 irq_desc[irq].status &= ~IRQ_LEVEL;
925
926#ifdef CONFIG_INTR_REMAP
927 if (irq_remapped(irq)) {
928 irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
929 if (trigger)
930 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
931 handle_fasteoi_irq,
932 "fasteoi");
933 else
934 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
935 handle_edge_irq, "edge");
936 return;
937 }
938#endif
939 if (trigger)
940 set_irq_chip_and_handler_name(irq, &ioapic_chip,
941 handle_fasteoi_irq,
942 "fasteoi");
943 else
851 set_irq_chip_and_handler_name(irq, &ioapic_chip, 944 set_irq_chip_and_handler_name(irq, &ioapic_chip,
852 handle_edge_irq, "edge"); 945 handle_edge_irq, "edge");
946}
947
948static int setup_ioapic_entry(int apic, int irq,
949 struct IO_APIC_route_entry *entry,
950 unsigned int destination, int trigger,
951 int polarity, int vector)
952{
953 /*
954 * add it to the IO-APIC irq-routing table:
955 */
956 memset(entry,0,sizeof(*entry));
957
958#ifdef CONFIG_INTR_REMAP
959 if (intr_remapping_enabled) {
960 struct intel_iommu *iommu = map_ioapic_to_ir(apic);
961 struct irte irte;
962 struct IR_IO_APIC_route_entry *ir_entry =
963 (struct IR_IO_APIC_route_entry *) entry;
964 int index;
965
966 if (!iommu)
967 panic("No mapping iommu for ioapic %d\n", apic);
968
969 index = alloc_irte(iommu, irq, 1);
970 if (index < 0)
971 panic("Failed to allocate IRTE for ioapic %d\n", apic);
972
973 memset(&irte, 0, sizeof(irte));
974
975 irte.present = 1;
976 irte.dst_mode = INT_DEST_MODE;
977 irte.trigger_mode = trigger;
978 irte.dlvry_mode = INT_DELIVERY_MODE;
979 irte.vector = vector;
980 irte.dest_id = IRTE_DEST(destination);
981
982 modify_irte(irq, &irte);
983
984 ir_entry->index2 = (index >> 15) & 0x1;
985 ir_entry->zero = 0;
986 ir_entry->format = 1;
987 ir_entry->index = (index & 0x7fff);
988 } else
989#endif
990 {
991 entry->delivery_mode = INT_DELIVERY_MODE;
992 entry->dest_mode = INT_DEST_MODE;
993 entry->dest = destination;
853 } 994 }
995
996 entry->mask = 0; /* enable IRQ */
997 entry->trigger = trigger;
998 entry->polarity = polarity;
999 entry->vector = vector;
1000
1001 /* Mask level triggered irqs.
1002 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1003 */
1004 if (trigger)
1005 entry->mask = 1;
1006 return 0;
854} 1007}
855 1008
856static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1009static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -875,24 +1028,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
875 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1028 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
876 irq, trigger, polarity); 1029 irq, trigger, polarity);
877 1030
878 /*
879 * add it to the IO-APIC irq-routing table:
880 */
881 memset(&entry,0,sizeof(entry));
882 1031
883 entry.delivery_mode = INT_DELIVERY_MODE; 1032 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
884 entry.dest_mode = INT_DEST_MODE; 1033 cpu_mask_to_apicid(mask), trigger, polarity,
885 entry.dest = cpu_mask_to_apicid(mask); 1034 cfg->vector)) {
886 entry.mask = 0; /* enable IRQ */ 1035 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
887 entry.trigger = trigger; 1036 mp_ioapics[apic].mp_apicid, pin);
888 entry.polarity = polarity; 1037 __clear_irq_vector(irq);
889 entry.vector = cfg->vector; 1038 return;
890 1039 }
891 /* Mask level triggered irqs.
892 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
893 */
894 if (trigger)
895 entry.mask = 1;
896 1040
897 ioapic_register_intr(irq, trigger); 1041 ioapic_register_intr(irq, trigger);
898 if (irq < 16) 1042 if (irq < 16)
@@ -944,6 +1088,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
944{ 1088{
945 struct IO_APIC_route_entry entry; 1089 struct IO_APIC_route_entry entry;
946 1090
1091 if (intr_remapping_enabled)
1092 return;
1093
947 memset(&entry, 0, sizeof(entry)); 1094 memset(&entry, 0, sizeof(entry));
948 1095
949 /* 1096 /*
@@ -1090,6 +1237,7 @@ static __apicdebuginit void print_APIC_bitfield (int base)
1090void __apicdebuginit print_local_APIC(void * dummy) 1237void __apicdebuginit print_local_APIC(void * dummy)
1091{ 1238{
1092 unsigned int v, ver, maxlvt; 1239 unsigned int v, ver, maxlvt;
1240 unsigned long icr;
1093 1241
1094 if (apic_verbosity == APIC_QUIET) 1242 if (apic_verbosity == APIC_QUIET)
1095 return; 1243 return;
@@ -1097,7 +1245,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
1097 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 1245 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1098 smp_processor_id(), hard_smp_processor_id()); 1246 smp_processor_id(), hard_smp_processor_id());
1099 v = apic_read(APIC_ID); 1247 v = apic_read(APIC_ID);
1100 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); 1248 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
1101 v = apic_read(APIC_LVR); 1249 v = apic_read(APIC_LVR);
1102 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1250 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1103 ver = GET_APIC_VERSION(v); 1251 ver = GET_APIC_VERSION(v);
@@ -1133,10 +1281,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
1133 v = apic_read(APIC_ESR); 1281 v = apic_read(APIC_ESR);
1134 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1282 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1135 1283
1136 v = apic_read(APIC_ICR); 1284 icr = apic_icr_read();
1137 printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 1285 printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
1138 v = apic_read(APIC_ICR2); 1286 printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
1139 printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
1140 1287
1141 v = apic_read(APIC_LVTT); 1288 v = apic_read(APIC_LVTT);
1142 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 1289 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1291,7 +1438,7 @@ void disable_IO_APIC(void)
1291 entry.dest_mode = 0; /* Physical */ 1438 entry.dest_mode = 0; /* Physical */
1292 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1439 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1293 entry.vector = 0; 1440 entry.vector = 0;
1294 entry.dest = GET_APIC_ID(read_apic_id()); 1441 entry.dest = read_apic_id();
1295 1442
1296 /* 1443 /*
1297 * Add it to the IO-APIC irq-routing table: 1444 * Add it to the IO-APIC irq-routing table:
@@ -1397,6 +1544,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
1397 */ 1544 */
1398 1545
1399#ifdef CONFIG_SMP 1546#ifdef CONFIG_SMP
1547
1548#ifdef CONFIG_INTR_REMAP
1549static void ir_irq_migration(struct work_struct *work);
1550
1551static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
1552
1553/*
1554 * Migrate the IO-APIC irq in the presence of intr-remapping.
1555 *
1556 * For edge triggered, irq migration is a simple atomic update(of vector
1557 * and cpu destination) of IRTE and flush the hardware cache.
1558 *
1559 * For level triggered, we need to modify the io-apic RTE aswell with the update
1560 * vector information, along with modifying IRTE with vector and destination.
1561 * So irq migration for level triggered is little bit more complex compared to
1562 * edge triggered migration. But the good news is, we use the same algorithm
1563 * for level triggered migration as we have today, only difference being,
1564 * we now initiate the irq migration from process context instead of the
1565 * interrupt context.
1566 *
1567 * In future, when we do a directed EOI (combined with cpu EOI broadcast
1568 * suppression) to the IO-APIC, level triggered irq migration will also be
1569 * as simple as edge triggered migration and we can do the irq migration
1570 * with a simple atomic update to IO-APIC RTE.
1571 */
1572static void migrate_ioapic_irq(int irq, cpumask_t mask)
1573{
1574 struct irq_cfg *cfg = irq_cfg + irq;
1575 struct irq_desc *desc = irq_desc + irq;
1576 cpumask_t tmp, cleanup_mask;
1577 struct irte irte;
1578 int modify_ioapic_rte = desc->status & IRQ_LEVEL;
1579 unsigned int dest;
1580 unsigned long flags;
1581
1582 cpus_and(tmp, mask, cpu_online_map);
1583 if (cpus_empty(tmp))
1584 return;
1585
1586 if (get_irte(irq, &irte))
1587 return;
1588
1589 if (assign_irq_vector(irq, mask))
1590 return;
1591
1592 cpus_and(tmp, cfg->domain, mask);
1593 dest = cpu_mask_to_apicid(tmp);
1594
1595 if (modify_ioapic_rte) {
1596 spin_lock_irqsave(&ioapic_lock, flags);
1597 __target_IO_APIC_irq(irq, dest, cfg->vector);
1598 spin_unlock_irqrestore(&ioapic_lock, flags);
1599 }
1600
1601 irte.vector = cfg->vector;
1602 irte.dest_id = IRTE_DEST(dest);
1603
1604 /*
1605 * Modified the IRTE and flushes the Interrupt entry cache.
1606 */
1607 modify_irte(irq, &irte);
1608
1609 if (cfg->move_in_progress) {
1610 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1611 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1612 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1613 cfg->move_in_progress = 0;
1614 }
1615
1616 irq_desc[irq].affinity = mask;
1617}
1618
1619static int migrate_irq_remapped_level(int irq)
1620{
1621 int ret = -1;
1622
1623 mask_IO_APIC_irq(irq);
1624
1625 if (io_apic_level_ack_pending(irq)) {
1626 /*
1627 * Interrupt in progress. Migrating irq now will change the
1628 * vector information in the IO-APIC RTE and that will confuse
1629 * the EOI broadcast performed by cpu.
1630 * So, delay the irq migration to the next instance.
1631 */
1632 schedule_delayed_work(&ir_migration_work, 1);
1633 goto unmask;
1634 }
1635
1636 /* everthing is clear. we have right of way */
1637 migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
1638
1639 ret = 0;
1640 irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
1641 cpus_clear(irq_desc[irq].pending_mask);
1642
1643unmask:
1644 unmask_IO_APIC_irq(irq);
1645 return ret;
1646}
1647
1648static void ir_irq_migration(struct work_struct *work)
1649{
1650 int irq;
1651
1652 for (irq = 0; irq < NR_IRQS; irq++) {
1653 struct irq_desc *desc = irq_desc + irq;
1654 if (desc->status & IRQ_MOVE_PENDING) {
1655 unsigned long flags;
1656
1657 spin_lock_irqsave(&desc->lock, flags);
1658 if (!desc->chip->set_affinity ||
1659 !(desc->status & IRQ_MOVE_PENDING)) {
1660 desc->status &= ~IRQ_MOVE_PENDING;
1661 spin_unlock_irqrestore(&desc->lock, flags);
1662 continue;
1663 }
1664
1665 desc->chip->set_affinity(irq,
1666 irq_desc[irq].pending_mask);
1667 spin_unlock_irqrestore(&desc->lock, flags);
1668 }
1669 }
1670}
1671
1672/*
1673 * Migrates the IRQ destination in the process context.
1674 */
1675static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1676{
1677 if (irq_desc[irq].status & IRQ_LEVEL) {
1678 irq_desc[irq].status |= IRQ_MOVE_PENDING;
1679 irq_desc[irq].pending_mask = mask;
1680 migrate_irq_remapped_level(irq);
1681 return;
1682 }
1683
1684 migrate_ioapic_irq(irq, mask);
1685}
1686#endif
1687
1400asmlinkage void smp_irq_move_cleanup_interrupt(void) 1688asmlinkage void smp_irq_move_cleanup_interrupt(void)
1401{ 1689{
1402 unsigned vector, me; 1690 unsigned vector, me;
@@ -1453,6 +1741,17 @@ static void irq_complete_move(unsigned int irq)
1453#else 1741#else
1454static inline void irq_complete_move(unsigned int irq) {} 1742static inline void irq_complete_move(unsigned int irq) {}
1455#endif 1743#endif
1744#ifdef CONFIG_INTR_REMAP
1745static void ack_x2apic_level(unsigned int irq)
1746{
1747 ack_x2APIC_irq();
1748}
1749
1750static void ack_x2apic_edge(unsigned int irq)
1751{
1752 ack_x2APIC_irq();
1753}
1754#endif
1456 1755
1457static void ack_apic_edge(unsigned int irq) 1756static void ack_apic_edge(unsigned int irq)
1458{ 1757{
@@ -1527,6 +1826,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
1527 .retrigger = ioapic_retrigger_irq, 1826 .retrigger = ioapic_retrigger_irq,
1528}; 1827};
1529 1828
1829#ifdef CONFIG_INTR_REMAP
1830static struct irq_chip ir_ioapic_chip __read_mostly = {
1831 .name = "IR-IO-APIC",
1832 .startup = startup_ioapic_irq,
1833 .mask = mask_IO_APIC_irq,
1834 .unmask = unmask_IO_APIC_irq,
1835 .ack = ack_x2apic_edge,
1836 .eoi = ack_x2apic_level,
1837#ifdef CONFIG_SMP
1838 .set_affinity = set_ir_ioapic_affinity_irq,
1839#endif
1840 .retrigger = ioapic_retrigger_irq,
1841};
1842#endif
1843
1530static inline void init_IO_APIC_traps(void) 1844static inline void init_IO_APIC_traps(void)
1531{ 1845{
1532 int irq; 1846 int irq;
@@ -1712,6 +2026,8 @@ static inline void __init check_timer(void)
1712 * 8259A. 2026 * 8259A.
1713 */ 2027 */
1714 if (pin1 == -1) { 2028 if (pin1 == -1) {
2029 if (intr_remapping_enabled)
2030 panic("BIOS bug: timer not connected to IO-APIC");
1715 pin1 = pin2; 2031 pin1 = pin2;
1716 apic1 = apic2; 2032 apic1 = apic2;
1717 no_pin1 = 1; 2033 no_pin1 = 1;
@@ -1738,6 +2054,8 @@ static inline void __init check_timer(void)
1738 clear_IO_APIC_pin(0, pin1); 2054 clear_IO_APIC_pin(0, pin1);
1739 goto out; 2055 goto out;
1740 } 2056 }
2057 if (intr_remapping_enabled)
2058 panic("timer doesn't work through Interrupt-remapped IO-APIC");
1741 clear_IO_APIC_pin(apic1, pin1); 2059 clear_IO_APIC_pin(apic1, pin1);
1742 if (!no_pin1) 2060 if (!no_pin1)
1743 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2061 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1977,6 +2295,9 @@ void destroy_irq(unsigned int irq)
1977 2295
1978 dynamic_irq_cleanup(irq); 2296 dynamic_irq_cleanup(irq);
1979 2297
2298#ifdef CONFIG_INTR_REMAP
2299 free_irte(irq);
2300#endif
1980 spin_lock_irqsave(&vector_lock, flags); 2301 spin_lock_irqsave(&vector_lock, flags);
1981 __clear_irq_vector(irq); 2302 __clear_irq_vector(irq);
1982 spin_unlock_irqrestore(&vector_lock, flags); 2303 spin_unlock_irqrestore(&vector_lock, flags);
@@ -1995,11 +2316,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
1995 2316
1996 tmp = TARGET_CPUS; 2317 tmp = TARGET_CPUS;
1997 err = assign_irq_vector(irq, tmp); 2318 err = assign_irq_vector(irq, tmp);
1998 if (!err) { 2319 if (err)
1999 cpus_and(tmp, cfg->domain, tmp); 2320 return err;
2000 dest = cpu_mask_to_apicid(tmp); 2321
2322 cpus_and(tmp, cfg->domain, tmp);
2323 dest = cpu_mask_to_apicid(tmp);
2324
2325#ifdef CONFIG_INTR_REMAP
2326 if (irq_remapped(irq)) {
2327 struct irte irte;
2328 int ir_index;
2329 u16 sub_handle;
2330
2331 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
2332 BUG_ON(ir_index == -1);
2333
2334 memset (&irte, 0, sizeof(irte));
2335
2336 irte.present = 1;
2337 irte.dst_mode = INT_DEST_MODE;
2338 irte.trigger_mode = 0; /* edge */
2339 irte.dlvry_mode = INT_DELIVERY_MODE;
2340 irte.vector = cfg->vector;
2341 irte.dest_id = IRTE_DEST(dest);
2342
2343 modify_irte(irq, &irte);
2001 2344
2002 msg->address_hi = MSI_ADDR_BASE_HI; 2345 msg->address_hi = MSI_ADDR_BASE_HI;
2346 msg->data = sub_handle;
2347 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
2348 MSI_ADDR_IR_SHV |
2349 MSI_ADDR_IR_INDEX1(ir_index) |
2350 MSI_ADDR_IR_INDEX2(ir_index);
2351 } else
2352#endif
2353 {
2354 msg->address_hi = MSI_ADDR_BASE_HI;
2003 msg->address_lo = 2355 msg->address_lo =
2004 MSI_ADDR_BASE_LO | 2356 MSI_ADDR_BASE_LO |
2005 ((INT_DEST_MODE == 0) ? 2357 ((INT_DEST_MODE == 0) ?
@@ -2049,6 +2401,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2049 write_msi_msg(irq, &msg); 2401 write_msi_msg(irq, &msg);
2050 irq_desc[irq].affinity = mask; 2402 irq_desc[irq].affinity = mask;
2051} 2403}
2404
2405#ifdef CONFIG_INTR_REMAP
2406/*
2407 * Migrate the MSI irq to another cpumask. This migration is
2408 * done in the process context using interrupt-remapping hardware.
2409 */
2410static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2411{
2412 struct irq_cfg *cfg = irq_cfg + irq;
2413 unsigned int dest;
2414 cpumask_t tmp, cleanup_mask;
2415 struct irte irte;
2416
2417 cpus_and(tmp, mask, cpu_online_map);
2418 if (cpus_empty(tmp))
2419 return;
2420
2421 if (get_irte(irq, &irte))
2422 return;
2423
2424 if (assign_irq_vector(irq, mask))
2425 return;
2426
2427 cpus_and(tmp, cfg->domain, mask);
2428 dest = cpu_mask_to_apicid(tmp);
2429
2430 irte.vector = cfg->vector;
2431 irte.dest_id = IRTE_DEST(dest);
2432
2433 /*
2434 * atomically update the IRTE with the new destination and vector.
2435 */
2436 modify_irte(irq, &irte);
2437
2438 /*
2439 * After this point, all the interrupts will start arriving
2440 * at the new destination. So, time to cleanup the previous
2441 * vector allocation.
2442 */
2443 if (cfg->move_in_progress) {
2444 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2445 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2446 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2447 cfg->move_in_progress = 0;
2448 }
2449
2450 irq_desc[irq].affinity = mask;
2451}
2452#endif
2052#endif /* CONFIG_SMP */ 2453#endif /* CONFIG_SMP */
2053 2454
2054/* 2455/*
@@ -2066,26 +2467,157 @@ static struct irq_chip msi_chip = {
2066 .retrigger = ioapic_retrigger_irq, 2467 .retrigger = ioapic_retrigger_irq,
2067}; 2468};
2068 2469
2069int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 2470#ifdef CONFIG_INTR_REMAP
2471static struct irq_chip msi_ir_chip = {
2472 .name = "IR-PCI-MSI",
2473 .unmask = unmask_msi_irq,
2474 .mask = mask_msi_irq,
2475 .ack = ack_x2apic_edge,
2476#ifdef CONFIG_SMP
2477 .set_affinity = ir_set_msi_irq_affinity,
2478#endif
2479 .retrigger = ioapic_retrigger_irq,
2480};
2481
2482/*
2483 * Map the PCI dev to the corresponding remapping hardware unit
2484 * and allocate 'nvec' consecutive interrupt-remapping table entries
2485 * in it.
2486 */
2487static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2488{
2489 struct intel_iommu *iommu;
2490 int index;
2491
2492 iommu = map_dev_to_ir(dev);
2493 if (!iommu) {
2494 printk(KERN_ERR
2495 "Unable to map PCI %s to iommu\n", pci_name(dev));
2496 return -ENOENT;
2497 }
2498
2499 index = alloc_irte(iommu, irq, nvec);
2500 if (index < 0) {
2501 printk(KERN_ERR
2502 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2503 pci_name(dev));
2504 return -ENOSPC;
2505 }
2506 return index;
2507}
2508#endif
2509
2510static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2070{ 2511{
2512 int ret;
2071 struct msi_msg msg; 2513 struct msi_msg msg;
2514
2515 ret = msi_compose_msg(dev, irq, &msg);
2516 if (ret < 0)
2517 return ret;
2518
2519 set_irq_msi(irq, desc);
2520 write_msi_msg(irq, &msg);
2521
2522#ifdef CONFIG_INTR_REMAP
2523 if (irq_remapped(irq)) {
2524 struct irq_desc *desc = irq_desc + irq;
2525 /*
2526 * irq migration in process context
2527 */
2528 desc->status |= IRQ_MOVE_PCNTXT;
2529 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
2530 } else
2531#endif
2532 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2533
2534 return 0;
2535}
2536
2537int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2538{
2072 int irq, ret; 2539 int irq, ret;
2540
2073 irq = create_irq(); 2541 irq = create_irq();
2074 if (irq < 0) 2542 if (irq < 0)
2075 return irq; 2543 return irq;
2076 2544
2077 ret = msi_compose_msg(dev, irq, &msg); 2545#ifdef CONFIG_INTR_REMAP
2546 if (!intr_remapping_enabled)
2547 goto no_ir;
2548
2549 ret = msi_alloc_irte(dev, irq, 1);
2550 if (ret < 0)
2551 goto error;
2552no_ir:
2553#endif
2554 ret = setup_msi_irq(dev, desc, irq);
2078 if (ret < 0) { 2555 if (ret < 0) {
2079 destroy_irq(irq); 2556 destroy_irq(irq);
2080 return ret; 2557 return ret;
2081 } 2558 }
2559 return 0;
2082 2560
2083 set_irq_msi(irq, desc); 2561#ifdef CONFIG_INTR_REMAP
2084 write_msi_msg(irq, &msg); 2562error:
2563 destroy_irq(irq);
2564 return ret;
2565#endif
2566}
2085 2567
2086 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 2568int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2569{
2570 int irq, ret, sub_handle;
2571 struct msi_desc *desc;
2572#ifdef CONFIG_INTR_REMAP
2573 struct intel_iommu *iommu = 0;
2574 int index = 0;
2575#endif
2087 2576
2577 sub_handle = 0;
2578 list_for_each_entry(desc, &dev->msi_list, list) {
2579 irq = create_irq();
2580 if (irq < 0)
2581 return irq;
2582#ifdef CONFIG_INTR_REMAP
2583 if (!intr_remapping_enabled)
2584 goto no_ir;
2585
2586 if (!sub_handle) {
2587 /*
2588 * allocate the consecutive block of IRTE's
2589 * for 'nvec'
2590 */
2591 index = msi_alloc_irte(dev, irq, nvec);
2592 if (index < 0) {
2593 ret = index;
2594 goto error;
2595 }
2596 } else {
2597 iommu = map_dev_to_ir(dev);
2598 if (!iommu) {
2599 ret = -ENOENT;
2600 goto error;
2601 }
2602 /*
2603 * setup the mapping between the irq and the IRTE
2604 * base index, the sub_handle pointing to the
2605 * appropriate interrupt remap table entry.
2606 */
2607 set_irte_irq(irq, iommu, index, sub_handle);
2608 }
2609no_ir:
2610#endif
2611 ret = setup_msi_irq(dev, desc, irq);
2612 if (ret < 0)
2613 goto error;
2614 sub_handle++;
2615 }
2088 return 0; 2616 return 0;
2617
2618error:
2619 destroy_irq(irq);
2620 return ret;
2089} 2621}
2090 2622
2091void arch_teardown_msi_irq(unsigned int irq) 2623void arch_teardown_msi_irq(unsigned int irq)
@@ -2333,6 +2865,10 @@ void __init setup_ioapic_dest(void)
2333 setup_IO_APIC_irq(ioapic, pin, irq, 2865 setup_IO_APIC_irq(ioapic, pin, irq,
2334 irq_trigger(irq_entry), 2866 irq_trigger(irq_entry),
2335 irq_polarity(irq_entry)); 2867 irq_polarity(irq_entry));
2868#ifdef CONFIG_INTR_REMAP
2869 else if (intr_remapping_enabled)
2870 set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
2871#endif
2336 else 2872 else
2337 set_ioapic_affinity_irq(irq, TARGET_CPUS); 2873 set_ioapic_affinity_irq(irq, TARGET_CPUS);
2338 } 2874 }
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b3fb430725cb..f98f4e1dba09 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
397 generic_bigsmp_probe(); 397 generic_bigsmp_probe();
398#endif 398#endif
399 399
400#ifdef CONFIG_X86_32
400 setup_apic_routing(); 401 setup_apic_routing();
402#endif
401 if (!num_processors) 403 if (!num_processors)
402 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 404 printk(KERN_ERR "MPTABLE: no processors registered!\n");
403 return num_processors; 405 return num_processors;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index eecc8c18f010..4caff39078e0 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
229 } 229 }
230} 230}
231 231
232static int __init numaq_setup_ioapic_ids(void)
233{
234 /* so can skip it */
235 return 1;
236}
237
232static struct x86_quirks numaq_x86_quirks __initdata = { 238static struct x86_quirks numaq_x86_quirks __initdata = {
233 .arch_pre_time_init = numaq_pre_time_init, 239 .arch_pre_time_init = numaq_pre_time_init,
234 .arch_time_init = NULL, 240 .arch_time_init = NULL,
@@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
243 .mpc_oem_bus_info = mpc_oem_bus_info, 249 .mpc_oem_bus_info = mpc_oem_bus_info,
244 .mpc_oem_pci_bus = mpc_oem_pci_bus, 250 .mpc_oem_pci_bus = mpc_oem_pci_bus,
245 .smp_read_mpc_oem = smp_read_mpc_oem, 251 .smp_read_mpc_oem = smp_read_mpc_oem,
252 .setup_ioapic_ids = numaq_setup_ioapic_ids,
246}; 253};
247 254
248void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, 255void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 300da17e61cb..4090cd6f8436 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -373,8 +373,6 @@ struct pv_cpu_ops pv_cpu_ops = {
373 373
374struct pv_apic_ops pv_apic_ops = { 374struct pv_apic_ops pv_apic_ops = {
375#ifdef CONFIG_X86_LOCAL_APIC 375#ifdef CONFIG_X86_LOCAL_APIC
376 .apic_write = native_apic_write,
377 .apic_read = native_apic_read,
378 .setup_boot_clock = setup_boot_APIC_clock, 376 .setup_boot_clock = setup_boot_APIC_clock,
379 .setup_secondary_clock = setup_secondary_APIC_clock, 377 .setup_secondary_clock = setup_secondary_APIC_clock,
380 .startup_ipi_hook = paravirt_nop, 378 .startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 362d4e7f2d38..673f12cf6eb0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -739,6 +739,8 @@ void __init setup_arch(char **cmdline_p)
739 num_physpages = max_pfn; 739 num_physpages = max_pfn;
740 740
741 check_efer(); 741 check_efer();
742 if (cpu_has_x2apic)
743 check_x2apic();
742 744
743 /* How many end-of-memory variables you have, grandma! */ 745 /* How many end-of-memory variables you have, grandma! */
744 /* need this before calling reserve_initrd */ 746 /* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
index 72bbb519d2dc..6dd7e2b70a4b 100644
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h
@@ -3,9 +3,18 @@ struct sigframe {
3 char __user *pretcode; 3 char __user *pretcode;
4 int sig; 4 int sig;
5 struct sigcontext sc; 5 struct sigcontext sc;
6 struct _fpstate fpstate; 6 /*
7 * fpstate is unused. fpstate is moved/allocated after
8 * retcode[] below. This movement allows to have the FP state and the
9 * future state extensions (xsave) stay together.
10 * And at the same time retaining the unused fpstate, prevents changing
11 * the offset of extramask[] in the sigframe and thus prevent any
12 * legacy application accessing/modifying it.
13 */
14 struct _fpstate fpstate_unused;
7 unsigned long extramask[_NSIG_WORDS-1]; 15 unsigned long extramask[_NSIG_WORDS-1];
8 char retcode[8]; 16 char retcode[8];
17 /* fp state follows here */
9}; 18};
10 19
11struct rt_sigframe { 20struct rt_sigframe {
@@ -15,13 +24,14 @@ struct rt_sigframe {
15 void __user *puc; 24 void __user *puc;
16 struct siginfo info; 25 struct siginfo info;
17 struct ucontext uc; 26 struct ucontext uc;
18 struct _fpstate fpstate;
19 char retcode[8]; 27 char retcode[8];
28 /* fp state follows here */
20}; 29};
21#else 30#else
22struct rt_sigframe { 31struct rt_sigframe {
23 char __user *pretcode; 32 char __user *pretcode;
24 struct ucontext uc; 33 struct ucontext uc;
25 struct siginfo info; 34 struct siginfo info;
35 /* fp state follows here */
26}; 36};
27#endif 37#endif
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 6fb5bcdd8933..0f98d69fbdb0 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -159,28 +159,14 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
159 } 159 }
160 160
161 { 161 {
162 struct _fpstate __user *buf; 162 void __user *buf;
163 163
164 err |= __get_user(buf, &sc->fpstate); 164 err |= __get_user(buf, &sc->fpstate);
165 if (buf) { 165 err |= restore_i387_xstate(buf);
166 if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
167 goto badframe;
168 err |= restore_i387(buf);
169 } else {
170 struct task_struct *me = current;
171
172 if (used_math()) {
173 clear_fpu(me);
174 clear_used_math();
175 }
176 }
177 } 166 }
178 167
179 err |= __get_user(*pax, &sc->ax); 168 err |= __get_user(*pax, &sc->ax);
180 return err; 169 return err;
181
182badframe:
183 return 1;
184} 170}
185 171
186asmlinkage unsigned long sys_sigreturn(unsigned long __unused) 172asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
@@ -262,7 +248,7 @@ badframe:
262 * Set up a signal frame. 248 * Set up a signal frame.
263 */ 249 */
264static int 250static int
265setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, 251setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
266 struct pt_regs *regs, unsigned long mask) 252 struct pt_regs *regs, unsigned long mask)
267{ 253{
268 int tmp, err = 0; 254 int tmp, err = 0;
@@ -289,7 +275,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
289 err |= __put_user(regs->sp, &sc->sp_at_signal); 275 err |= __put_user(regs->sp, &sc->sp_at_signal);
290 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); 276 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
291 277
292 tmp = save_i387(fpstate); 278 tmp = save_i387_xstate(fpstate);
293 if (tmp < 0) 279 if (tmp < 0)
294 err = 1; 280 err = 1;
295 else 281 else
@@ -306,7 +292,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
306 * Determine which stack to use.. 292 * Determine which stack to use..
307 */ 293 */
308static inline void __user * 294static inline void __user *
309get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) 295get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
296 void **fpstate)
310{ 297{
311 unsigned long sp; 298 unsigned long sp;
312 299
@@ -332,6 +319,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
332 sp = (unsigned long) ka->sa.sa_restorer; 319 sp = (unsigned long) ka->sa.sa_restorer;
333 } 320 }
334 321
322 if (used_math()) {
323 sp = sp - sig_xstate_size;
324 *fpstate = (struct _fpstate *) sp;
325 }
326
335 sp -= frame_size; 327 sp -= frame_size;
336 /* 328 /*
337 * Align the stack pointer according to the i386 ABI, 329 * Align the stack pointer according to the i386 ABI,
@@ -350,8 +342,9 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
350 void __user *restorer; 342 void __user *restorer;
351 int err = 0; 343 int err = 0;
352 int usig; 344 int usig;
345 void __user *fpstate = NULL;
353 346
354 frame = get_sigframe(ka, regs, sizeof(*frame)); 347 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
355 348
356 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 349 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
357 goto give_sigsegv; 350 goto give_sigsegv;
@@ -366,7 +359,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
366 if (err) 359 if (err)
367 goto give_sigsegv; 360 goto give_sigsegv;
368 361
369 err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); 362 err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
370 if (err) 363 if (err)
371 goto give_sigsegv; 364 goto give_sigsegv;
372 365
@@ -427,8 +420,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
427 void __user *restorer; 420 void __user *restorer;
428 int err = 0; 421 int err = 0;
429 int usig; 422 int usig;
423 void __user *fpstate = NULL;
430 424
431 frame = get_sigframe(ka, regs, sizeof(*frame)); 425 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
432 426
433 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 427 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
434 goto give_sigsegv; 428 goto give_sigsegv;
@@ -447,13 +441,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
447 goto give_sigsegv; 441 goto give_sigsegv;
448 442
449 /* Create the ucontext. */ 443 /* Create the ucontext. */
450 err |= __put_user(0, &frame->uc.uc_flags); 444 if (cpu_has_xsave)
445 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
446 else
447 err |= __put_user(0, &frame->uc.uc_flags);
451 err |= __put_user(0, &frame->uc.uc_link); 448 err |= __put_user(0, &frame->uc.uc_link);
452 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 449 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
453 err |= __put_user(sas_ss_flags(regs->sp), 450 err |= __put_user(sas_ss_flags(regs->sp),
454 &frame->uc.uc_stack.ss_flags); 451 &frame->uc.uc_stack.ss_flags);
455 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); 452 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
456 err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, 453 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
457 regs, set->sig[0]); 454 regs, set->sig[0]);
458 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 455 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
459 if (err) 456 if (err)
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ca316b5b742c..6c581698ab56 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -54,69 +54,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
54} 54}
55 55
56/* 56/*
57 * Signal frame handlers.
58 */
59
60static inline int save_i387(struct _fpstate __user *buf)
61{
62 struct task_struct *tsk = current;
63 int err = 0;
64
65 BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
66 sizeof(tsk->thread.xstate->fxsave));
67
68 if ((unsigned long)buf % 16)
69 printk("save_i387: bad fpstate %p\n", buf);
70
71 if (!used_math())
72 return 0;
73 clear_used_math(); /* trigger finit */
74 if (task_thread_info(tsk)->status & TS_USEDFPU) {
75 err = save_i387_checking((struct i387_fxsave_struct __user *)
76 buf);
77 if (err)
78 return err;
79 task_thread_info(tsk)->status &= ~TS_USEDFPU;
80 stts();
81 } else {
82 if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
83 sizeof(struct i387_fxsave_struct)))
84 return -1;
85 }
86 return 1;
87}
88
89/*
90 * This restores directly out of user space. Exceptions are handled.
91 */
92static inline int restore_i387(struct _fpstate __user *buf)
93{
94 struct task_struct *tsk = current;
95 int err;
96
97 if (!used_math()) {
98 err = init_fpu(tsk);
99 if (err)
100 return err;
101 }
102
103 if (!(task_thread_info(current)->status & TS_USEDFPU)) {
104 clts();
105 task_thread_info(current)->status |= TS_USEDFPU;
106 }
107 err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
108 if (unlikely(err)) {
109 /*
110 * Encountered an error while doing the restore from the
111 * user buffer, clear the fpu state.
112 */
113 clear_fpu(tsk);
114 clear_used_math();
115 }
116 return err;
117}
118
119/*
120 * Do a signal return; undo the signal stack. 57 * Do a signal return; undo the signal stack.
121 */ 58 */
122static int 59static int
@@ -160,25 +97,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
160 { 97 {
161 struct _fpstate __user * buf; 98 struct _fpstate __user * buf;
162 err |= __get_user(buf, &sc->fpstate); 99 err |= __get_user(buf, &sc->fpstate);
163 100 err |= restore_i387_xstate(buf);
164 if (buf) {
165 if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
166 goto badframe;
167 err |= restore_i387(buf);
168 } else {
169 struct task_struct *me = current;
170 if (used_math()) {
171 clear_fpu(me);
172 clear_used_math();
173 }
174 }
175 } 101 }
176 102
177 err |= __get_user(*pax, &sc->ax); 103 err |= __get_user(*pax, &sc->ax);
178 return err; 104 return err;
179
180badframe:
181 return 1;
182} 105}
183 106
184asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) 107asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
@@ -269,26 +192,23 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
269 sp = current->sas_ss_sp + current->sas_ss_size; 192 sp = current->sas_ss_sp + current->sas_ss_size;
270 } 193 }
271 194
272 return (void __user *)round_down(sp - size, 16); 195 return (void __user *)round_down(sp - size, 64);
273} 196}
274 197
275static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 198static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
276 sigset_t *set, struct pt_regs * regs) 199 sigset_t *set, struct pt_regs * regs)
277{ 200{
278 struct rt_sigframe __user *frame; 201 struct rt_sigframe __user *frame;
279 struct _fpstate __user *fp = NULL; 202 void __user *fp = NULL;
280 int err = 0; 203 int err = 0;
281 struct task_struct *me = current; 204 struct task_struct *me = current;
282 205
283 if (used_math()) { 206 if (used_math()) {
284 fp = get_stack(ka, regs, sizeof(struct _fpstate)); 207 fp = get_stack(ka, regs, sig_xstate_size);
285 frame = (void __user *)round_down( 208 frame = (void __user *)round_down(
286 (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; 209 (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
287 210
288 if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) 211 if (save_i387_xstate(fp) < 0)
289 goto give_sigsegv;
290
291 if (save_i387(fp) < 0)
292 err |= -1; 212 err |= -1;
293 } else 213 } else
294 frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; 214 frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
@@ -303,7 +223,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
303 } 223 }
304 224
305 /* Create the ucontext. */ 225 /* Create the ucontext. */
306 err |= __put_user(0, &frame->uc.uc_flags); 226 if (cpu_has_xsave)
227 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
228 else
229 err |= __put_user(0, &frame->uc.uc_flags);
307 err |= __put_user(0, &frame->uc.uc_link); 230 err |= __put_user(0, &frame->uc.uc_link);
308 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 231 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
309 err |= __put_user(sas_ss_flags(regs->sp), 232 err |= __put_user(sas_ss_flags(regs->sp),
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7985c5b3f916..b25097c599a5 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
123 123
124static atomic_t init_deasserted; 124static atomic_t init_deasserted;
125 125
126static int boot_cpu_logical_apicid;
127 126
128/* representing cpus for which sibling maps can be computed */ 127/* representing cpus for which sibling maps can be computed */
129static cpumask_t cpu_sibling_setup_map; 128static cpumask_t cpu_sibling_setup_map;
@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
165#endif 164#endif
166 165
167#ifdef CONFIG_X86_32 166#ifdef CONFIG_X86_32
167static int boot_cpu_logical_apicid;
168
168u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = 169u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
169 { [0 ... NR_CPUS-1] = BAD_APICID }; 170 { [0 ... NR_CPUS-1] = BAD_APICID };
170 171
@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
210 /* 211 /*
211 * (This works even if the APIC is not enabled.) 212 * (This works even if the APIC is not enabled.)
212 */ 213 */
213 phys_id = GET_APIC_ID(read_apic_id()); 214 phys_id = read_apic_id();
214 cpuid = smp_processor_id(); 215 cpuid = smp_processor_id();
215 if (cpu_isset(cpuid, cpu_callin_map)) { 216 if (cpu_isset(cpuid, cpu_callin_map)) {
216 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, 217 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -550,8 +551,7 @@ static inline void __inquire_remote_apic(int apicid)
550 printk(KERN_CONT 551 printk(KERN_CONT
551 "a previous APIC delivery may have failed\n"); 552 "a previous APIC delivery may have failed\n");
552 553
553 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); 554 apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
554 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
555 555
556 timeout = 0; 556 timeout = 0;
557 do { 557 do {
@@ -583,11 +583,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
583 int maxlvt; 583 int maxlvt;
584 584
585 /* Target chip */ 585 /* Target chip */
586 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
587
588 /* Boot on the stack */ 586 /* Boot on the stack */
589 /* Kick the second */ 587 /* Kick the second */
590 apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); 588 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
591 589
592 pr_debug("Waiting for send to finish...\n"); 590 pr_debug("Waiting for send to finish...\n");
593 send_status = safe_apic_wait_icr_idle(); 591 send_status = safe_apic_wait_icr_idle();
@@ -640,13 +638,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
640 /* 638 /*
641 * Turn INIT on target chip 639 * Turn INIT on target chip
642 */ 640 */
643 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
644
645 /* 641 /*
646 * Send IPI 642 * Send IPI
647 */ 643 */
648 apic_write(APIC_ICR, 644 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
649 APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); 645 phys_apicid);
650 646
651 pr_debug("Waiting for send to finish...\n"); 647 pr_debug("Waiting for send to finish...\n");
652 send_status = safe_apic_wait_icr_idle(); 648 send_status = safe_apic_wait_icr_idle();
@@ -656,10 +652,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
656 pr_debug("Deasserting INIT.\n"); 652 pr_debug("Deasserting INIT.\n");
657 653
658 /* Target chip */ 654 /* Target chip */
659 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
660
661 /* Send IPI */ 655 /* Send IPI */
662 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); 656 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
663 657
664 pr_debug("Waiting for send to finish...\n"); 658 pr_debug("Waiting for send to finish...\n");
665 send_status = safe_apic_wait_icr_idle(); 659 send_status = safe_apic_wait_icr_idle();
@@ -702,11 +696,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
702 */ 696 */
703 697
704 /* Target chip */ 698 /* Target chip */
705 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
706
707 /* Boot on the stack */ 699 /* Boot on the stack */
708 /* Kick the second */ 700 /* Kick the second */
709 apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); 701 apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
702 phys_apicid);
710 703
711 /* 704 /*
712 * Give the other CPU some time to accept the IPI. 705 * Give the other CPU some time to accept the IPI.
@@ -1175,10 +1168,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1175 * Setup boot CPU information 1168 * Setup boot CPU information
1176 */ 1169 */
1177 smp_store_cpu_info(0); /* Final full version of the data */ 1170 smp_store_cpu_info(0); /* Final full version of the data */
1171#ifdef CONFIG_X86_32
1178 boot_cpu_logical_apicid = logical_smp_processor_id(); 1172 boot_cpu_logical_apicid = logical_smp_processor_id();
1173#endif
1179 current_thread_info()->cpu = 0; /* needed? */ 1174 current_thread_info()->cpu = 0; /* needed? */
1180 set_cpu_sibling_map(0); 1175 set_cpu_sibling_map(0);
1181 1176
1177#ifdef CONFIG_X86_64
1178 enable_IR_x2apic();
1179 setup_apic_routing();
1180#endif
1181
1182 if (smp_sanity_check(max_cpus) < 0) { 1182 if (smp_sanity_check(max_cpus) < 0) {
1183 printk(KERN_INFO "SMP disabled\n"); 1183 printk(KERN_INFO "SMP disabled\n");
1184 disable_smp(); 1184 disable_smp();
@@ -1186,9 +1186,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1186 } 1186 }
1187 1187
1188 preempt_disable(); 1188 preempt_disable();
1189 if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { 1189 if (read_apic_id() != boot_cpu_physical_apicid) {
1190 panic("Boot APIC ID in local APIC unexpected (%d vs %d)", 1190 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1191 GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); 1191 read_apic_id(), boot_cpu_physical_apicid);
1192 /* Or can we switch back to PIC here? */ 1192 /* Or can we switch back to PIC here? */
1193 } 1193 }
1194 preempt_enable(); 1194 preempt_enable();
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index d67ce5f044ba..7b987852e876 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -30,7 +30,7 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <asm/io.h> 31#include <asm/io.h>
32#include <asm/bios_ebda.h> 32#include <asm/bios_ebda.h>
33#include <asm/mach-summit/mach_mpparse.h> 33#include <asm/summit/mpparse.h>
34 34
35static struct rio_table_hdr *rio_table_hdr __initdata; 35static struct rio_table_hdr *rio_table_hdr __initdata;
36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; 36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index 170d43c17487..3d1be4f0fac5 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -8,12 +8,12 @@
8#define __NO_STUBS 8#define __NO_STUBS
9 9
10#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; 10#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
11#undef _ASM_X86_64_UNISTD_H_ 11#undef ASM_X86__UNISTD_64_H
12#include <asm/unistd_64.h> 12#include <asm/unistd_64.h>
13 13
14#undef __SYSCALL 14#undef __SYSCALL
15#define __SYSCALL(nr, sym) [nr] = sym, 15#define __SYSCALL(nr, sym) [nr] = sym,
16#undef _ASM_X86_64_UNISTD_H_ 16#undef ASM_X86__UNISTD_64_H
17 17
18typedef void (*sys_call_ptr_t)(void); 18typedef void (*sys_call_ptr_t)(void);
19 19
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 03df8e45e5a1..da5a5964fccb 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1228,7 +1228,6 @@ void __init trap_init(void)
1228 1228
1229 set_bit(SYSCALL_VECTOR, used_vectors); 1229 set_bit(SYSCALL_VECTOR, used_vectors);
1230 1230
1231 init_thread_xstate();
1232 /* 1231 /*
1233 * Should be a barrier for any external CPU state: 1232 * Should be a barrier for any external CPU state:
1234 */ 1233 */
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 513caaca7115..38eb76156a47 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1134,7 +1134,7 @@ asmlinkage void math_state_restore(void)
1134 /* 1134 /*
1135 * Paranoid restore. send a SIGSEGV if we fail to restore the state. 1135 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
1136 */ 1136 */
1137 if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { 1137 if (unlikely(restore_fpu_checking(me))) {
1138 stts(); 1138 stts();
1139 force_sig(SIGSEGV, me); 1139 force_sig(SIGSEGV, me);
1140 return; 1140 return;
@@ -1173,10 +1173,6 @@ void __init trap_init(void)
1173 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); 1173 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
1174#endif 1174#endif
1175 /* 1175 /*
1176 * initialize the per thread extended state:
1177 */
1178 init_thread_xstate();
1179 /*
1180 * Should be a barrier for any external CPU state: 1176 * Should be a barrier for any external CPU state:
1181 */ 1177 */
1182 cpu_init(); 1178 cpu_init();
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 6ca515d6db54..61531d5c9507 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -905,8 +905,8 @@ static inline int __init activate_vmi(void)
905#endif 905#endif
906 906
907#ifdef CONFIG_X86_LOCAL_APIC 907#ifdef CONFIG_X86_LOCAL_APIC
908 para_fill(pv_apic_ops.apic_read, APICRead); 908 para_fill(apic_ops->read, APICRead);
909 para_fill(pv_apic_ops.apic_write, APICWrite); 909 para_fill(apic_ops->write, APICWrite);
910#endif 910#endif
911 911
912 /* 912 /*
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
new file mode 100644
index 000000000000..07713d64debe
--- /dev/null
+++ b/arch/x86/kernel/xsave.c
@@ -0,0 +1,316 @@
1/*
2 * xsave/xrstor support.
3 *
4 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
5 */
6#include <linux/bootmem.h>
7#include <linux/compat.h>
8#include <asm/i387.h>
9#ifdef CONFIG_IA32_EMULATION
10#include <asm/sigcontext32.h>
11#endif
12#include <asm/xcr.h>
13
14/*
15 * Supported feature mask by the CPU and the kernel.
16 */
17u64 pcntxt_mask;
18
19struct _fpx_sw_bytes fx_sw_reserved;
20#ifdef CONFIG_IA32_EMULATION
21struct _fpx_sw_bytes fx_sw_reserved_ia32;
22#endif
23
24/*
25 * Check for the presence of extended state information in the
26 * user fpstate pointer in the sigcontext.
27 */
28int check_for_xstate(struct i387_fxsave_struct __user *buf,
29 void __user *fpstate,
30 struct _fpx_sw_bytes *fx_sw_user)
31{
32 int min_xstate_size = sizeof(struct i387_fxsave_struct) +
33 sizeof(struct xsave_hdr_struct);
34 unsigned int magic2;
35 int err;
36
37 err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
38 sizeof(struct _fpx_sw_bytes));
39
40 if (err)
41 return err;
42
43 /*
44 * First Magic check failed.
45 */
46 if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
47 return -1;
48
49 /*
50 * Check for error scenarios.
51 */
52 if (fx_sw_user->xstate_size < min_xstate_size ||
53 fx_sw_user->xstate_size > xstate_size ||
54 fx_sw_user->xstate_size > fx_sw_user->extended_size)
55 return -1;
56
57 err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
58 fx_sw_user->extended_size -
59 FP_XSTATE_MAGIC2_SIZE));
60 /*
61 * Check for the presence of second magic word at the end of memory
62 * layout. This detects the case where the user just copied the legacy
63 * fpstate layout with out copying the extended state information
64 * in the memory layout.
65 */
66 if (err || magic2 != FP_XSTATE_MAGIC2)
67 return -1;
68
69 return 0;
70}
71
72#ifdef CONFIG_X86_64
73/*
74 * Signal frame handlers.
75 */
76
77int save_i387_xstate(void __user *buf)
78{
79 struct task_struct *tsk = current;
80 int err = 0;
81
82 if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
83 return -EACCES;
84
85 BUG_ON(sig_xstate_size < xstate_size);
86
87 if ((unsigned long)buf % 64)
88 printk("save_i387_xstate: bad fpstate %p\n", buf);
89
90 if (!used_math())
91 return 0;
92 clear_used_math(); /* trigger finit */
93 if (task_thread_info(tsk)->status & TS_USEDFPU) {
94 /*
95 * Start with clearing the user buffer. This will present a
96 * clean context for the bytes not touched by the fxsave/xsave.
97 */
98 __clear_user(buf, sig_xstate_size);
99
100 if (task_thread_info(tsk)->status & TS_XSAVE)
101 err = xsave_user(buf);
102 else
103 err = fxsave_user(buf);
104
105 if (err)
106 return err;
107 task_thread_info(tsk)->status &= ~TS_USEDFPU;
108 stts();
109 } else {
110 if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
111 xstate_size))
112 return -1;
113 }
114
115 if (task_thread_info(tsk)->status & TS_XSAVE) {
116 struct _fpstate __user *fx = buf;
117
118 err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
119 sizeof(struct _fpx_sw_bytes));
120
121 err |= __put_user(FP_XSTATE_MAGIC2,
122 (__u32 __user *) (buf + sig_xstate_size
123 - FP_XSTATE_MAGIC2_SIZE));
124 }
125
126 return 1;
127}
128
129/*
130 * Restore the extended state if present. Otherwise, restore the FP/SSE
131 * state.
132 */
133int restore_user_xstate(void __user *buf)
134{
135 struct _fpx_sw_bytes fx_sw_user;
136 u64 mask;
137 int err;
138
139 if (((unsigned long)buf % 64) ||
140 check_for_xstate(buf, buf, &fx_sw_user))
141 goto fx_only;
142
143 mask = fx_sw_user.xstate_bv;
144
145 /*
146 * restore the state passed by the user.
147 */
148 err = xrestore_user(buf, mask);
149 if (err)
150 return err;
151
152 /*
153 * init the state skipped by the user.
154 */
155 mask = pcntxt_mask & ~mask;
156
157 xrstor_state(init_xstate_buf, mask);
158
159 return 0;
160
161fx_only:
162 /*
163 * couldn't find the extended state information in the
164 * memory layout. Restore just the FP/SSE and init all
165 * the other extended state.
166 */
167 xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
168 return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
169}
170
171/*
172 * This restores directly out of user space. Exceptions are handled.
173 */
174int restore_i387_xstate(void __user *buf)
175{
176 struct task_struct *tsk = current;
177 int err = 0;
178
179 if (!buf) {
180 if (used_math())
181 goto clear;
182 return 0;
183 } else
184 if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
185 return -EACCES;
186
187 if (!used_math()) {
188 err = init_fpu(tsk);
189 if (err)
190 return err;
191 }
192
193 if (!(task_thread_info(current)->status & TS_USEDFPU)) {
194 clts();
195 task_thread_info(current)->status |= TS_USEDFPU;
196 }
197 if (task_thread_info(tsk)->status & TS_XSAVE)
198 err = restore_user_xstate(buf);
199 else
200 err = fxrstor_checking((__force struct i387_fxsave_struct *)
201 buf);
202 if (unlikely(err)) {
203 /*
204 * Encountered an error while doing the restore from the
205 * user buffer, clear the fpu state.
206 */
207clear:
208 clear_fpu(tsk);
209 clear_used_math();
210 }
211 return err;
212}
213#endif
214
215/*
216 * Prepare the SW reserved portion of the fxsave memory layout, indicating
217 * the presence of the extended state information in the memory layout
218 * pointed by the fpstate pointer in the sigcontext.
219 * This will be saved when ever the FP and extended state context is
220 * saved on the user stack during the signal handler delivery to the user.
221 */
222void prepare_fx_sw_frame(void)
223{
224 int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
225 FP_XSTATE_MAGIC2_SIZE;
226
227 sig_xstate_size = sizeof(struct _fpstate) + size_extended;
228
229#ifdef CONFIG_IA32_EMULATION
230 sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
231#endif
232
233 memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
234
235 fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
236 fx_sw_reserved.extended_size = sig_xstate_size;
237 fx_sw_reserved.xstate_bv = pcntxt_mask;
238 fx_sw_reserved.xstate_size = xstate_size;
239#ifdef CONFIG_IA32_EMULATION
240 memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
241 sizeof(struct _fpx_sw_bytes));
242 fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
243#endif
244}
245
246/*
247 * Represents init state for the supported extended state.
248 */
249struct xsave_struct *init_xstate_buf;
250
251#ifdef CONFIG_X86_64
252unsigned int sig_xstate_size = sizeof(struct _fpstate);
253#endif
254
255/*
256 * Enable the extended processor state save/restore feature
257 */
258void __cpuinit xsave_init(void)
259{
260 if (!cpu_has_xsave)
261 return;
262
263 set_in_cr4(X86_CR4_OSXSAVE);
264
265 /*
266 * Enable all the features that the HW is capable of
267 * and the Linux kernel is aware of.
268 */
269 xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
270}
271
272/*
273 * setup the xstate image representing the init state
274 */
275void setup_xstate_init(void)
276{
277 init_xstate_buf = alloc_bootmem(xstate_size);
278 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
279}
280
281/*
282 * Enable and initialize the xsave feature.
283 */
284void __init xsave_cntxt_init(void)
285{
286 unsigned int eax, ebx, ecx, edx;
287
288 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
289 pcntxt_mask = eax + ((u64)edx << 32);
290
291 if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
292 printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n",
293 pcntxt_mask);
294 BUG();
295 }
296
297 /*
298 * for now OS knows only about FP/SSE
299 */
300 pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
301 xsave_init();
302
303 /*
304 * Recompute the context size for enabled features
305 */
306 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
307 xstate_size = ebx;
308
309 prepare_fx_sw_frame();
310
311 setup_xstate_init();
312
313 printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, "
314 "cntxt size 0x%x\n",
315 pcntxt_mask, xstate_size);
316}