Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile                                       |   9
-rw-r--r--  arch/x86/kernel/acpi/cstate.c                                  |   2
-rw-r--r--  arch/x86/kernel/acpi/processor.c                               |   2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/p4-clockmod.c                      |   4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c                       |   1
-rw-r--r--  arch/x86/kernel/e820_32.c                                      |   4
-rw-r--r--  arch/x86/kernel/e820_64.c                                      |   4
-rw-r--r--  arch/x86/kernel/efi.c                                          |  18
-rw-r--r--  arch/x86/kernel/efi_64.c                                       |  12
-rw-r--r--  arch/x86/kernel/entry_32.S                                     |   1
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c                               |  17
-rw-r--r--  arch/x86/kernel/head64.c                                       |   2
-rw-r--r--  arch/x86/kernel/head_32.S                                      |   1
-rw-r--r--  arch/x86/kernel/i387.c                                         | 114
-rw-r--r--  arch/x86/kernel/kgdb.c                                         |   6
-rw-r--r--  arch/x86/kernel/nmi_32.c                                       |   3
-rw-r--r--  arch/x86/kernel/nmi_64.c                                       |   6
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c                               |   3
-rw-r--r--  arch/x86/kernel/pci-dma.c (renamed from arch/x86/kernel/pci-dma_64.c)   | 546
-rw-r--r--  arch/x86/kernel/pci-dma_32.c                                   | 177
-rw-r--r--  arch/x86/kernel/pci-gart_64.c                                  |  15
-rw-r--r--  arch/x86/kernel/pci-nommu.c (renamed from arch/x86/kernel/pci-nommu_64.c) |  34
-rw-r--r--  arch/x86/kernel/pci-swiotlb_64.c                               |   9
-rw-r--r--  arch/x86/kernel/process.c                                      |  44
-rw-r--r--  arch/x86/kernel/process_32.c                                   |  50
-rw-r--r--  arch/x86/kernel/process_64.c                                   |  74
-rw-r--r--  arch/x86/kernel/setup64.c                                      |   4
-rw-r--r--  arch/x86/kernel/setup_32.c                                     |   4
-rw-r--r--  arch/x86/kernel/setup_64.c                                     |   9
-rw-r--r--  arch/x86/kernel/smpboot.c                                      |  29
-rw-r--r--  arch/x86/kernel/traps_32.c                                     |  35
-rw-r--r--  arch/x86/kernel/traps_64.c                                     |  36
-rw-r--r--  arch/x86/kernel/tsc_32.c                                       |  23
-rw-r--r--  arch/x86/kernel/tsc_64.c                                       |  23
34 files changed, 801 insertions(+), 520 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index c3920ea8ac56..90e092d0af0c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -22,13 +22,14 @@ obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o setup64.o
-obj-y			+= pci-dma_$(BITS).o bootflag.o e820_$(BITS).o
-obj-y			+= quirks.o i8237.o topology.o kdebugfs.o
-obj-y			+= alternative.o i8253.o
-obj-$(CONFIG_X86_64)	+= pci-nommu_64.o bugs_64.o
+obj-y			+= bootflag.o e820_$(BITS).o
+obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
+obj-y			+= alternative.o i8253.o pci-nommu.o
+obj-$(CONFIG_X86_64)	+= bugs_64.o
 obj-y			+= tsc_$(BITS).o io_delay.o rtc.o

 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
+obj-y				+= process.o
 obj-y				+= i387.o
 obj-y				+= ptrace.o
 obj-y				+= ds.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index c6dc05af8827..c2502eb9aa83 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -1,6 +1,4 @@
 /*
- * arch/i386/kernel/acpi/cstate.c
- *
  * Copyright (C) 2005 Intel Corporation
  * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
  * 	- Added _PDC for SMP C-states on Intel CPUs
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index 324eb0cab19c..de2d2e4ebad9 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -1,6 +1,4 @@
 /*
- * arch/i386/kernel/acpi/processor.c
- *
  * Copyright (C) 2005 Intel Corporation
  * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
  * 	- Added _PDC for platforms with Intel CPUs
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 14791ec55cfd..199e4e05e5dc 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -289,8 +289,8 @@ static int __init cpufreq_p4_init(void)
 	if (c->x86_vendor != X86_VENDOR_INTEL)
 		return -ENODEV;

-	if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) ||
-	    !test_bit(X86_FEATURE_ACC, c->x86_capability))
+	if (!test_cpu_cap(c, X86_FEATURE_ACPI) ||
+	    !test_cpu_cap(c, X86_FEATURE_ACC))
 		return -ENODEV;

 	ret = cpufreq_register_driver(&p4clockmod_driver);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 9b7e01daa1ca..1f4cc48c14c6 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -1,5 +1,4 @@
 /*
- * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c
  *
  * Thermal throttle event support code (such as syslog messaging and rate
  * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 0240cd778365..ed733e7cf4e6 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -475,7 +475,7 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
 /*
  * Find the highest page frame number we have available
  */
-void __init find_max_pfn(void)
+void __init propagate_e820_map(void)
 {
 	int i;

@@ -704,7 +704,7 @@ static int __init parse_memmap(char *arg)
 			 * size before original memory map is
 			 * reset.
 			 */
-			find_max_pfn();
+			propagate_e820_map();
 			saved_max_pfn = max_pfn;
 #endif
 			e820.nr_map = 0;
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 7f6c0c85c8f6..cbd42e51cb08 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -96,7 +96,7 @@ void __init early_res_to_bootmem(void)
 }

 /* Check for already reserved areas */
-static inline int
+static inline int __init
 bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
 {
 	int i;
@@ -116,7 +116,7 @@ again:
 }

 /* Check for already reserved areas */
-static inline int
+static inline int __init
 bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
 {
 	int i;
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 759e02bec070..77d424cf68b3 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -383,6 +383,7 @@ static void __init runtime_code_page_mkexec(void)
 {
 	efi_memory_desc_t *md;
 	void *p;
+	u64 addr, npages;

 	/* Make EFI runtime service code area executable */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -391,7 +392,10 @@ static void __init runtime_code_page_mkexec(void)
 		if (md->type != EFI_RUNTIME_SERVICES_CODE)
 			continue;

-		set_memory_x(md->virt_addr, md->num_pages);
+		addr = md->virt_addr;
+		npages = md->num_pages;
+		memrange_efi_to_native(&addr, &npages);
+		set_memory_x(addr, npages);
 	}
 }

@@ -408,7 +412,7 @@ void __init efi_enter_virtual_mode(void)
 	efi_memory_desc_t *md;
 	efi_status_t status;
 	unsigned long size;
-	u64 end, systab;
+	u64 end, systab, addr, npages;
 	void *p, *va;

 	efi.systab = NULL;
@@ -420,7 +424,7 @@ void __init efi_enter_virtual_mode(void)
 		size = md->num_pages << EFI_PAGE_SHIFT;
 		end = md->phys_addr + size;

-		if ((end >> PAGE_SHIFT) <= max_pfn_mapped)
+		if (PFN_UP(end) <= max_pfn_mapped)
 			va = __va(md->phys_addr);
 		else
 			va = efi_ioremap(md->phys_addr, size);
@@ -433,8 +437,12 @@ void __init efi_enter_virtual_mode(void)
 			continue;
 		}

-		if (!(md->attribute & EFI_MEMORY_WB))
-			set_memory_uc(md->virt_addr, md->num_pages);
+		if (!(md->attribute & EFI_MEMORY_WB)) {
+			addr = md->virt_addr;
+			npages = md->num_pages;
+			memrange_efi_to_native(&addr, &npages);
+			set_memory_uc(addr, npages);
+		}

 		systab = (u64) (unsigned long) efi_phys.systab;
 		if (md->phys_addr <= systab && systab < end) {
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index d143a1e76b30..d0060fdcccac 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -105,14 +105,14 @@ void __init efi_reserve_bootmem(void)
 
 void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
 {
-	static unsigned pages_mapped;
+	static unsigned pages_mapped __initdata;
 	unsigned i, pages;
+	unsigned long offset;

-	/* phys_addr and size must be page aligned */
-	if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK))
-		return NULL;
+	pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr);
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;

-	pages = size >> PAGE_SHIFT;
 	if (pages_mapped + pages > MAX_EFI_IO_PAGES)
 		return NULL;

@@ -124,5 +124,5 @@ void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
 	}

 	return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \
-					     (pages_mapped - pages));
+					     (pages_mapped - pages)) + offset;
 }
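
Note: the reworked efi_ioremap() above no longer rejects unaligned inputs; it maps whole pages and re-applies the in-page offset to the returned pointer. A worked example with illustrative values: for phys_addr = 0x1234 and size = 0x2000, pages = PFN_UP(0x3234) - PFN_DOWN(0x1234) = 4 - 1 = 3, offset = 0x234, so three pages starting at physical 0x1000 are mapped and the caller receives the fixmap address plus 0x234.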
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 9ba49a26dff8..f0f8934fc303 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1,5 +1,4 @@
 /*
- * linux/arch/i386/entry.S
  *
  * Copyright (C) 1991, 1992  Linus Torvalds
  */
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 5d77c9cd8e15..ebf13908a743 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -61,26 +61,31 @@ int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
 	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
 	    (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
 	    (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
-	    (6 << UVH_IPI_INT_DELIVERY_MODE_SHFT);
+	    APIC_DM_INIT;
+	uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
+	mdelay(10);
+
+	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+	    (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+	    (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+	    APIC_DM_STARTUP;
 	uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
 	return 0;
 }

 static void uv_send_IPI_one(int cpu, int vector)
 {
-	unsigned long val, apicid;
+	unsigned long val, apicid, lapicid;
 	int nasid;

 	apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */
+	lapicid = apicid & 0x3f;	/* ZZZ macro needed */
 	nasid = uv_apicid_to_nasid(apicid);
 	val =
-	    (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid <<
+	    (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid <<
 	    UVH_IPI_INT_APIC_ID_SHFT) |
 	    (vector << UVH_IPI_INT_VECTOR_SHFT);
 	uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
-	printk(KERN_DEBUG
-	    "UV: IPI to cpu %d, apicid 0x%lx, vec %d, nasid%d, val 0x%lx\n",
-	    cpu, apicid, vector, nasid, val);
 }

 static void uv_send_IPI_mask(cpumask_t mask, int vector)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index d6d54faa84df..993c76773256 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -146,6 +146,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
 
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");

+#ifdef CONFIG_BLK_DEV_INITRD
 	/* Reserve INITRD */
 	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
 		unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
@@ -153,6 +154,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
 		unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
 		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
 	}
+#endif

 	reserve_ebda_region();

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 826988a6e964..90f038af3adc 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -1,5 +1,4 @@
 /*
- * linux/arch/i386/kernel/head.S -- the 32-bit startup code.
  *
  * Copyright (C) 1991, 1992  Linus Torvalds
  *
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 8f8102d967b3..db6839b53195 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -35,17 +35,18 @@
 #endif

 static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
+unsigned int xstate_size;
+static struct i387_fxsave_struct fx_scratch __cpuinitdata;

-void mxcsr_feature_mask_init(void)
+void __cpuinit mxcsr_feature_mask_init(void)
 {
 	unsigned long mask = 0;

 	clts();
 	if (cpu_has_fxsr) {
-		memset(&current->thread.i387.fxsave, 0,
-		       sizeof(struct i387_fxsave_struct));
-		asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
-		mask = current->thread.i387.fxsave.mxcsr_mask;
+		memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
+		asm volatile("fxsave %0" : : "m" (fx_scratch));
+		mask = fx_scratch.mxcsr_mask;
 		if (mask == 0)
 			mask = 0x0000ffbf;
 	}
@@ -53,6 +54,16 @@ void mxcsr_feature_mask_init(void)
 	stts();
 }

+void __init init_thread_xstate(void)
+{
+	if (cpu_has_fxsr)
+		xstate_size = sizeof(struct i387_fxsave_struct);
+#ifdef CONFIG_X86_32
+	else
+		xstate_size = sizeof(struct i387_fsave_struct);
+#endif
+}
+
 #ifdef CONFIG_X86_64
 /*
  * Called at bootup to set up the initial FPU state that is later cloned
@@ -61,10 +72,6 @@ void mxcsr_feature_mask_init(void)
 void __cpuinit fpu_init(void)
 {
 	unsigned long oldcr0 = read_cr0();
-	extern void __bad_fxsave_alignment(void);
-
-	if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
-		__bad_fxsave_alignment();

 	set_in_cr4(X86_CR4_OSFXSR);
 	set_in_cr4(X86_CR4_OSXMMEXCPT);
@@ -84,32 +91,44 @@ void __cpuinit fpu_init(void)
  * value at reset if we support XMM instructions and then
  * remeber the current task has used the FPU.
  */
-void init_fpu(struct task_struct *tsk)
+int init_fpu(struct task_struct *tsk)
 {
 	if (tsk_used_math(tsk)) {
 		if (tsk == current)
 			unlazy_fpu(tsk);
-		return;
+		return 0;
+	}
+
+	/*
+	 * Memory allocation at the first usage of the FPU and other state.
+	 */
+	if (!tsk->thread.xstate) {
+		tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
+						      GFP_KERNEL);
+		if (!tsk->thread.xstate)
+			return -ENOMEM;
 	}

 	if (cpu_has_fxsr) {
-		memset(&tsk->thread.i387.fxsave, 0,
-		       sizeof(struct i387_fxsave_struct));
-		tsk->thread.i387.fxsave.cwd = 0x37f;
+		struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
+
+		memset(fx, 0, xstate_size);
+		fx->cwd = 0x37f;
 		if (cpu_has_xmm)
-			tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT;
+			fx->mxcsr = MXCSR_DEFAULT;
 	} else {
-		memset(&tsk->thread.i387.fsave, 0,
-		       sizeof(struct i387_fsave_struct));
-		tsk->thread.i387.fsave.cwd = 0xffff037fu;
-		tsk->thread.i387.fsave.swd = 0xffff0000u;
-		tsk->thread.i387.fsave.twd = 0xffffffffu;
-		tsk->thread.i387.fsave.fos = 0xffff0000u;
+		struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
+		memset(fp, 0, xstate_size);
+		fp->cwd = 0xffff037fu;
+		fp->swd = 0xffff0000u;
+		fp->twd = 0xffffffffu;
+		fp->fos = 0xffff0000u;
 	}
 	/*
 	 * Only the device not available exception or ptrace can call init_fpu.
 	 */
 	set_stopped_child_used_math(tsk);
+	return 0;
 }

 int fpregs_active(struct task_struct *target, const struct user_regset *regset)
@@ -126,13 +145,17 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
 		unsigned int pos, unsigned int count,
 		void *kbuf, void __user *ubuf)
 {
+	int ret;
+
 	if (!cpu_has_fxsr)
 		return -ENODEV;

-	init_fpu(target);
+	ret = init_fpu(target);
+	if (ret)
+		return ret;

 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   &target->thread.i387.fxsave, 0, -1);
+				   &target->thread.xstate->fxsave, 0, -1);
 }

 int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
@@ -144,16 +167,19 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!cpu_has_fxsr)
 		return -ENODEV;

-	init_fpu(target);
+	ret = init_fpu(target);
+	if (ret)
+		return ret;
+
 	set_stopped_child_used_math(target);

 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				 &target->thread.i387.fxsave, 0, -1);
+				 &target->thread.xstate->fxsave, 0, -1);

 	/*
 	 * mxcsr reserved bits must be masked to zero for security reasons.
 	 */
-	target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+	target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;

 	return ret;
 }
@@ -233,7 +259,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
 static void
 convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
 {
-	struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+	struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
 	struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
 	struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
 	int i;
@@ -273,7 +299,7 @@ static void convert_to_fxsr(struct task_struct *tsk,
 			   const struct user_i387_ia32_struct *env)

 {
-	struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+	struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
 	struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
 	struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
 	int i;
@@ -302,15 +328,19 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 	       void *kbuf, void __user *ubuf)
 {
 	struct user_i387_ia32_struct env;
+	int ret;

 	if (!HAVE_HWFP)
 		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);

-	init_fpu(target);
+	ret = init_fpu(target);
+	if (ret)
+		return ret;

 	if (!cpu_has_fxsr) {
 		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-					   &target->thread.i387.fsave, 0, -1);
+					   &target->thread.xstate->fsave, 0,
+					   -1);
 	}

 	if (kbuf && pos == 0 && count == sizeof(env)) {
@@ -333,12 +363,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!HAVE_HWFP)
 		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);

-	init_fpu(target);
+	ret = init_fpu(target);
+	if (ret)
+		return ret;
+
 	set_stopped_child_used_math(target);

 	if (!cpu_has_fxsr) {
 		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-					  &target->thread.i387.fsave, 0, -1);
+					  &target->thread.xstate->fsave, 0, -1);
 	}

 	if (pos > 0 || count < sizeof(env))
@@ -358,11 +391,11 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
 {
 	struct task_struct *tsk = current;
+	struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;

 	unlazy_fpu(tsk);
-	tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
-	if (__copy_to_user(buf, &tsk->thread.i387.fsave,
-			   sizeof(struct i387_fsave_struct)))
+	fp->status = fp->swd;
+	if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
 		return -1;
 	return 1;
 }
@@ -370,6 +403,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
 static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
 {
 	struct task_struct *tsk = current;
+	struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
 	struct user_i387_ia32_struct env;
 	int err = 0;

@@ -379,12 +413,12 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
 	if (__copy_to_user(buf, &env, sizeof(env)))
 		return -1;

-	err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
+	err |= __put_user(fx->swd, &buf->status);
 	err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
 	if (err)
 		return -1;

-	if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+	if (__copy_to_user(&buf->_fxsr_env[0], fx,
 			   sizeof(struct i387_fxsave_struct)))
 		return -1;
 	return 1;
@@ -417,7 +451,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
 	struct task_struct *tsk = current;

 	clear_fpu(tsk);
-	return __copy_from_user(&tsk->thread.i387.fsave, buf,
+	return __copy_from_user(&tsk->thread.xstate->fsave, buf,
 				sizeof(struct i387_fsave_struct));
 }

@@ -428,10 +462,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
 	int err;

 	clear_fpu(tsk);
-	err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
+	err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
 			       sizeof(struct i387_fxsave_struct));
 	/* mxcsr reserved bits must be masked to zero for security reasons */
-	tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+	tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
 	if (err || __copy_from_user(&env, buf, sizeof(env)))
 		return 1;
 	convert_to_fxsr(tsk, &env);
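
Note: because init_fpu() can now fail (the FPU/extended state has moved out of task_struct into a kmem_cache-backed thread.xstate), every call site has to check its return value, as the regset hunks above do. A minimal sketch of the new calling convention; example_use_fpstate() is a hypothetical caller, not part of this patch:

/* Hypothetical caller: the lazy allocation in init_fpu() may fail. */
static int example_use_fpstate(struct task_struct *tsk)
{
	int ret = init_fpu(tsk);	/* may kmem_cache_alloc() tsk->thread.xstate */

	if (ret)
		return ret;		/* -ENOMEM when the allocation fails */
	/* tsk->thread.xstate->fxsave (or ->fsave) is valid from here on */
	return 0;
}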
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 24362ecf5f9a..f47f0eb886b8 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -46,11 +46,7 @@
 #include <asm/apicdef.h>
 #include <asm/system.h>

-#ifdef CONFIG_X86_32
-# include <mach_ipi.h>
-#else
-# include <asm/mach_apic.h>
-#endif
+#include <mach_ipi.h>

 /*
  * Put the error code here just in case the user cares:
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 8421d0ac6f22..11b14bbaa61e 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -321,7 +321,8 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
 
 extern void die_nmi(struct pt_regs *, const char *msg);

-__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
+notrace __kprobes int
+nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 {

 	/*
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index 11f9130ac513..5a29ded994fa 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -313,7 +313,8 @@ void touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);

-int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
+notrace __kprobes int
+nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 {
 	int sum;
 	int touched = 0;
@@ -384,7 +385,8 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
 
 static unsigned ignore_nmis;

-asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
+asmlinkage notrace __kprobes void
+do_nmi(struct pt_regs *regs, long error_code)
 {
 	nmi_enter();
 	add_pda(__nmi_count,1);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 1b5464c2434f..adb91e4b62da 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -470,10 +470,11 @@ error:
 	return 0;
 }

-static dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
+static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
 	size_t size, int direction)
 {
 	dma_addr_t dma_handle = bad_dma_address;
+	void *vaddr = phys_to_virt(paddr);
 	unsigned long uaddr;
 	unsigned int npages;
 	struct iommu_table *tbl = find_iommu_table(dev);
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma.c
index ada5a0604992..388b113a7d88 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -1,61 +1,370 @@
-/*
- * Dynamic DMA mapping support.
- */
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/module.h>
+#include <linux/dma-mapping.h>
 #include <linux/dmar.h>
-#include <asm/io.h>
+#include <linux/bootmem.h>
+#include <linux/pci.h>
+
+#include <asm/proto.h>
+#include <asm/dma.h>
 #include <asm/gart.h>
 #include <asm/calgary.h>

-int iommu_merge __read_mostly = 0;
-
-dma_addr_t bad_dma_address __read_mostly;
-EXPORT_SYMBOL(bad_dma_address);
+int forbid_dac __read_mostly;
+EXPORT_SYMBOL(forbid_dac);

-/* This tells the BIO block layer to assume merging. Default to off
-   because we cannot guarantee merging later. */
-int iommu_bio_merge __read_mostly = 0;
-EXPORT_SYMBOL(iommu_bio_merge);
+const struct dma_mapping_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);

-static int iommu_sac_force __read_mostly = 0;
+int iommu_sac_force __read_mostly = 0;

-int no_iommu __read_mostly;
 #ifdef CONFIG_IOMMU_DEBUG
 int panic_on_overflow __read_mostly = 1;
 int force_iommu __read_mostly = 1;
 #else
 int panic_on_overflow __read_mostly = 0;
-int force_iommu __read_mostly= 0;
+int force_iommu __read_mostly = 0;
 #endif

+int iommu_merge __read_mostly = 0;
+
+int no_iommu __read_mostly;
 /* Set this to 1 if there is a HW IOMMU in the system */
 int iommu_detected __read_mostly = 0;

+/* This tells the BIO block layer to assume merging. Default to off
+   because we cannot guarantee merging later. */
+int iommu_bio_merge __read_mostly = 0;
+EXPORT_SYMBOL(iommu_bio_merge);
+
+dma_addr_t bad_dma_address __read_mostly = 0;
+EXPORT_SYMBOL(bad_dma_address);
+
 /* Dummy device used for NULL arguments (normally ISA). Better would
    be probably a smaller DMA mask, but this is bug-to-bug compatible
-   to i386. */
+   to older i386. */
 struct device fallback_dev = {
 	.bus_id = "fallback device",
 	.coherent_dma_mask = DMA_32BIT_MASK,
 	.dma_mask = &fallback_dev.coherent_dma_mask,
 };

+int dma_set_mask(struct device *dev, u64 mask)
+{
+	if (!dev->dma_mask || !dma_supported(dev, mask))
+		return -EIO;
+
+	*dev->dma_mask = mask;
+
+	return 0;
+}
+EXPORT_SYMBOL(dma_set_mask);
+
+#ifdef CONFIG_X86_64
+static __initdata void *dma32_bootmem_ptr;
+static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
+
+static int __init parse_dma32_size_opt(char *p)
+{
+	if (!p)
+		return -EINVAL;
+	dma32_bootmem_size = memparse(p, &p);
+	return 0;
+}
+early_param("dma32_size", parse_dma32_size_opt);
+
+void __init dma32_reserve_bootmem(void)
+{
+	unsigned long size, align;
+	if (end_pfn <= MAX_DMA32_PFN)
+		return;
+
+	align = 64ULL<<20;
+	size = round_up(dma32_bootmem_size, align);
+	dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
+				 __pa(MAX_DMA_ADDRESS));
+	if (dma32_bootmem_ptr)
+		dma32_bootmem_size = size;
+	else
+		dma32_bootmem_size = 0;
+}
+static void __init dma32_free_bootmem(void)
+{
+	int node;
+
+	if (end_pfn <= MAX_DMA32_PFN)
+		return;
+
+	if (!dma32_bootmem_ptr)
+		return;
+
+	for_each_online_node(node)
+		free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr),
+				  dma32_bootmem_size);
+
+	dma32_bootmem_ptr = NULL;
+	dma32_bootmem_size = 0;
+}
+
+void __init pci_iommu_alloc(void)
+{
+	/* free the range so iommu could get some range less than 4G */
+	dma32_free_bootmem();
+	/*
+	 * The order of these functions is important for
+	 * fall-back/fail-over reasons
+	 */
+#ifdef CONFIG_GART_IOMMU
+	gart_iommu_hole_init();
+#endif
+
+#ifdef CONFIG_CALGARY_IOMMU
+	detect_calgary();
+#endif
+
+	detect_intel_iommu();
+
+#ifdef CONFIG_SWIOTLB
+	pci_swiotlb_init();
+#endif
+}
+#endif
+
+/*
+ * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
+ * documentation.
+ */
+static __init int iommu_setup(char *p)
+{
+	iommu_merge = 1;
+
+	if (!p)
+		return -EINVAL;
+
+	while (*p) {
+		if (!strncmp(p, "off", 3))
+			no_iommu = 1;
+		/* gart_parse_options has more force support */
+		if (!strncmp(p, "force", 5))
+			force_iommu = 1;
+		if (!strncmp(p, "noforce", 7)) {
+			iommu_merge = 0;
+			force_iommu = 0;
+		}
+
+		if (!strncmp(p, "biomerge", 8)) {
+			iommu_bio_merge = 4096;
+			iommu_merge = 1;
+			force_iommu = 1;
+		}
+		if (!strncmp(p, "panic", 5))
+			panic_on_overflow = 1;
+		if (!strncmp(p, "nopanic", 7))
+			panic_on_overflow = 0;
+		if (!strncmp(p, "merge", 5)) {
+			iommu_merge = 1;
+			force_iommu = 1;
+		}
+		if (!strncmp(p, "nomerge", 7))
+			iommu_merge = 0;
+		if (!strncmp(p, "forcesac", 8))
+			iommu_sac_force = 1;
+		if (!strncmp(p, "allowdac", 8))
+			forbid_dac = 0;
+		if (!strncmp(p, "nodac", 5))
+			forbid_dac = -1;
+		if (!strncmp(p, "usedac", 6)) {
+			forbid_dac = -1;
+			return 1;
+		}
+#ifdef CONFIG_SWIOTLB
+		if (!strncmp(p, "soft", 4))
+			swiotlb = 1;
+#endif
+
+#ifdef CONFIG_GART_IOMMU
+		gart_parse_options(p);
+#endif
+
+#ifdef CONFIG_CALGARY_IOMMU
+		if (!strncmp(p, "calgary", 7))
+			use_calgary = 1;
+#endif /* CONFIG_CALGARY_IOMMU */
+
+		p += strcspn(p, ",");
+		if (*p == ',')
+			++p;
+	}
+	return 0;
+}
+early_param("iommu", iommu_setup);
+
+#ifdef CONFIG_X86_32
+int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+				dma_addr_t device_addr, size_t size, int flags)
+{
+	void __iomem *mem_base = NULL;
+	int pages = size >> PAGE_SHIFT;
+	int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
+
+	if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
+		goto out;
+	if (!size)
+		goto out;
+	if (dev->dma_mem)
+		goto out;
+
+	/* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
+
+	mem_base = ioremap(bus_addr, size);
+	if (!mem_base)
+		goto out;
+
+	dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
+	if (!dev->dma_mem)
+		goto out;
+	dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!dev->dma_mem->bitmap)
+		goto free1_out;
+
+	dev->dma_mem->virt_base = mem_base;
+	dev->dma_mem->device_base = device_addr;
+	dev->dma_mem->size = pages;
+	dev->dma_mem->flags = flags;
+
+	if (flags & DMA_MEMORY_MAP)
+		return DMA_MEMORY_MAP;
+
+	return DMA_MEMORY_IO;
+
+ free1_out:
+	kfree(dev->dma_mem);
+ out:
+	if (mem_base)
+		iounmap(mem_base);
+	return 0;
+}
+EXPORT_SYMBOL(dma_declare_coherent_memory);
+
+void dma_release_declared_memory(struct device *dev)
+{
+	struct dma_coherent_mem *mem = dev->dma_mem;
+
+	if (!mem)
+		return;
+	dev->dma_mem = NULL;
+	iounmap(mem->virt_base);
+	kfree(mem->bitmap);
+	kfree(mem);
+}
+EXPORT_SYMBOL(dma_release_declared_memory);
+
+void *dma_mark_declared_memory_occupied(struct device *dev,
+					dma_addr_t device_addr, size_t size)
+{
+	struct dma_coherent_mem *mem = dev->dma_mem;
+	int pos, err;
+	int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
+
+	pages >>= PAGE_SHIFT;
+
+	if (!mem)
+		return ERR_PTR(-EINVAL);
+
+	pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
+	err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
+	if (err != 0)
+		return ERR_PTR(err);
+	return mem->virt_base + (pos << PAGE_SHIFT);
+}
+EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
+
+static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
+				       dma_addr_t *dma_handle, void **ret)
+{
+	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+	int order = get_order(size);
+
+	if (mem) {
+		int page = bitmap_find_free_region(mem->bitmap, mem->size,
+						   order);
+		if (page >= 0) {
+			*dma_handle = mem->device_base + (page << PAGE_SHIFT);
+			*ret = mem->virt_base + (page << PAGE_SHIFT);
+			memset(*ret, 0, size);
+		}
+		if (mem->flags & DMA_MEMORY_EXCLUSIVE)
+			*ret = NULL;
+	}
+	return (mem != NULL);
+}
+
+static int dma_release_coherent(struct device *dev, int order, void *vaddr)
+{
+	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+
+	if (mem && vaddr >= mem->virt_base && vaddr <
+		   (mem->virt_base + (mem->size << PAGE_SHIFT))) {
+		int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
+
+		bitmap_release_region(mem->bitmap, page, order);
+		return 1;
+	}
+	return 0;
+}
+#else
+#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
+#define dma_release_coherent(dev, order, vaddr) (0)
+#endif /* CONFIG_X86_32 */
+
+int dma_supported(struct device *dev, u64 mask)
+{
+#ifdef CONFIG_PCI
+	if (mask > 0xffffffff && forbid_dac > 0) {
+		printk(KERN_INFO "PCI: Disallowing DAC for device %s\n",
+				 dev->bus_id);
+		return 0;
+	}
+#endif
+
+	if (dma_ops->dma_supported)
+		return dma_ops->dma_supported(dev, mask);
+
+	/* Copied from i386. Doesn't make much sense, because it will
+	   only work for pci_alloc_coherent.
+	   The caller just has to use GFP_DMA in this case. */
+	if (mask < DMA_24BIT_MASK)
+		return 0;
+
+	/* Tell the device to use SAC when IOMMU force is on.  This
+	   allows the driver to use cheaper accesses in some cases.
+
+	   Problem with this is that if we overflow the IOMMU area and
+	   return DAC as fallback address the device may not handle it
+	   correctly.
+
+	   As a special case some controllers have a 39bit address
+	   mode that is as efficient as 32bit (aic79xx). Don't force
+	   SAC for these.  Assume all masks <= 40 bits are of this
+	   type. Normally this doesn't make any difference, but gives
+	   more gentle handling of IOMMU overflow. */
+	if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
+		printk(KERN_INFO "%s: Force SAC with mask %Lx\n",
+				 dev->bus_id, mask);
+		return 0;
+	}
+
+	return 1;
+}
+EXPORT_SYMBOL(dma_supported);
+
 /* Allocate DMA memory on node near device */
-noinline static void *
+noinline struct page *
 dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
 {
-	struct page *page;
 	int node;

 	node = dev_to_node(dev);

-	page = alloc_pages_node(node, gfp, order);
-	return page ? page_address(page) : NULL;
+	return alloc_pages_node(node, gfp, order);
 }

 /*
@@ -65,9 +374,16 @@ void *
 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		   gfp_t gfp)
 {
-	void *memory;
+	void *memory = NULL;
+	struct page *page;
 	unsigned long dma_mask = 0;
-	u64 bus;
+	dma_addr_t bus;
+
+	/* ignore region specifiers */
+	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+
+	if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
+		return memory;

 	if (!dev)
 		dev = &fallback_dev;
@@ -82,26 +398,25 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	/* Don't invoke OOM killer */
 	gfp |= __GFP_NORETRY;

-	/* Kludge to make it bug-to-bug compatible with i386. i386
-	   uses the normal dma_mask for alloc_coherent. */
-	dma_mask &= *dev->dma_mask;
-
+#ifdef CONFIG_X86_64
 	/* Why <=? Even when the mask is smaller than 4GB it is often
 	   larger than 16MB and in this case we have a chance of
 	   finding fitting memory in the next higher zone first. If
 	   not retry with true GFP_DMA. -AK */
 	if (dma_mask <= DMA_32BIT_MASK)
 		gfp |= GFP_DMA32;
+#endif

  again:
-	memory = dma_alloc_pages(dev, gfp, get_order(size));
-	if (memory == NULL)
+	page = dma_alloc_pages(dev, gfp, get_order(size));
+	if (page == NULL)
 		return NULL;

 	{
 		int high, mmu;
-		bus = virt_to_bus(memory);
-		high = (bus + size) >= dma_mask;
+		bus = page_to_phys(page);
+		memory = page_address(page);
+		high = (bus + size) >= dma_mask;
 		mmu = high;
 		if (force_iommu && !(gfp & GFP_DMA))
 			mmu = 1;
@@ -127,7 +442,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
 		memset(memory, 0, size);
 		if (!mmu) {
-			*dma_handle = virt_to_bus(memory);
+			*dma_handle = bus;
 			return memory;
 		}
 	}
@@ -139,7 +454,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	}

 	if (dma_ops->map_simple) {
-		*dma_handle = dma_ops->map_simple(dev, memory,
+		*dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory),
 						  size,
 						  PCI_DMA_BIDIRECTIONAL);
 		if (*dma_handle != bad_dma_address)
@@ -147,7 +462,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	}

 	if (panic_on_overflow)
-		panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",size);
+		panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",
+		      (unsigned long)size);
 	free_pages((unsigned long)memory, get_order(size));
 	return NULL;
 }
@@ -160,153 +476,16 @@ EXPORT_SYMBOL(dma_alloc_coherent);
 void dma_free_coherent(struct device *dev, size_t size,
 			 void *vaddr, dma_addr_t bus)
 {
+	int order = get_order(size);
 	WARN_ON(irqs_disabled());	/* for portability */
+	if (dma_release_coherent(dev, order, vaddr))
+		return;
 	if (dma_ops->unmap_single)
 		dma_ops->unmap_single(dev, bus, size, 0);
-	free_pages((unsigned long)vaddr, get_order(size));
+	free_pages((unsigned long)vaddr, order);
 }
 EXPORT_SYMBOL(dma_free_coherent);

-static int forbid_dac __read_mostly;
-
-int dma_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_PCI
-	if (mask > 0xffffffff && forbid_dac > 0) {
-
-
-
-		printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", dev->bus_id);
-		return 0;
-	}
-#endif
-
-	if (dma_ops->dma_supported)
-		return dma_ops->dma_supported(dev, mask);
-
-	/* Copied from i386. Doesn't make much sense, because it will
-	   only work for pci_alloc_coherent.
-	   The caller just has to use GFP_DMA in this case. */
-	if (mask < DMA_24BIT_MASK)
-		return 0;
-
-	/* Tell the device to use SAC when IOMMU force is on.  This
-	   allows the driver to use cheaper accesses in some cases.
-
-	   Problem with this is that if we overflow the IOMMU area and
-	   return DAC as fallback address the device may not handle it
-	   correctly.
-
-	   As a special case some controllers have a 39bit address
-	   mode that is as efficient as 32bit (aic79xx). Don't force
-	   SAC for these.  Assume all masks <= 40 bits are of this
-	   type. Normally this doesn't make any difference, but gives
-	   more gentle handling of IOMMU overflow. */
-	if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
-		printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
-		return 0;
-	}
-
-	return 1;
-}
-EXPORT_SYMBOL(dma_supported);
-
-int dma_set_mask(struct device *dev, u64 mask)
-{
-	if (!dev->dma_mask || !dma_supported(dev, mask))
-		return -EIO;
-	*dev->dma_mask = mask;
-	return 0;
-}
-EXPORT_SYMBOL(dma_set_mask);
-
-/*
- * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
- * documentation.
- */
-static __init int iommu_setup(char *p)
-{
-	iommu_merge = 1;
-
-	if (!p)
-		return -EINVAL;
-
-	while (*p) {
-		if (!strncmp(p, "off", 3))
-			no_iommu = 1;
-		/* gart_parse_options has more force support */
-		if (!strncmp(p, "force", 5))
-			force_iommu = 1;
-		if (!strncmp(p, "noforce", 7)) {
-			iommu_merge = 0;
-			force_iommu = 0;
-		}
-
-		if (!strncmp(p, "biomerge", 8)) {
-			iommu_bio_merge = 4096;
-			iommu_merge = 1;
-			force_iommu = 1;
-		}
-		if (!strncmp(p, "panic", 5))
-			panic_on_overflow = 1;
-		if (!strncmp(p, "nopanic", 7))
-			panic_on_overflow = 0;
-		if (!strncmp(p, "merge", 5)) {
-			iommu_merge = 1;
-			force_iommu = 1;
-		}
-		if (!strncmp(p, "nomerge", 7))
-			iommu_merge = 0;
-		if (!strncmp(p, "forcesac", 8))
-			iommu_sac_force = 1;
-		if (!strncmp(p, "allowdac", 8))
-			forbid_dac = 0;
-		if (!strncmp(p, "nodac", 5))
-			forbid_dac = -1;
-
-#ifdef CONFIG_SWIOTLB
-		if (!strncmp(p, "soft", 4))
-			swiotlb = 1;
-#endif
-
-#ifdef CONFIG_GART_IOMMU
-		gart_parse_options(p);
-#endif
-
-#ifdef CONFIG_CALGARY_IOMMU
-		if (!strncmp(p, "calgary", 7))
-			use_calgary = 1;
-#endif /* CONFIG_CALGARY_IOMMU */
-
-		p += strcspn(p, ",");
-		if (*p == ',')
-			++p;
-	}
-	return 0;
-}
-early_param("iommu", iommu_setup);
-
-void __init pci_iommu_alloc(void)
-{
-	/*
-	 * The order of these functions is important for
-	 * fall-back/fail-over reasons
-	 */
-#ifdef CONFIG_GART_IOMMU
-	gart_iommu_hole_init();
-#endif
-
-#ifdef CONFIG_CALGARY_IOMMU
-	detect_calgary();
-#endif
-
-	detect_intel_iommu();
-
-#ifdef CONFIG_SWIOTLB
-	pci_swiotlb_init();
-#endif
-}
-
 static int __init pci_iommu_init(void)
 {
 #ifdef CONFIG_CALGARY_IOMMU
@@ -327,6 +506,8 @@ void pci_iommu_shutdown(void)
 {
 	gart_iommu_shutdown();
 }
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);

 #ifdef CONFIG_PCI
 /* Many VIA bridges seem to corrupt data for DAC. Disable it here */
@@ -334,11 +515,10 @@ void pci_iommu_shutdown(void)
 static __devinit void via_no_dac(struct pci_dev *dev)
 {
 	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
-		printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
+		printk(KERN_INFO "PCI: VIA PCI bridge detected."
+				"Disabling DAC.\n");
 		forbid_dac = 1;
 	}
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
 #endif
-/* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
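
Note: alongside the 32/64-bit unification, the map_single/map_simple ops now take a phys_addr_t instead of a kernel virtual pointer (see the calgary, gart and nommu hunks in this commit). A minimal sketch of an op conforming to the new prototype; the example_* names are illustrative, not part of this patch:

/* Identity mapping, the way a nommu-style op would do it. */
static dma_addr_t example_map_single(struct device *hwdev, phys_addr_t paddr,
				     size_t size, int direction)
{
	return (dma_addr_t)paddr;	/* callers pass a physical address now */
}

static const struct dma_mapping_ops example_dma_ops = {
	.map_single = example_map_single,
};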
diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c
deleted file mode 100644
index 51330321a5d3..000000000000
--- a/arch/x86/kernel/pci-dma_32.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- *
- * On i386 there is no hardware dynamic DMA address translation,
- * so consistent alloc/free are merely page allocation/freeing.
- * The rest of the dynamic DMA mapping interface is implemented
- * in asm/pci.h.
- */
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <asm/io.h>
-
-struct dma_coherent_mem {
-	void		*virt_base;
-	u32		device_base;
-	int		size;
-	int		flags;
-	unsigned long	*bitmap;
-};
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
-			   dma_addr_t *dma_handle, gfp_t gfp)
-{
-	void *ret;
-	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-	int order = get_order(size);
-	/* ignore region specifiers */
-	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
-
-	if (mem) {
-		int page = bitmap_find_free_region(mem->bitmap, mem->size,
-						     order);
-		if (page >= 0) {
-			*dma_handle = mem->device_base + (page << PAGE_SHIFT);
-			ret = mem->virt_base + (page << PAGE_SHIFT);
-			memset(ret, 0, size);
-			return ret;
-		}
-		if (mem->flags & DMA_MEMORY_EXCLUSIVE)
-			return NULL;
-	}
-
-	if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
-		gfp |= GFP_DMA;
-
-	ret = (void *)__get_free_pages(gfp, order);
-
-	if (ret != NULL) {
-		memset(ret, 0, size);
-		*dma_handle = virt_to_phys(ret);
-	}
-	return ret;
-}
-EXPORT_SYMBOL(dma_alloc_coherent);
-
-void dma_free_coherent(struct device *dev, size_t size,
-			 void *vaddr, dma_addr_t dma_handle)
-{
-	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-	int order = get_order(size);
-
-	WARN_ON(irqs_disabled());	/* for portability */
-	if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
-		int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-
-		bitmap_release_region(mem->bitmap, page, order);
-	} else
-		free_pages((unsigned long)vaddr, order);
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
-				dma_addr_t device_addr, size_t size, int flags)
-{
-	void __iomem *mem_base = NULL;
-	int pages = size >> PAGE_SHIFT;
-	int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
-
-	if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
-		goto out;
-	if (!size)
-		goto out;
-	if (dev->dma_mem)
-		goto out;
-
-	/* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
-
-	mem_base = ioremap(bus_addr, size);
-	if (!mem_base)
-		goto out;
-
-	dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
-	if (!dev->dma_mem)
-		goto out;
-	dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-	if (!dev->dma_mem->bitmap)
-		goto free1_out;
-
-	dev->dma_mem->virt_base = mem_base;
-	dev->dma_mem->device_base = device_addr;
-	dev->dma_mem->size = pages;
-	dev->dma_mem->flags = flags;
-
-	if (flags & DMA_MEMORY_MAP)
-		return DMA_MEMORY_MAP;
-
-	return DMA_MEMORY_IO;
-
- free1_out:
-	kfree(dev->dma_mem);
- out:
-	if (mem_base)
-		iounmap(mem_base);
-	return 0;
-}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
-	struct dma_coherent_mem *mem = dev->dma_mem;
-
-	if(!mem)
-		return;
-	dev->dma_mem = NULL;
-	iounmap(mem->virt_base);
-	kfree(mem->bitmap);
-	kfree(mem);
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
-					dma_addr_t device_addr, size_t size)
-{
-	struct dma_coherent_mem *mem = dev->dma_mem;
-	int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	int pos, err;
-
-	if (!mem)
-		return ERR_PTR(-EINVAL);
-
-	pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
-	err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
-	if (err != 0)
-		return ERR_PTR(err);
-	return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-
-#ifdef CONFIG_PCI
-/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
-
-int forbid_dac;
-EXPORT_SYMBOL(forbid_dac);
-
-static __devinit void via_no_dac(struct pci_dev *dev)
-{
-	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
-		printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
-		forbid_dac = 1;
-	}
-}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
-
-static int check_iommu(char *s)
-{
-	if (!strcmp(s, "usedac")) {
-		forbid_dac = -1;
-		return 1;
-	}
-	return 0;
-}
-__setup("iommu=", check_iommu);
-#endif
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 700e4647dd30..c07455d1695f 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -264,9 +264,9 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 }

 static dma_addr_t
-gart_map_simple(struct device *dev, char *buf, size_t size, int dir)
+gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir)
 {
-	dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
+	dma_addr_t map = dma_map_area(dev, paddr, size, dir);

 	flush_gart();

@@ -275,18 +275,17 @@ gart_map_simple(struct device *dev, char *buf, size_t size, int dir)
 
 /* Map a single area into the IOMMU */
 static dma_addr_t
-gart_map_single(struct device *dev, void *addr, size_t size, int dir)
+gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
 {
-	unsigned long phys_mem, bus;
+	unsigned long bus;

 	if (!dev)
 		dev = &fallback_dev;

-	phys_mem = virt_to_phys(addr);
-	if (!need_iommu(dev, phys_mem, size))
-		return phys_mem;
+	if (!need_iommu(dev, paddr, size))
+		return paddr;

-	bus = gart_map_simple(dev, addr, size, dir);
+	bus = gart_map_simple(dev, paddr, size, dir);

 	return bus;
 }
diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu.c
index ab08e1832228..aec43d56f49c 100644
--- a/arch/x86/kernel/pci-nommu_64.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -14,7 +14,7 @@
14static int 14static int
15check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) 15check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
16{ 16{
17 if (hwdev && bus + size > *hwdev->dma_mask) { 17 if (hwdev && bus + size > *hwdev->dma_mask) {
18 if (*hwdev->dma_mask >= DMA_32BIT_MASK) 18 if (*hwdev->dma_mask >= DMA_32BIT_MASK)
19 printk(KERN_ERR 19 printk(KERN_ERR
20 "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", 20 "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
@@ -26,19 +26,17 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
26} 26}
27 27
28static dma_addr_t 28static dma_addr_t
29nommu_map_single(struct device *hwdev, void *ptr, size_t size, 29nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size,
30 int direction) 30 int direction)
31{ 31{
32 dma_addr_t bus = virt_to_bus(ptr); 32 dma_addr_t bus = paddr;
33 WARN_ON(size == 0);
33 if (!check_addr("map_single", hwdev, bus, size)) 34 if (!check_addr("map_single", hwdev, bus, size))
34 return bad_dma_address; 35 return bad_dma_address;
36 flush_write_buffers();
35 return bus; 37 return bus;
36} 38}
37 39
38static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
39 int direction)
40{
41}
42 40
43/* Map a set of buffers described by scatterlist in streaming 41/* Map a set of buffers described by scatterlist in streaming
44 * mode for DMA. This is the scatter-gather version of the 42 * mode for DMA. This is the scatter-gather version of the
@@ -61,30 +59,34 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
61 struct scatterlist *s; 59 struct scatterlist *s;
62 int i; 60 int i;
63 61
62 WARN_ON(nents == 0 || sg[0].length == 0);
63
64 for_each_sg(sg, s, nents, i) { 64 for_each_sg(sg, s, nents, i) {
65 BUG_ON(!sg_page(s)); 65 BUG_ON(!sg_page(s));
66 s->dma_address = virt_to_bus(sg_virt(s)); 66 s->dma_address = sg_phys(s);
67 if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) 67 if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
68 return 0; 68 return 0;
69 s->dma_length = s->length; 69 s->dma_length = s->length;
70 } 70 }
71 flush_write_buffers();
71 return nents; 72 return nents;
72} 73}
73 74
74/* Unmap a set of streaming mode DMA translations. 75/* Make sure we keep the same behaviour */
75 * Again, cpu read rules concerning calls here are the same as for 76static int nommu_mapping_error(dma_addr_t dma_addr)
76 * pci_unmap_single() above.
77 */
78static void nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
79 int nents, int dir)
80{ 77{
78#ifdef CONFIG_X86_32
79 return 0;
80#else
81 return (dma_addr == bad_dma_address);
82#endif
81} 83}
82 84
85
83const struct dma_mapping_ops nommu_dma_ops = { 86const struct dma_mapping_ops nommu_dma_ops = {
84 .map_single = nommu_map_single, 87 .map_single = nommu_map_single,
85 .unmap_single = nommu_unmap_single,
86 .map_sg = nommu_map_sg, 88 .map_sg = nommu_map_sg,
87 .unmap_sg = nommu_unmap_sg, 89 .mapping_error = nommu_mapping_error,
88 .is_phys = 1, 90 .is_phys = 1,
89}; 91};
90 92
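
The nommu backend is the identity mapping plus a mask check, and the new nommu_mapping_error() hook is what lets callers detect the bad_dma_address case on 64-bit. A hypothetical driver-side fragment showing the check this enables; dev, buf and len are assumed from the surrounding driver.

static int example_map(struct device *dev, void *buf, size_t len)
{
	dma_addr_t bus = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	if (dma_mapping_error(bus))
		return -EIO;	/* buffer overflowed the device's DMA mask */
	return 0;
}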
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 82a0a674a003..490da7f4b8d0 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -11,11 +11,18 @@
11 11
12int swiotlb __read_mostly; 12int swiotlb __read_mostly;
13 13
14static dma_addr_t
15swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
16 int direction)
17{
18 return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
19}
20
14const struct dma_mapping_ops swiotlb_dma_ops = { 21const struct dma_mapping_ops swiotlb_dma_ops = {
15 .mapping_error = swiotlb_dma_mapping_error, 22 .mapping_error = swiotlb_dma_mapping_error,
16 .alloc_coherent = swiotlb_alloc_coherent, 23 .alloc_coherent = swiotlb_alloc_coherent,
17 .free_coherent = swiotlb_free_coherent, 24 .free_coherent = swiotlb_free_coherent,
18 .map_single = swiotlb_map_single, 25 .map_single = swiotlb_map_single_phys,
19 .unmap_single = swiotlb_unmap_single, 26 .unmap_single = swiotlb_unmap_single,
20 .sync_single_for_cpu = swiotlb_sync_single_for_cpu, 27 .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
21 .sync_single_for_device = swiotlb_sync_single_for_device, 28 .sync_single_for_device = swiotlb_sync_single_for_device,
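
swiotlb's implementation still wants a kernel virtual address, hence the one-line shim converting the new phys_addr_t argument back with phys_to_virt(). That is only sound because map_single is handed direct-mapped RAM, where the conversion round-trips; the (hypothetical) self-check below states the assumption as code.

static void __init check_directmap_roundtrip(void)
{
	void *buf = kmalloc(256, GFP_KERNEL);

	/* holds for all direct-mapped memory, which is all this
	 * shim will ever see */
	BUG_ON(phys_to_virt(virt_to_phys(buf)) != buf);
	kfree(buf);
}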
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
new file mode 100644
index 000000000000..3004d716539d
--- /dev/null
+++ b/arch/x86/kernel/process.c
@@ -0,0 +1,44 @@
1#include <linux/errno.h>
2#include <linux/kernel.h>
3#include <linux/mm.h>
4#include <linux/smp.h>
5#include <linux/slab.h>
6#include <linux/sched.h>
7
8struct kmem_cache *task_xstate_cachep;
9
10int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
11{
12 *dst = *src;
13 if (src->thread.xstate) {
14 dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
15 GFP_KERNEL);
16 if (!dst->thread.xstate)
17 return -ENOMEM;
18 WARN_ON((unsigned long)dst->thread.xstate & 15);
19 memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
20 }
21 return 0;
22}
23
24void free_thread_xstate(struct task_struct *tsk)
25{
26 if (tsk->thread.xstate) {
27 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
28 tsk->thread.xstate = NULL;
29 }
30}
31
32void free_thread_info(struct thread_info *ti)
33{
34 free_thread_xstate(ti->task);
35 free_pages((unsigned long)ti, get_order(THREAD_SIZE));
36}
37
38void arch_task_cache_init(void)
39{
40 task_xstate_cachep =
41 kmem_cache_create("task_xstate", xstate_size,
42 __alignof__(union thread_xstate),
43 SLAB_PANIC, NULL);
44}
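
The new process.c holds code shared by both word sizes, and together with the traps_*.c hunks below it implements lazy FPU-state allocation: fork copies xstate only when the parent has one, exec frees it from start_thread(), and the first FPU use reallocates it. The allocating half lives in i387.c (in this commit's diffstat but not shown here); a condensed approximation, not the literal code:

int init_fpu(struct task_struct *tsk)
{
	if (tsk->thread.xstate)
		return 0;
	tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
					      GFP_KERNEL);
	if (!tsk->thread.xstate)
		return -ENOMEM;
	/* ... then initialise the fxsave/fsave image and mark the
	 * task as having used math, as i387.c does ... */
	return 0;
}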
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3903a8f2eb97..7adad088e373 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -36,6 +36,7 @@
36#include <linux/personality.h> 36#include <linux/personality.h>
37#include <linux/tick.h> 37#include <linux/tick.h>
38#include <linux/percpu.h> 38#include <linux/percpu.h>
39#include <linux/prctl.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/pgtable.h> 42#include <asm/pgtable.h>
@@ -45,7 +46,6 @@
45#include <asm/processor.h> 46#include <asm/processor.h>
46#include <asm/i387.h> 47#include <asm/i387.h>
47#include <asm/desc.h> 48#include <asm/desc.h>
48#include <asm/vm86.h>
49#ifdef CONFIG_MATH_EMULATION 49#ifdef CONFIG_MATH_EMULATION
50#include <asm/math_emu.h> 50#include <asm/math_emu.h>
51#endif 51#endif
@@ -521,14 +521,18 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
521 regs->cs = __USER_CS; 521 regs->cs = __USER_CS;
522 regs->ip = new_ip; 522 regs->ip = new_ip;
523 regs->sp = new_sp; 523 regs->sp = new_sp;
524 /*
525 * Free the old FP and other extended state
526 */
527 free_thread_xstate(current);
524} 528}
525EXPORT_SYMBOL_GPL(start_thread); 529EXPORT_SYMBOL_GPL(start_thread);
526 530
527#ifdef CONFIG_SECCOMP
528static void hard_disable_TSC(void) 531static void hard_disable_TSC(void)
529{ 532{
530 write_cr4(read_cr4() | X86_CR4_TSD); 533 write_cr4(read_cr4() | X86_CR4_TSD);
531} 534}
535
532void disable_TSC(void) 536void disable_TSC(void)
533{ 537{
534 preempt_disable(); 538 preempt_disable();
@@ -540,11 +544,47 @@ void disable_TSC(void)
540 hard_disable_TSC(); 544 hard_disable_TSC();
541 preempt_enable(); 545 preempt_enable();
542} 546}
547
543static void hard_enable_TSC(void) 548static void hard_enable_TSC(void)
544{ 549{
545 write_cr4(read_cr4() & ~X86_CR4_TSD); 550 write_cr4(read_cr4() & ~X86_CR4_TSD);
546} 551}
547#endif /* CONFIG_SECCOMP */ 552
553void enable_TSC(void)
554{
555 preempt_disable();
556 if (test_and_clear_thread_flag(TIF_NOTSC))
557 /*
558 * Must flip the CPU state synchronously with
559 * TIF_NOTSC in the current running context.
560 */
561 hard_enable_TSC();
562 preempt_enable();
563}
564
565int get_tsc_mode(unsigned long adr)
566{
567 unsigned int val;
568
569 if (test_thread_flag(TIF_NOTSC))
570 val = PR_TSC_SIGSEGV;
571 else
572 val = PR_TSC_ENABLE;
573
574 return put_user(val, (unsigned int __user *)adr);
575}
576
577int set_tsc_mode(unsigned int val)
578{
579 if (val == PR_TSC_SIGSEGV)
580 disable_TSC();
581 else if (val == PR_TSC_ENABLE)
582 enable_TSC();
583 else
584 return -EINVAL;
585
586 return 0;
587}
548 588
549static noinline void 589static noinline void
550__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 590__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
@@ -578,7 +618,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
578 set_debugreg(next->debugreg7, 7); 618 set_debugreg(next->debugreg7, 7);
579 } 619 }
580 620
581#ifdef CONFIG_SECCOMP
582 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 621 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
583 test_tsk_thread_flag(next_p, TIF_NOTSC)) { 622 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
584 /* prev and next are different */ 623 /* prev and next are different */
@@ -587,7 +626,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
587 else 626 else
588 hard_enable_TSC(); 627 hard_enable_TSC();
589 } 628 }
590#endif
591 629
592#ifdef X86_BTS 630#ifdef X86_BTS
593 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 631 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
@@ -669,7 +707,7 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct
669 707
670 /* we're going to use this soon, after a few expensive things */ 708 /* we're going to use this soon, after a few expensive things */
671 if (next_p->fpu_counter > 5) 709 if (next_p->fpu_counter > 5)
672 prefetch(&next->i387.fxsave); 710 prefetch(next->xstate);
673 711
674 /* 712 /*
675 * Reload esp0. 713 * Reload esp0.
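
get_tsc_mode()/set_tsc_mode() back two new prctl operations, PR_GET_TSC and PR_SET_TSC, added to linux/prctl.h elsewhere in this series. An illustrative userspace probe, assuming a libc whose headers already carry the new constants:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	int mode = 0;

	prctl(PR_SET_TSC, PR_TSC_SIGSEGV);
	prctl(PR_GET_TSC, &mode);
	printf("tsc mode: %s\n",
	       mode == PR_TSC_SIGSEGV ? "sigsegv" : "enabled");
	return 0;
}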
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e75ccc8a2b87..891af1a1b48a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -36,6 +36,7 @@
36#include <linux/kprobes.h> 36#include <linux/kprobes.h>
37#include <linux/kdebug.h> 37#include <linux/kdebug.h>
38#include <linux/tick.h> 38#include <linux/tick.h>
39#include <linux/prctl.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/pgtable.h> 42#include <asm/pgtable.h>
@@ -532,9 +533,71 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
532 regs->ss = __USER_DS; 533 regs->ss = __USER_DS;
533 regs->flags = 0x200; 534 regs->flags = 0x200;
534 set_fs(USER_DS); 535 set_fs(USER_DS);
536 /*
537 * Free the old FP and other extended state
538 */
539 free_thread_xstate(current);
535} 540}
536EXPORT_SYMBOL_GPL(start_thread); 541EXPORT_SYMBOL_GPL(start_thread);
537 542
543static void hard_disable_TSC(void)
544{
545 write_cr4(read_cr4() | X86_CR4_TSD);
546}
547
548void disable_TSC(void)
549{
550 preempt_disable();
551 if (!test_and_set_thread_flag(TIF_NOTSC))
552 /*
553 * Must flip the CPU state synchronously with
554 * TIF_NOTSC in the current running context.
555 */
556 hard_disable_TSC();
557 preempt_enable();
558}
559
560static void hard_enable_TSC(void)
561{
562 write_cr4(read_cr4() & ~X86_CR4_TSD);
563}
564
565void enable_TSC(void)
566{
567 preempt_disable();
568 if (test_and_clear_thread_flag(TIF_NOTSC))
569 /*
570 * Must flip the CPU state synchronously with
571 * TIF_NOTSC in the current running context.
572 */
573 hard_enable_TSC();
574 preempt_enable();
575}
576
577int get_tsc_mode(unsigned long adr)
578{
579 unsigned int val;
580
581 if (test_thread_flag(TIF_NOTSC))
582 val = PR_TSC_SIGSEGV;
583 else
584 val = PR_TSC_ENABLE;
585
586 return put_user(val, (unsigned int __user *)adr);
587}
588
589int set_tsc_mode(unsigned int val)
590{
591 if (val == PR_TSC_SIGSEGV)
592 disable_TSC();
593 else if (val == PR_TSC_ENABLE)
594 enable_TSC();
595 else
596 return -EINVAL;
597
598 return 0;
599}
600
538/* 601/*
539 * This special macro can be used to load a debugging register 602 * This special macro can be used to load a debugging register
540 */ 603 */
@@ -572,6 +635,15 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
572 loaddebug(next, 7); 635 loaddebug(next, 7);
573 } 636 }
574 637
638 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
639 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
640 /* prev and next are different */
641 if (test_tsk_thread_flag(next_p, TIF_NOTSC))
642 hard_disable_TSC();
643 else
644 hard_enable_TSC();
645 }
646
575 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 647 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
576 /* 648 /*
577 * Copy the relevant range of the IO bitmap. 649 * Copy the relevant range of the IO bitmap.
@@ -614,7 +686,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
614 686
615 /* we're going to use this soon, after a few expensive things */ 687 /* we're going to use this soon, after a few expensive things */
616 if (next_p->fpu_counter>5) 688 if (next_p->fpu_counter>5)
617 prefetch(&next->i387.fxsave); 689 prefetch(next->xstate);
618 690
619 /* 691 /*
620 * Reload esp0, LDT and the page table pointer: 692 * Reload esp0, LDT and the page table pointer:
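
With PR_TSC_SIGSEGV in force, CR4.TSD makes a user-mode rdtsc fault with #GP, which the kernel delivers as SIGSEGV; a handler can then log, deny or emulate the read. A hypothetical demonstration (printf in a signal handler is not strictly safe, but fine for a throwaway test):

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/prctl.h>

static void on_segv(int sig)
{
	printf("rdtsc trapped\n");
	exit(0);
}

int main(void)
{
	unsigned int lo, hi;

	signal(SIGSEGV, on_segv);
	prctl(PR_SET_TSC, PR_TSC_SIGSEGV);
	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return 1;	/* not reached: rdtsc faults first */
}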
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 9042fb0e36f5..aee0e8200777 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -74,8 +74,8 @@ int force_personality32 = 0;
74Control non executable heap for 32bit processes. 74Control non executable heap for 32bit processes.
75To control the stack too use noexec=off 75To control the stack too use noexec=off
76 76
77on PROT_READ does not imply PROT_EXEC for 32bit processes 77on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
78off PROT_READ implies PROT_EXEC (default) 78off PROT_READ implies PROT_EXEC
79*/ 79*/
80static int __init nonx32_setup(char *str) 80static int __init nonx32_setup(char *str)
81{ 81{
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 5b0bffb7fcc9..1c4799e68718 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -812,10 +812,10 @@ void __init setup_arch(char **cmdline_p)
812 efi_init(); 812 efi_init();
813 813
814 /* update e820 for memory not covered by WB MTRRs */ 814 /* update e820 for memory not covered by WB MTRRs */
815 find_max_pfn(); 815 propagate_e820_map();
816 mtrr_bp_init(); 816 mtrr_bp_init();
817 if (mtrr_trim_uncached_memory(max_pfn)) 817 if (mtrr_trim_uncached_memory(max_pfn))
818 find_max_pfn(); 818 propagate_e820_map();
819 819
820 max_low_pfn = setup_memory(); 820 max_low_pfn = setup_memory();
821 821
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 674ef3510cdf..6b8e11f0c15d 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -398,6 +398,8 @@ void __init setup_arch(char **cmdline_p)
398 398
399 early_res_to_bootmem(); 399 early_res_to_bootmem();
400 400
401 dma32_reserve_bootmem();
402
401#ifdef CONFIG_ACPI_SLEEP 403#ifdef CONFIG_ACPI_SLEEP
402 /* 404 /*
403 * Reserve low memory region for sleep support. 405 * Reserve low memory region for sleep support.
@@ -420,11 +422,14 @@ void __init setup_arch(char **cmdline_p)
420 unsigned long end_of_mem = end_pfn << PAGE_SHIFT; 422 unsigned long end_of_mem = end_pfn << PAGE_SHIFT;
421 423
422 if (ramdisk_end <= end_of_mem) { 424 if (ramdisk_end <= end_of_mem) {
423 reserve_bootmem_generic(ramdisk_image, ramdisk_size); 425 /*
426 * don't need to reserve again, already reserved early
427 * in x86_64_start_kernel, and early_res_to_bootmem
428 * converts that to reserved in bootmem

429 */
424 initrd_start = ramdisk_image + PAGE_OFFSET; 430 initrd_start = ramdisk_image + PAGE_OFFSET;
425 initrd_end = initrd_start+ramdisk_size; 431 initrd_end = initrd_start+ramdisk_size;
426 } else { 432 } else {
427 /* Assumes everything on node 0 */
428 free_bootmem(ramdisk_image, ramdisk_size); 433 free_bootmem(ramdisk_image, ramdisk_size);
429 printk(KERN_ERR "initrd extends beyond end of memory " 434 printk(KERN_ERR "initrd extends beyond end of memory "
430 "(0x%08lx > 0x%08lx)\ndisabling initrd\n", 435 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e6abe8a49b1f..6a925394bc7e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -61,6 +61,7 @@
61#include <asm/mtrr.h> 61#include <asm/mtrr.h>
62#include <asm/nmi.h> 62#include <asm/nmi.h>
63#include <asm/vmi.h> 63#include <asm/vmi.h>
64#include <asm/genapic.h>
64#include <linux/mc146818rtc.h> 65#include <linux/mc146818rtc.h>
65 66
66#include <mach_apic.h> 67#include <mach_apic.h>
@@ -677,6 +678,12 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
677 unsigned long send_status, accept_status = 0; 678 unsigned long send_status, accept_status = 0;
678 int maxlvt, num_starts, j; 679 int maxlvt, num_starts, j;
679 680
681 if (get_uv_system_type() == UV_NON_UNIQUE_APIC) {
682 send_status = uv_wakeup_secondary(phys_apicid, start_eip);
683 atomic_set(&init_deasserted, 1);
684 return send_status;
685 }
686
680 /* 687 /*
681 * Be paranoid about clearing APIC errors. 688 * Be paranoid about clearing APIC errors.
682 */ 689 */
@@ -918,16 +925,19 @@ do_rest:
918 925
919 atomic_set(&init_deasserted, 0); 926 atomic_set(&init_deasserted, 0);
920 927
921 Dprintk("Setting warm reset code and vector.\n"); 928 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
922 929
923 store_NMI_vector(&nmi_high, &nmi_low); 930 Dprintk("Setting warm reset code and vector.\n");
924 931
925 smpboot_setup_warm_reset_vector(start_ip); 932 store_NMI_vector(&nmi_high, &nmi_low);
926 /* 933
927 * Be paranoid about clearing APIC errors. 934 smpboot_setup_warm_reset_vector(start_ip);
928 */ 935 /*
929 apic_write(APIC_ESR, 0); 936 * Be paranoid about clearing APIC errors.
930 apic_read(APIC_ESR); 937 */
938 apic_write(APIC_ESR, 0);
939 apic_read(APIC_ESR);
940 }
931 941
932 /* 942 /*
933 * Starting actual IPI sequence... 943 * Starting actual IPI sequence...
@@ -966,7 +976,8 @@ do_rest:
966 else 976 else
967 /* trampoline code not run */ 977 /* trampoline code not run */
968 printk(KERN_ERR "Not responding.\n"); 978 printk(KERN_ERR "Not responding.\n");
969 inquire_remote_apic(apicid); 979 if (get_uv_system_type() != UV_NON_UNIQUE_APIC)
980 inquire_remote_apic(apicid);
970 } 981 }
971 } 982 }
972 983
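
On SGI UV systems the hub wakes secondary CPUs, so the legacy INIT/SIPI sequence, the warm-reset vector setup and the remote-APIC inquiry are all bypassed. The dispatch condensed to its shape; the legacy-side helper name is invented for illustration, in the real function that code simply continues inline:

if (get_uv_system_type() == UV_NON_UNIQUE_APIC) {
	send_status = uv_wakeup_secondary(phys_apicid, start_eip);
	atomic_set(&init_deasserted, 1);
	return send_status;
}
return legacy_init_sipi_sipi(phys_apicid, start_eip);	/* hypothetical */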
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 65791ca2824a..471e694d6713 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -681,7 +681,7 @@ gp_in_kernel:
681 } 681 }
682} 682}
683 683
684static __kprobes void 684static notrace __kprobes void
685mem_parity_error(unsigned char reason, struct pt_regs *regs) 685mem_parity_error(unsigned char reason, struct pt_regs *regs)
686{ 686{
687 printk(KERN_EMERG 687 printk(KERN_EMERG
@@ -707,7 +707,7 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs)
707 clear_mem_error(reason); 707 clear_mem_error(reason);
708} 708}
709 709
710static __kprobes void 710static notrace __kprobes void
711io_check_error(unsigned char reason, struct pt_regs *regs) 711io_check_error(unsigned char reason, struct pt_regs *regs)
712{ 712{
713 unsigned long i; 713 unsigned long i;
@@ -727,7 +727,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
727 outb(reason, 0x61); 727 outb(reason, 0x61);
728} 728}
729 729
730static __kprobes void 730static notrace __kprobes void
731unknown_nmi_error(unsigned char reason, struct pt_regs *regs) 731unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
732{ 732{
733 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 733 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
@@ -755,7 +755,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
755 755
756static DEFINE_SPINLOCK(nmi_print_lock); 756static DEFINE_SPINLOCK(nmi_print_lock);
757 757
758void __kprobes die_nmi(struct pt_regs *regs, const char *msg) 758void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
759{ 759{
760 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) 760 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP)
761 return; 761 return;
@@ -786,7 +786,7 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
786 do_exit(SIGSEGV); 786 do_exit(SIGSEGV);
787} 787}
788 788
789static __kprobes void default_do_nmi(struct pt_regs *regs) 789static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
790{ 790{
791 unsigned char reason = 0; 791 unsigned char reason = 0;
792 792
@@ -828,7 +828,7 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
828 828
829static int ignore_nmis; 829static int ignore_nmis;
830 830
831__kprobes void do_nmi(struct pt_regs *regs, long error_code) 831notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
832{ 832{
833 int cpu; 833 int cpu;
834 834
@@ -1148,9 +1148,22 @@ asmlinkage void math_state_restore(void)
1148 struct thread_info *thread = current_thread_info(); 1148 struct thread_info *thread = current_thread_info();
1149 struct task_struct *tsk = thread->task; 1149 struct task_struct *tsk = thread->task;
1150 1150
1151 if (!tsk_used_math(tsk)) {
1152 local_irq_enable();
1153 /*
1154 * does a slab alloc which can sleep
1155 */
1156 if (init_fpu(tsk)) {
1157 /*
1158 * ran out of memory!
1159 */
1160 do_group_exit(SIGKILL);
1161 return;
1162 }
1163 local_irq_disable();
1164 }
1165
1151 clts(); /* Allow maths ops (or we recurse) */ 1166 clts(); /* Allow maths ops (or we recurse) */
1152 if (!tsk_used_math(tsk))
1153 init_fpu(tsk);
1154 restore_fpu(tsk); 1167 restore_fpu(tsk);
1155 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ 1168 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
1156 tsk->fpu_counter++; 1169 tsk->fpu_counter++;
@@ -1208,11 +1221,6 @@ void __init trap_init(void)
1208#endif 1221#endif
1209 set_trap_gate(19, &simd_coprocessor_error); 1222 set_trap_gate(19, &simd_coprocessor_error);
1210 1223
1211 /*
1212 * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
1213 * Generate a build-time error if the alignment is wrong.
1214 */
1215 BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
1216 if (cpu_has_fxsr) { 1224 if (cpu_has_fxsr) {
1217 printk(KERN_INFO "Enabling fast FPU save and restore... "); 1225 printk(KERN_INFO "Enabling fast FPU save and restore... ");
1218 set_in_cr4(X86_CR4_OSFXSR); 1226 set_in_cr4(X86_CR4_OSFXSR);
@@ -1233,6 +1241,7 @@ void __init trap_init(void)
1233 1241
1234 set_bit(SYSCALL_VECTOR, used_vectors); 1242 set_bit(SYSCALL_VECTOR, used_vectors);
1235 1243
1244 init_thread_xstate();
1236 /* 1245 /*
1237 * Should be a barrier for any external CPU state: 1246 * Should be a barrier for any external CPU state:
1238 */ 1247 */
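
The notrace additions matter once the function tracer is in play: instrumented builds emit a profiling call in every function prologue, and an NMI landing inside the tracer must not re-enter traced code. To the best of my reading, notrace expands in this era's include/linux/compiler.h to the attribute below; a sketch of its effect:

/* assumed definition, from include/linux/compiler.h */
#define notrace __attribute__((no_instrument_function))

static notrace void nmi_path_helper(void)
{
	/* no profiling call is emitted for this function, so it is
	 * safe to execute from NMI context under the tracer */
}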
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 79aa6fc0815c..adff76ea97c4 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -600,7 +600,8 @@ void die(const char * str, struct pt_regs * regs, long err)
600 oops_end(flags, regs, SIGSEGV); 600 oops_end(flags, regs, SIGSEGV);
601} 601}
602 602
603void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) 603notrace __kprobes void
604die_nmi(char *str, struct pt_regs *regs, int do_panic)
604{ 605{
605 unsigned long flags; 606 unsigned long flags;
606 607
@@ -772,7 +773,7 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
772 die("general protection fault", regs, error_code); 773 die("general protection fault", regs, error_code);
773} 774}
774 775
775static __kprobes void 776static notrace __kprobes void
776mem_parity_error(unsigned char reason, struct pt_regs * regs) 777mem_parity_error(unsigned char reason, struct pt_regs * regs)
777{ 778{
778 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", 779 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
@@ -796,7 +797,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
796 outb(reason, 0x61); 797 outb(reason, 0x61);
797} 798}
798 799
799static __kprobes void 800static notrace __kprobes void
800io_check_error(unsigned char reason, struct pt_regs * regs) 801io_check_error(unsigned char reason, struct pt_regs * regs)
801{ 802{
802 printk("NMI: IOCK error (debug interrupt?)\n"); 803 printk("NMI: IOCK error (debug interrupt?)\n");
@@ -810,7 +811,7 @@ io_check_error(unsigned char reason, struct pt_regs * regs)
810 outb(reason, 0x61); 811 outb(reason, 0x61);
811} 812}
812 813
813static __kprobes void 814static notrace __kprobes void
814unknown_nmi_error(unsigned char reason, struct pt_regs * regs) 815unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
815{ 816{
816 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 817 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
@@ -827,7 +828,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
827 828
828/* Runs on IST stack. This code must keep interrupts off all the time. 829/* Runs on IST stack. This code must keep interrupts off all the time.
829 Nested NMIs are prevented by the CPU. */ 830 Nested NMIs are prevented by the CPU. */
830asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs) 831asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
831{ 832{
832 unsigned char reason = 0; 833 unsigned char reason = 0;
833 int cpu; 834 int cpu;
@@ -1123,11 +1124,24 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
1123asmlinkage void math_state_restore(void) 1124asmlinkage void math_state_restore(void)
1124{ 1125{
1125 struct task_struct *me = current; 1126 struct task_struct *me = current;
1126 clts(); /* Allow maths ops (or we recurse) */
1127 1127
1128 if (!used_math()) 1128 if (!used_math()) {
1129 init_fpu(me); 1129 local_irq_enable();
1130 restore_fpu_checking(&me->thread.i387.fxsave); 1130 /*
1131 * does a slab alloc which can sleep
1132 */
1133 if (init_fpu(me)) {
1134 /*
1135 * ran out of memory!
1136 */
1137 do_group_exit(SIGKILL);
1138 return;
1139 }
1140 local_irq_disable();
1141 }
1142
1143 clts(); /* Allow maths ops (or we recurse) */
1144 restore_fpu_checking(&me->thread.xstate->fxsave);
1131 task_thread_info(me)->status |= TS_USEDFPU; 1145 task_thread_info(me)->status |= TS_USEDFPU;
1132 me->fpu_counter++; 1146 me->fpu_counter++;
1133} 1147}
@@ -1163,6 +1177,10 @@ void __init trap_init(void)
1163#endif 1177#endif
1164 1178
1165 /* 1179 /*
1180 * initialize the per thread extended state:
1181 */
1182 init_thread_xstate();
1183 /*
1166 * Should be a barrier for any external CPU state. 1184 * Should be a barrier for any external CPU state.
1167 */ 1185 */
1168 cpu_init(); 1186 cpu_init();
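
Both math_state_restore() variants now uphold the same invariant: used_math() implies thread.xstate is allocated, and because the allocation can sleep, interrupts are opened just around init_fpu() (the device-not-available trap arrives with them off). For context, the flag helpers referenced above come from <linux/sched.h> and predate this series:

#define tsk_used_math(p)	((p)->flags & PF_USED_MATH)
#define used_math()		tsk_used_math(current)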
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 3d7e6e9fa6c2..e4790728b224 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -221,9 +221,9 @@ EXPORT_SYMBOL(recalibrate_cpu_khz);
221 * if the CPU frequency is scaled, TSC-based delays will need a different 221 * if the CPU frequency is scaled, TSC-based delays will need a different
222 * loops_per_jiffy value to function properly. 222 * loops_per_jiffy value to function properly.
223 */ 223 */
224static unsigned int ref_freq = 0; 224static unsigned int ref_freq;
225static unsigned long loops_per_jiffy_ref = 0; 225static unsigned long loops_per_jiffy_ref;
226static unsigned long cpu_khz_ref = 0; 226static unsigned long cpu_khz_ref;
227 227
228static int 228static int
229time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) 229time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
@@ -283,15 +283,28 @@ core_initcall(cpufreq_tsc);
283 283
284/* clock source code */ 284/* clock source code */
285 285
286static unsigned long current_tsc_khz = 0; 286static unsigned long current_tsc_khz;
287static struct clocksource clocksource_tsc;
287 288
289/*
290 * We compare the TSC to the cycle_last value in the clocksource
291 * structure to avoid a nasty time-warp issue. This can be observed in
292 * a very small window right after one CPU updated cycle_last under
293 * xtime lock and the other CPU reads a TSC value which is smaller
294 * than the cycle_last reference value due to a TSC which is slightly
295 * behind. This delta is nowhere else observable, but in that case it
296 * results in a forward time jump in the range of hours due to the
297 * unsigned delta calculation of the time keeping core code, which is
298 * necessary to support wrapping clocksources like pm timer.
299 */
288static cycle_t read_tsc(void) 300static cycle_t read_tsc(void)
289{ 301{
290 cycle_t ret; 302 cycle_t ret;
291 303
292 rdtscll(ret); 304 rdtscll(ret);
293 305
294 return ret; 306 return ret >= clocksource_tsc.cycle_last ?
307 ret : clocksource_tsc.cycle_last;
295} 308}
296 309
297static struct clocksource clocksource_tsc = { 310static struct clocksource clocksource_tsc = {
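
The clamp protects the timekeeping core's unsigned delta arithmetic: if this CPU's TSC reads even a few cycles behind the cycle_last another CPU just stored, (ret - cycle_last) wraps to an enormous positive delta and time jumps forward by hours. The fix in isolation, as a standalone model of the read_tsc() change above:

static inline u64 monotonic_read(u64 raw, u64 cycle_last)
{
	/* never report a value behind the last committed reference */
	return raw >= cycle_last ? raw : cycle_last;
}

tsc_64.c below applies the same clamp to both the kernel read_tsc() and the vsyscall vread_tsc() path.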
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index ceeba01e7f47..fcc16e58609e 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -11,6 +11,7 @@
11#include <asm/hpet.h> 11#include <asm/hpet.h>
12#include <asm/timex.h> 12#include <asm/timex.h>
13#include <asm/timer.h> 13#include <asm/timer.h>
14#include <asm/vgtod.h>
14 15
15static int notsc __initdata = 0; 16static int notsc __initdata = 0;
16 17
@@ -287,18 +288,34 @@ int __init notsc_setup(char *s)
287 288
288__setup("notsc", notsc_setup); 289__setup("notsc", notsc_setup);
289 290
291static struct clocksource clocksource_tsc;
290 292
291/* clock source code: */ 293/*
294 * We compare the TSC to the cycle_last value in the clocksource
295 * structure to avoid a nasty time-warp. This can be observed in a
296 * very small window right after one CPU updated cycle_last under
297 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
298 * is smaller than the cycle_last reference value due to a TSC which
299 * is slightly behind. This delta is nowhere else observable, but in
300 * that case it results in a forward time jump in the range of hours
301 * due to the unsigned delta calculation of the time keeping core
302 * code, which is necessary to support wrapping clocksources like pm
303 * timer.
304 */
292static cycle_t read_tsc(void) 305static cycle_t read_tsc(void)
293{ 306{
294 cycle_t ret = (cycle_t)get_cycles(); 307 cycle_t ret = (cycle_t)get_cycles();
295 return ret; 308
309 return ret >= clocksource_tsc.cycle_last ?
310 ret : clocksource_tsc.cycle_last;
296} 311}
297 312
298static cycle_t __vsyscall_fn vread_tsc(void) 313static cycle_t __vsyscall_fn vread_tsc(void)
299{ 314{
300 cycle_t ret = (cycle_t)vget_cycles(); 315 cycle_t ret = (cycle_t)vget_cycles();
301 return ret; 316
317 return ret >= __vsyscall_gtod_data.clock.cycle_last ?
318 ret : __vsyscall_gtod_data.clock.cycle_last;
302} 319}
303 320
304static struct clocksource clocksource_tsc = { 321static struct clocksource clocksource_tsc = {