diff options
author | Haavard Skinnemoen <haavard.skinnemoen@atmel.com> | 2008-07-27 07:54:08 -0400 |
---|---|---|
committer | Haavard Skinnemoen <haavard.skinnemoen@atmel.com> | 2008-07-27 07:54:08 -0400 |
commit | eda3d8f5604860aae1bb9996bb5efc4213778369 (patch) | |
tree | 9d3887d2665bcc5f5abf200758794545c7b2c69b /arch/x86/kernel | |
parent | 87a9f704658a40940e740b1d73d861667e9164d3 (diff) | |
parent | 8be1a6d6c77ab4532e4476fdb8177030ef48b52c (diff) |
Merge commit 'upstream/master'
Diffstat (limited to 'arch/x86/kernel')
83 files changed, 2147 insertions, 1196 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index da140611bb57..3db651fc8ec5 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -7,9 +7,10 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu | |||
7 | CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) | 7 | CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) |
8 | 8 | ||
9 | ifdef CONFIG_FTRACE | 9 | ifdef CONFIG_FTRACE |
10 | # Do not profile debug utilities | 10 | # Do not profile debug and lowlevel utilities |
11 | CFLAGS_REMOVE_tsc.o = -pg | 11 | CFLAGS_REMOVE_tsc.o = -pg |
12 | CFLAGS_REMOVE_rtc.o = -pg | 12 | CFLAGS_REMOVE_rtc.o = -pg |
13 | CFLAGS_REMOVE_paravirt.o = -pg | ||
13 | endif | 14 | endif |
14 | 15 | ||
15 | # | 16 | # |
@@ -102,6 +103,7 @@ obj-$(CONFIG_OLPC) += olpc.o | |||
102 | # 64 bit specific files | 103 | # 64 bit specific files |
103 | ifeq ($(CONFIG_X86_64),y) | 104 | ifeq ($(CONFIG_X86_64),y) |
104 | obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o | 105 | obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o |
106 | obj-y += bios_uv.o | ||
105 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o | 107 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o |
106 | obj-$(CONFIG_AUDIT) += audit_64.o | 108 | obj-$(CONFIG_AUDIT) += audit_64.o |
107 | 109 | ||
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f489d7a9be92..fa88a1d71290 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -1021,7 +1021,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
1021 | mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; | 1021 | mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; |
1022 | #endif | 1022 | #endif |
1023 | set_bit(MP_ISA_BUS, mp_bus_not_pci); | 1023 | set_bit(MP_ISA_BUS, mp_bus_not_pci); |
1024 | Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); | 1024 | pr_debug("Bus #%d is ISA\n", MP_ISA_BUS); |
1025 | 1025 | ||
1026 | #ifdef CONFIG_X86_ES7000 | 1026 | #ifdef CONFIG_X86_ES7000 |
1027 | /* | 1027 | /* |
@@ -1127,8 +1127,8 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) | |||
1127 | return gsi; | 1127 | return gsi; |
1128 | } | 1128 | } |
1129 | if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { | 1129 | if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { |
1130 | Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", | 1130 | pr_debug(KERN_DEBUG "Pin %d-%d already programmed\n", |
1131 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); | 1131 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); |
1132 | #ifdef CONFIG_X86_32 | 1132 | #ifdef CONFIG_X86_32 |
1133 | return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); | 1133 | return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); |
1134 | #else | 1134 | #else |
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index c2502eb9aa83..9220cf46aa10 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c | |||
@@ -73,6 +73,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, | |||
73 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 73 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
74 | 74 | ||
75 | cpumask_t saved_mask; | 75 | cpumask_t saved_mask; |
76 | cpumask_of_cpu_ptr(new_mask, cpu); | ||
76 | int retval; | 77 | int retval; |
77 | unsigned int eax, ebx, ecx, edx; | 78 | unsigned int eax, ebx, ecx, edx; |
78 | unsigned int edx_part; | 79 | unsigned int edx_part; |
@@ -91,7 +92,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, | |||
91 | 92 | ||
92 | /* Make sure we are running on right CPU */ | 93 | /* Make sure we are running on right CPU */ |
93 | saved_mask = current->cpus_allowed; | 94 | saved_mask = current->cpus_allowed; |
94 | retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 95 | retval = set_cpus_allowed_ptr(current, new_mask); |
95 | if (retval) | 96 | if (retval) |
96 | return -1; | 97 | return -1; |
97 | 98 | ||
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 868de3d5c39d..fa2161d5003b 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/bootmem.h> | 9 | #include <linux/bootmem.h> |
10 | #include <linux/dmi.h> | 10 | #include <linux/dmi.h> |
11 | #include <linux/cpumask.h> | 11 | #include <linux/cpumask.h> |
12 | #include <asm/segment.h> | ||
12 | 13 | ||
13 | #include "realmode/wakeup.h" | 14 | #include "realmode/wakeup.h" |
14 | #include "sleep.h" | 15 | #include "sleep.h" |
@@ -23,15 +24,6 @@ static unsigned long acpi_realmode; | |||
23 | static char temp_stack[10240]; | 24 | static char temp_stack[10240]; |
24 | #endif | 25 | #endif |
25 | 26 | ||
26 | /* XXX: this macro should move to asm-x86/segment.h and be shared with the | ||
27 | boot code... */ | ||
28 | #define GDT_ENTRY(flags, base, limit) \ | ||
29 | (((u64)(base & 0xff000000) << 32) | \ | ||
30 | ((u64)flags << 40) | \ | ||
31 | ((u64)(limit & 0x00ff0000) << 32) | \ | ||
32 | ((u64)(base & 0x00ffffff) << 16) | \ | ||
33 | ((u64)(limit & 0x0000ffff))) | ||
34 | |||
35 | /** | 27 | /** |
36 | * acpi_save_state_mem - save kernel state | 28 | * acpi_save_state_mem - save kernel state |
37 | * | 29 | * |
@@ -158,6 +150,10 @@ static int __init acpi_sleep_setup(char *str) | |||
158 | acpi_realmode_flags |= 2; | 150 | acpi_realmode_flags |= 2; |
159 | if (strncmp(str, "s3_beep", 7) == 0) | 151 | if (strncmp(str, "s3_beep", 7) == 0) |
160 | acpi_realmode_flags |= 4; | 152 | acpi_realmode_flags |= 4; |
153 | #ifdef CONFIG_HIBERNATION | ||
154 | if (strncmp(str, "s4_nohwsig", 10) == 0) | ||
155 | acpi_no_s4_hw_signature(); | ||
156 | #endif | ||
161 | if (strncmp(str, "old_ordering", 12) == 0) | 157 | if (strncmp(str, "old_ordering", 12) == 0) |
162 | acpi_old_suspend_ordering(); | 158 | acpi_old_suspend_ordering(); |
163 | str = strchr(str, ','); | 159 | str = strchr(str, ','); |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f2766d84c7a0..74697408576f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <linux/scatterlist.h> | 23 | #include <linux/scatterlist.h> |
24 | #include <linux/iommu-helper.h> | 24 | #include <linux/iommu-helper.h> |
25 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
26 | #include <asm/gart.h> | 26 | #include <asm/iommu.h> |
27 | #include <asm/amd_iommu_types.h> | 27 | #include <asm/amd_iommu_types.h> |
28 | #include <asm/amd_iommu.h> | 28 | #include <asm/amd_iommu.h> |
29 | 29 | ||
@@ -32,21 +32,37 @@ | |||
32 | #define to_pages(addr, size) \ | 32 | #define to_pages(addr, size) \ |
33 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) | 33 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) |
34 | 34 | ||
35 | #define EXIT_LOOP_COUNT 10000000 | ||
36 | |||
35 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); | 37 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); |
36 | 38 | ||
37 | struct command { | 39 | /* |
40 | * general struct to manage commands sent to an IOMMU | ||
41 | */ | ||
42 | struct iommu_cmd { | ||
38 | u32 data[4]; | 43 | u32 data[4]; |
39 | }; | 44 | }; |
40 | 45 | ||
41 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 46 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
42 | struct unity_map_entry *e); | 47 | struct unity_map_entry *e); |
43 | 48 | ||
49 | /* returns !0 if the IOMMU is caching non-present entries in its TLB */ | ||
44 | static int iommu_has_npcache(struct amd_iommu *iommu) | 50 | static int iommu_has_npcache(struct amd_iommu *iommu) |
45 | { | 51 | { |
46 | return iommu->cap & IOMMU_CAP_NPCACHE; | 52 | return iommu->cap & IOMMU_CAP_NPCACHE; |
47 | } | 53 | } |
48 | 54 | ||
49 | static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) | 55 | /**************************************************************************** |
56 | * | ||
57 | * IOMMU command queuing functions | ||
58 | * | ||
59 | ****************************************************************************/ | ||
60 | |||
61 | /* | ||
62 | * Writes the command to the IOMMUs command buffer and informs the | ||
63 | * hardware about the new command. Must be called with iommu->lock held. | ||
64 | */ | ||
65 | static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | ||
50 | { | 66 | { |
51 | u32 tail, head; | 67 | u32 tail, head; |
52 | u8 *target; | 68 | u8 *target; |
@@ -63,7 +79,11 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) | |||
63 | return 0; | 79 | return 0; |
64 | } | 80 | } |
65 | 81 | ||
66 | static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) | 82 | /* |
83 | * General queuing function for commands. Takes iommu->lock and calls | ||
84 | * __iommu_queue_command(). | ||
85 | */ | ||
86 | static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | ||
67 | { | 87 | { |
68 | unsigned long flags; | 88 | unsigned long flags; |
69 | int ret; | 89 | int ret; |
@@ -75,16 +95,24 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) | |||
75 | return ret; | 95 | return ret; |
76 | } | 96 | } |
77 | 97 | ||
98 | /* | ||
99 | * This function is called whenever we need to ensure that the IOMMU has | ||
100 | * completed execution of all commands we sent. It sends a | ||
101 | * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs | ||
102 | * us about that by writing a value to a physical address we pass with | ||
103 | * the command. | ||
104 | */ | ||
78 | static int iommu_completion_wait(struct amd_iommu *iommu) | 105 | static int iommu_completion_wait(struct amd_iommu *iommu) |
79 | { | 106 | { |
80 | int ret; | 107 | int ret; |
81 | struct command cmd; | 108 | struct iommu_cmd cmd; |
82 | volatile u64 ready = 0; | 109 | volatile u64 ready = 0; |
83 | unsigned long ready_phys = virt_to_phys(&ready); | 110 | unsigned long ready_phys = virt_to_phys(&ready); |
111 | unsigned long i = 0; | ||
84 | 112 | ||
85 | memset(&cmd, 0, sizeof(cmd)); | 113 | memset(&cmd, 0, sizeof(cmd)); |
86 | cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; | 114 | cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; |
87 | cmd.data[1] = HIGH_U32(ready_phys); | 115 | cmd.data[1] = upper_32_bits(ready_phys); |
88 | cmd.data[2] = 1; /* value written to 'ready' */ | 116 | cmd.data[2] = 1; /* value written to 'ready' */ |
89 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); | 117 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); |
90 | 118 | ||
@@ -95,15 +123,23 @@ static int iommu_completion_wait(struct amd_iommu *iommu) | |||
95 | if (ret) | 123 | if (ret) |
96 | return ret; | 124 | return ret; |
97 | 125 | ||
98 | while (!ready) | 126 | while (!ready && (i < EXIT_LOOP_COUNT)) { |
127 | ++i; | ||
99 | cpu_relax(); | 128 | cpu_relax(); |
129 | } | ||
130 | |||
131 | if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) | ||
132 | printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); | ||
100 | 133 | ||
101 | return 0; | 134 | return 0; |
102 | } | 135 | } |
103 | 136 | ||
137 | /* | ||
138 | * Command send function for invalidating a device table entry | ||
139 | */ | ||
104 | static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | 140 | static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) |
105 | { | 141 | { |
106 | struct command cmd; | 142 | struct iommu_cmd cmd; |
107 | 143 | ||
108 | BUG_ON(iommu == NULL); | 144 | BUG_ON(iommu == NULL); |
109 | 145 | ||
@@ -116,20 +152,23 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | |||
116 | return iommu_queue_command(iommu, &cmd); | 152 | return iommu_queue_command(iommu, &cmd); |
117 | } | 153 | } |
118 | 154 | ||
155 | /* | ||
156 | * Generic command send function for invalidating TLB entries | ||
157 | */ | ||
119 | static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | 158 | static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, |
120 | u64 address, u16 domid, int pde, int s) | 159 | u64 address, u16 domid, int pde, int s) |
121 | { | 160 | { |
122 | struct command cmd; | 161 | struct iommu_cmd cmd; |
123 | 162 | ||
124 | memset(&cmd, 0, sizeof(cmd)); | 163 | memset(&cmd, 0, sizeof(cmd)); |
125 | address &= PAGE_MASK; | 164 | address &= PAGE_MASK; |
126 | CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); | 165 | CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); |
127 | cmd.data[1] |= domid; | 166 | cmd.data[1] |= domid; |
128 | cmd.data[2] = LOW_U32(address); | 167 | cmd.data[2] = LOW_U32(address); |
129 | cmd.data[3] = HIGH_U32(address); | 168 | cmd.data[3] = upper_32_bits(address); |
130 | if (s) | 169 | if (s) /* size bit - we flush more than one 4kb page */ |
131 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | 170 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; |
132 | if (pde) | 171 | if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ |
133 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | 172 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; |
134 | 173 | ||
135 | iommu->need_sync = 1; | 174 | iommu->need_sync = 1; |
@@ -137,6 +176,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | |||
137 | return iommu_queue_command(iommu, &cmd); | 176 | return iommu_queue_command(iommu, &cmd); |
138 | } | 177 | } |
139 | 178 | ||
179 | /* | ||
180 | * TLB invalidation function which is called from the mapping functions. | ||
181 | * It invalidates a single PTE if the range to flush is within a single | ||
182 | * page. Otherwise it flushes the whole TLB of the IOMMU. | ||
183 | */ | ||
140 | static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | 184 | static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, |
141 | u64 address, size_t size) | 185 | u64 address, size_t size) |
142 | { | 186 | { |
@@ -159,6 +203,20 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | |||
159 | return 0; | 203 | return 0; |
160 | } | 204 | } |
161 | 205 | ||
206 | /**************************************************************************** | ||
207 | * | ||
208 | * The functions below are used to create the page table mappings for | ||
209 | * unity mapped regions. | ||
210 | * | ||
211 | ****************************************************************************/ | ||
212 | |||
213 | /* | ||
214 | * Generic mapping function. It maps a physical address into a DMA | ||
215 | * address space. It allocates the page table pages if necessary. | ||
216 | * In the future it can be extended to a generic mapping function | ||
217 | * supporting all features of AMD IOMMU page tables like level skipping | ||
218 | * and full 64 bit address spaces. | ||
219 | */ | ||
162 | static int iommu_map(struct protection_domain *dom, | 220 | static int iommu_map(struct protection_domain *dom, |
163 | unsigned long bus_addr, | 221 | unsigned long bus_addr, |
164 | unsigned long phys_addr, | 222 | unsigned long phys_addr, |
@@ -209,6 +267,10 @@ static int iommu_map(struct protection_domain *dom, | |||
209 | return 0; | 267 | return 0; |
210 | } | 268 | } |
211 | 269 | ||
270 | /* | ||
271 | * This function checks if a specific unity mapping entry is needed for | ||
272 | * this specific IOMMU. | ||
273 | */ | ||
212 | static int iommu_for_unity_map(struct amd_iommu *iommu, | 274 | static int iommu_for_unity_map(struct amd_iommu *iommu, |
213 | struct unity_map_entry *entry) | 275 | struct unity_map_entry *entry) |
214 | { | 276 | { |
@@ -223,6 +285,12 @@ static int iommu_for_unity_map(struct amd_iommu *iommu, | |||
223 | return 0; | 285 | return 0; |
224 | } | 286 | } |
225 | 287 | ||
288 | /* | ||
289 | * Init the unity mappings for a specific IOMMU in the system | ||
290 | * | ||
291 | * Basically iterates over all unity mapping entries and applies them to | ||
292 | * the default domain DMA of that IOMMU if necessary. | ||
293 | */ | ||
226 | static int iommu_init_unity_mappings(struct amd_iommu *iommu) | 294 | static int iommu_init_unity_mappings(struct amd_iommu *iommu) |
227 | { | 295 | { |
228 | struct unity_map_entry *entry; | 296 | struct unity_map_entry *entry; |
@@ -239,6 +307,10 @@ static int iommu_init_unity_mappings(struct amd_iommu *iommu) | |||
239 | return 0; | 307 | return 0; |
240 | } | 308 | } |
241 | 309 | ||
310 | /* | ||
311 | * This function actually applies the mapping to the page table of the | ||
312 | * dma_ops domain. | ||
313 | */ | ||
242 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 314 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
243 | struct unity_map_entry *e) | 315 | struct unity_map_entry *e) |
244 | { | 316 | { |
@@ -261,6 +333,9 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
261 | return 0; | 333 | return 0; |
262 | } | 334 | } |
263 | 335 | ||
336 | /* | ||
337 | * Inits the unity mappings required for a specific device | ||
338 | */ | ||
264 | static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | 339 | static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, |
265 | u16 devid) | 340 | u16 devid) |
266 | { | 341 | { |
@@ -278,12 +353,26 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
278 | return 0; | 353 | return 0; |
279 | } | 354 | } |
280 | 355 | ||
356 | /**************************************************************************** | ||
357 | * | ||
358 | * The next functions belong to the address allocator for the dma_ops | ||
359 | * interface functions. They work like the allocators in the other IOMMU | ||
360 | * drivers. It's basically a bitmap which marks the allocated pages in | ||
361 | * the aperture. Maybe it could be enhanced in the future to a more | ||
362 | * efficient allocator. | ||
363 | * | ||
364 | ****************************************************************************/ | ||
281 | static unsigned long dma_mask_to_pages(unsigned long mask) | 365 | static unsigned long dma_mask_to_pages(unsigned long mask) |
282 | { | 366 | { |
283 | return (mask >> PAGE_SHIFT) + | 367 | return (mask >> PAGE_SHIFT) + |
284 | (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); | 368 | (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); |
285 | } | 369 | } |
286 | 370 | ||
371 | /* | ||
372 | * The address allocator core function. | ||
373 | * | ||
374 | * called with domain->lock held | ||
375 | */ | ||
287 | static unsigned long dma_ops_alloc_addresses(struct device *dev, | 376 | static unsigned long dma_ops_alloc_addresses(struct device *dev, |
288 | struct dma_ops_domain *dom, | 377 | struct dma_ops_domain *dom, |
289 | unsigned int pages) | 378 | unsigned int pages) |
@@ -317,6 +406,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, | |||
317 | return address; | 406 | return address; |
318 | } | 407 | } |
319 | 408 | ||
409 | /* | ||
410 | * The address free function. | ||
411 | * | ||
412 | * called with domain->lock held | ||
413 | */ | ||
320 | static void dma_ops_free_addresses(struct dma_ops_domain *dom, | 414 | static void dma_ops_free_addresses(struct dma_ops_domain *dom, |
321 | unsigned long address, | 415 | unsigned long address, |
322 | unsigned int pages) | 416 | unsigned int pages) |
@@ -325,6 +419,16 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, | |||
325 | iommu_area_free(dom->bitmap, address, pages); | 419 | iommu_area_free(dom->bitmap, address, pages); |
326 | } | 420 | } |
327 | 421 | ||
422 | /**************************************************************************** | ||
423 | * | ||
424 | * The next functions belong to the domain allocation. A domain is | ||
425 | * allocated for every IOMMU as the default domain. If device isolation | ||
426 | * is enabled, every device gets its own domain. The most important thing | ||
427 | * about domains is the page table mapping the DMA address space they | ||
428 | * contain. | ||
429 | * | ||
430 | ****************************************************************************/ | ||
431 | |||
328 | static u16 domain_id_alloc(void) | 432 | static u16 domain_id_alloc(void) |
329 | { | 433 | { |
330 | unsigned long flags; | 434 | unsigned long flags; |
@@ -342,6 +446,10 @@ static u16 domain_id_alloc(void) | |||
342 | return id; | 446 | return id; |
343 | } | 447 | } |
344 | 448 | ||
449 | /* | ||
450 | * Used to reserve address ranges in the aperture (e.g. for exclusion | ||
451 | * ranges). | ||
452 | */ | ||
345 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | 453 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, |
346 | unsigned long start_page, | 454 | unsigned long start_page, |
347 | unsigned int pages) | 455 | unsigned int pages) |
@@ -382,6 +490,10 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) | |||
382 | free_page((unsigned long)p1); | 490 | free_page((unsigned long)p1); |
383 | } | 491 | } |
384 | 492 | ||
493 | /* | ||
494 | * Free a domain, only used if something went wrong in the | ||
495 | * allocation path and we need to free an already allocated page table | ||
496 | */ | ||
385 | static void dma_ops_domain_free(struct dma_ops_domain *dom) | 497 | static void dma_ops_domain_free(struct dma_ops_domain *dom) |
386 | { | 498 | { |
387 | if (!dom) | 499 | if (!dom) |
@@ -396,6 +508,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) | |||
396 | kfree(dom); | 508 | kfree(dom); |
397 | } | 509 | } |
398 | 510 | ||
511 | /* | ||
512 | * Allocates a new protection domain usable for the dma_ops functions. | ||
513 | * It also initializes the page table and the address allocator data | ||
514 | * structures required for the dma_ops interface | ||
515 | */ | ||
399 | static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | 516 | static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, |
400 | unsigned order) | 517 | unsigned order) |
401 | { | 518 | { |
@@ -436,6 +553,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | |||
436 | dma_dom->bitmap[0] = 1; | 553 | dma_dom->bitmap[0] = 1; |
437 | dma_dom->next_bit = 0; | 554 | dma_dom->next_bit = 0; |
438 | 555 | ||
556 | /* Initialize the exclusion range if necessary */ | ||
439 | if (iommu->exclusion_start && | 557 | if (iommu->exclusion_start && |
440 | iommu->exclusion_start < dma_dom->aperture_size) { | 558 | iommu->exclusion_start < dma_dom->aperture_size) { |
441 | unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; | 559 | unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; |
@@ -444,6 +562,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | |||
444 | dma_ops_reserve_addresses(dma_dom, startpage, pages); | 562 | dma_ops_reserve_addresses(dma_dom, startpage, pages); |
445 | } | 563 | } |
446 | 564 | ||
565 | /* | ||
566 | * At the last step, build the page tables so we don't need to | ||
567 | * allocate page table pages in the dma_ops mapping/unmapping | ||
568 | * path. | ||
569 | */ | ||
447 | num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); | 570 | num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); |
448 | dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), | 571 | dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), |
449 | GFP_KERNEL); | 572 | GFP_KERNEL); |
@@ -472,6 +595,10 @@ free_dma_dom: | |||
472 | return NULL; | 595 | return NULL; |
473 | } | 596 | } |
474 | 597 | ||
598 | /* | ||
599 | * Find out the protection domain structure for a given PCI device. This | ||
600 | * will give us the pointer to the page table root for example. | ||
601 | */ | ||
475 | static struct protection_domain *domain_for_device(u16 devid) | 602 | static struct protection_domain *domain_for_device(u16 devid) |
476 | { | 603 | { |
477 | struct protection_domain *dom; | 604 | struct protection_domain *dom; |
@@ -484,6 +611,10 @@ static struct protection_domain *domain_for_device(u16 devid) | |||
484 | return dom; | 611 | return dom; |
485 | } | 612 | } |
486 | 613 | ||
614 | /* | ||
615 | * If a device is not yet associated with a domain, this function | ||
616 | * assigns it and makes the assignment visible to the hardware | ||
617 | */ | ||
487 | static void set_device_domain(struct amd_iommu *iommu, | 618 | static void set_device_domain(struct amd_iommu *iommu, |
488 | struct protection_domain *domain, | 619 | struct protection_domain *domain, |
489 | u16 devid) | 620 | u16 devid) |
@@ -508,6 +639,19 @@ static void set_device_domain(struct amd_iommu *iommu, | |||
508 | iommu->need_sync = 1; | 639 | iommu->need_sync = 1; |
509 | } | 640 | } |
510 | 641 | ||
642 | /***************************************************************************** | ||
643 | * | ||
644 | * The next functions belong to the dma_ops mapping/unmapping code. | ||
645 | * | ||
646 | *****************************************************************************/ | ||
647 | |||
648 | /* | ||
649 | * In the dma_ops path we only have the struct device. This function | ||
650 | * finds the corresponding IOMMU, the protection domain and the | ||
651 | * requestor id for a given device. | ||
652 | * If the device is not yet associated with a domain this is also done | ||
653 | * in this function. | ||
654 | */ | ||
511 | static int get_device_resources(struct device *dev, | 655 | static int get_device_resources(struct device *dev, |
512 | struct amd_iommu **iommu, | 656 | struct amd_iommu **iommu, |
513 | struct protection_domain **domain, | 657 | struct protection_domain **domain, |
@@ -520,9 +664,10 @@ static int get_device_resources(struct device *dev, | |||
520 | BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); | 664 | BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); |
521 | 665 | ||
522 | pcidev = to_pci_dev(dev); | 666 | pcidev = to_pci_dev(dev); |
523 | _bdf = (pcidev->bus->number << 8) | pcidev->devfn; | 667 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); |
524 | 668 | ||
525 | if (_bdf >= amd_iommu_last_bdf) { | 669 | /* device not translated by any IOMMU in the system? */ |
670 | if (_bdf > amd_iommu_last_bdf) { | ||
526 | *iommu = NULL; | 671 | *iommu = NULL; |
527 | *domain = NULL; | 672 | *domain = NULL; |
528 | *bdf = 0xffff; | 673 | *bdf = 0xffff; |
@@ -547,6 +692,10 @@ static int get_device_resources(struct device *dev, | |||
547 | return 1; | 692 | return 1; |
548 | } | 693 | } |
549 | 694 | ||
695 | /* | ||
696 | * This is the generic map function. It maps one 4kb page at paddr to | ||
697 | * the given address in the DMA address space for the domain. | ||
698 | */ | ||
550 | static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | 699 | static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, |
551 | struct dma_ops_domain *dom, | 700 | struct dma_ops_domain *dom, |
552 | unsigned long address, | 701 | unsigned long address, |
@@ -578,6 +727,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | |||
578 | return (dma_addr_t)address; | 727 | return (dma_addr_t)address; |
579 | } | 728 | } |
580 | 729 | ||
730 | /* | ||
731 | * The generic unmapping function for one page in the DMA address space. | ||
732 | */ | ||
581 | static void dma_ops_domain_unmap(struct amd_iommu *iommu, | 733 | static void dma_ops_domain_unmap(struct amd_iommu *iommu, |
582 | struct dma_ops_domain *dom, | 734 | struct dma_ops_domain *dom, |
583 | unsigned long address) | 735 | unsigned long address) |
@@ -597,6 +749,12 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
597 | *pte = 0ULL; | 749 | *pte = 0ULL; |
598 | } | 750 | } |
599 | 751 | ||
752 | /* | ||
753 | * This function contains common code for mapping of a physically | ||
754 | * contiguous memory region into DMA address space. It is used by all | ||
755 | * mapping functions provided by this IOMMU driver. | ||
756 | * Must be called with the domain lock held. | ||
757 | */ | ||
600 | static dma_addr_t __map_single(struct device *dev, | 758 | static dma_addr_t __map_single(struct device *dev, |
601 | struct amd_iommu *iommu, | 759 | struct amd_iommu *iommu, |
602 | struct dma_ops_domain *dma_dom, | 760 | struct dma_ops_domain *dma_dom, |
@@ -628,6 +786,10 @@ out: | |||
628 | return address; | 786 | return address; |
629 | } | 787 | } |
630 | 788 | ||
789 | /* | ||
790 | * Does the reverse of the __map_single function. Must be called with | ||
791 | * the domain lock held too | ||
792 | */ | ||
631 | static void __unmap_single(struct amd_iommu *iommu, | 793 | static void __unmap_single(struct amd_iommu *iommu, |
632 | struct dma_ops_domain *dma_dom, | 794 | struct dma_ops_domain *dma_dom, |
633 | dma_addr_t dma_addr, | 795 | dma_addr_t dma_addr, |
@@ -652,6 +814,9 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
652 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 814 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
653 | } | 815 | } |
654 | 816 | ||
817 | /* | ||
818 | * The exported map_single function for dma_ops. | ||
819 | */ | ||
655 | static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | 820 | static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, |
656 | size_t size, int dir) | 821 | size_t size, int dir) |
657 | { | 822 | { |
@@ -664,6 +829,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | |||
664 | get_device_resources(dev, &iommu, &domain, &devid); | 829 | get_device_resources(dev, &iommu, &domain, &devid); |
665 | 830 | ||
666 | if (iommu == NULL || domain == NULL) | 831 | if (iommu == NULL || domain == NULL) |
832 | /* device not handled by any AMD IOMMU */ | ||
667 | return (dma_addr_t)paddr; | 833 | return (dma_addr_t)paddr; |
668 | 834 | ||
669 | spin_lock_irqsave(&domain->lock, flags); | 835 | spin_lock_irqsave(&domain->lock, flags); |
@@ -683,6 +849,9 @@ out: | |||
683 | return addr; | 849 | return addr; |
684 | } | 850 | } |
685 | 851 | ||
852 | /* | ||
853 | * The exported unmap_single function for dma_ops. | ||
854 | */ | ||
686 | static void unmap_single(struct device *dev, dma_addr_t dma_addr, | 855 | static void unmap_single(struct device *dev, dma_addr_t dma_addr, |
687 | size_t size, int dir) | 856 | size_t size, int dir) |
688 | { | 857 | { |
@@ -692,6 +861,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
692 | u16 devid; | 861 | u16 devid; |
693 | 862 | ||
694 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | 863 | if (!get_device_resources(dev, &iommu, &domain, &devid)) |
864 | /* device not handled by any AMD IOMMU */ | ||
695 | return; | 865 | return; |
696 | 866 | ||
697 | spin_lock_irqsave(&domain->lock, flags); | 867 | spin_lock_irqsave(&domain->lock, flags); |
@@ -706,6 +876,10 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
706 | spin_unlock_irqrestore(&domain->lock, flags); | 876 | spin_unlock_irqrestore(&domain->lock, flags); |
707 | } | 877 | } |
708 | 878 | ||
879 | /* | ||
880 | * This is a special map_sg function which is used if we should map a | ||
881 | * device which is not handled by an AMD IOMMU in the system. | ||
882 | */ | ||
709 | static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, | 883 | static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, |
710 | int nelems, int dir) | 884 | int nelems, int dir) |
711 | { | 885 | { |
@@ -720,6 +894,10 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, | |||
720 | return nelems; | 894 | return nelems; |
721 | } | 895 | } |
722 | 896 | ||
897 | /* | ||
898 | * The exported map_sg function for dma_ops (handles scatter-gather | ||
899 | * lists). | ||
900 | */ | ||
723 | static int map_sg(struct device *dev, struct scatterlist *sglist, | 901 | static int map_sg(struct device *dev, struct scatterlist *sglist, |
724 | int nelems, int dir) | 902 | int nelems, int dir) |
725 | { | 903 | { |
@@ -775,6 +953,10 @@ unmap: | |||
775 | goto out; | 953 | goto out; |
776 | } | 954 | } |
777 | 955 | ||
956 | /* | ||
957 | * The exported unmap_sg function for dma_ops (handles scatter-gather | ||
958 | * lists). | ||
959 | */ | ||
778 | static void unmap_sg(struct device *dev, struct scatterlist *sglist, | 960 | static void unmap_sg(struct device *dev, struct scatterlist *sglist, |
779 | int nelems, int dir) | 961 | int nelems, int dir) |
780 | { | 962 | { |
@@ -804,6 +986,9 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
804 | spin_unlock_irqrestore(&domain->lock, flags); | 986 | spin_unlock_irqrestore(&domain->lock, flags); |
805 | } | 987 | } |
806 | 988 | ||
989 | /* | ||
990 | * The exported alloc_coherent function for dma_ops. | ||
991 | */ | ||
807 | static void *alloc_coherent(struct device *dev, size_t size, | 992 | static void *alloc_coherent(struct device *dev, size_t size, |
808 | dma_addr_t *dma_addr, gfp_t flag) | 993 | dma_addr_t *dma_addr, gfp_t flag) |
809 | { | 994 | { |
@@ -851,6 +1036,11 @@ out: | |||
851 | return virt_addr; | 1036 | return virt_addr; |
852 | } | 1037 | } |
853 | 1038 | ||
1039 | /* | ||
1040 | * The exported free_coherent function for dma_ops. | ||
1041 | * FIXME: fix the generic x86 DMA layer so that it actually calls that | ||
1042 | * function. | ||
1043 | */ | ||
854 | static void free_coherent(struct device *dev, size_t size, | 1044 | static void free_coherent(struct device *dev, size_t size, |
855 | void *virt_addr, dma_addr_t dma_addr) | 1045 | void *virt_addr, dma_addr_t dma_addr) |
856 | { | 1046 | { |
@@ -879,6 +1069,8 @@ free_mem: | |||
879 | } | 1069 | } |
880 | 1070 | ||
881 | /* | 1071 | /* |
1072 | * The function for pre-allocating protection domains. | ||
1073 | * | ||
882 | * If the driver core informs the DMA layer if a driver grabs a device | 1074 | * If the driver core informs the DMA layer if a driver grabs a device |
883 | * we don't need to preallocate the protection domains anymore. | 1075 | * we don't need to preallocate the protection domains anymore. |
884 | * For now we have to. | 1076 | * For now we have to. |
@@ -893,7 +1085,7 @@ void prealloc_protection_domains(void) | |||
893 | 1085 | ||
894 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 1086 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
895 | devid = (dev->bus->number << 8) | dev->devfn; | 1087 | devid = (dev->bus->number << 8) | dev->devfn; |
896 | if (devid >= amd_iommu_last_bdf) | 1088 | if (devid > amd_iommu_last_bdf) |
897 | continue; | 1089 | continue; |
898 | devid = amd_iommu_alias_table[devid]; | 1090 | devid = amd_iommu_alias_table[devid]; |
899 | if (domain_for_device(devid)) | 1091 | if (domain_for_device(devid)) |
@@ -921,12 +1113,20 @@ static struct dma_mapping_ops amd_iommu_dma_ops = { | |||
921 | .unmap_sg = unmap_sg, | 1113 | .unmap_sg = unmap_sg, |
922 | }; | 1114 | }; |
923 | 1115 | ||
1116 | /* | ||
1117 | * The function which glues the AMD IOMMU driver into dma_ops. | ||
1118 | */ | ||
924 | int __init amd_iommu_init_dma_ops(void) | 1119 | int __init amd_iommu_init_dma_ops(void) |
925 | { | 1120 | { |
926 | struct amd_iommu *iommu; | 1121 | struct amd_iommu *iommu; |
927 | int order = amd_iommu_aperture_order; | 1122 | int order = amd_iommu_aperture_order; |
928 | int ret; | 1123 | int ret; |
929 | 1124 | ||
1125 | /* | ||
1126 | * first allocate a default protection domain for every IOMMU we | ||
1127 | * found in the system. Devices not assigned to any other | ||
1128 | * protection domain will be assigned to the default one. | ||
1129 | */ | ||
930 | list_for_each_entry(iommu, &amd_iommu_list, list) { | 1130 | list_for_each_entry(iommu, &amd_iommu_list, list) { |
931 | iommu->default_dom = dma_ops_domain_alloc(iommu, order); | 1131 | iommu->default_dom = dma_ops_domain_alloc(iommu, order); |
932 | if (iommu->default_dom == NULL) | 1132 | if (iommu->default_dom == NULL) |
@@ -936,6 +1136,10 @@ int __init amd_iommu_init_dma_ops(void) | |||
936 | goto free_domains; | 1136 | goto free_domains; |
937 | } | 1137 | } |
938 | 1138 | ||
1139 | /* | ||
1140 | * If device isolation is enabled, pre-allocate the protection | ||
1141 | * domains for each device. | ||
1142 | */ | ||
939 | if (amd_iommu_isolate) | 1143 | if (amd_iommu_isolate) |
940 | prealloc_protection_domains(); | 1144 | prealloc_protection_domains(); |
941 | 1145 | ||
@@ -947,6 +1151,7 @@ int __init amd_iommu_init_dma_ops(void) | |||
947 | gart_iommu_aperture = 0; | 1151 | gart_iommu_aperture = 0; |
948 | #endif | 1152 | #endif |
949 | 1153 | ||
1154 | /* Make the driver finally visible to the drivers */ | ||
950 | dma_ops = &amd_iommu_dma_ops; | 1155 | dma_ops = &amd_iommu_dma_ops; |
951 | 1156 | ||
952 | return 0; | 1157 | return 0; |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 2a13e430437d..d9a9da597e79 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -25,20 +25,13 @@ | |||
25 | #include <asm/pci-direct.h> | 25 | #include <asm/pci-direct.h> |
26 | #include <asm/amd_iommu_types.h> | 26 | #include <asm/amd_iommu_types.h> |
27 | #include <asm/amd_iommu.h> | 27 | #include <asm/amd_iommu.h> |
28 | #include <asm/gart.h> | 28 | #include <asm/iommu.h> |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * definitions for the ACPI scanning code | 31 | * definitions for the ACPI scanning code |
32 | */ | 32 | */ |
33 | #define UPDATE_LAST_BDF(x) do {\ | ||
34 | if ((x) > amd_iommu_last_bdf) \ | ||
35 | amd_iommu_last_bdf = (x); \ | ||
36 | } while (0); | ||
37 | |||
38 | #define DEVID(bus, devfn) (((bus) << 8) | (devfn)) | ||
39 | #define PCI_BUS(x) (((x) >> 8) & 0xff) | 33 | #define PCI_BUS(x) (((x) >> 8) & 0xff) |
40 | #define IVRS_HEADER_LENGTH 48 | 34 | #define IVRS_HEADER_LENGTH 48 |
41 | #define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x)))) | ||
42 | 35 | ||
43 | #define ACPI_IVHD_TYPE 0x10 | 36 | #define ACPI_IVHD_TYPE 0x10 |
44 | #define ACPI_IVMD_TYPE_ALL 0x20 | 37 | #define ACPI_IVMD_TYPE_ALL 0x20 |
@@ -71,6 +64,17 @@ | |||
71 | #define ACPI_DEVFLAG_LINT1 0x80 | 64 | #define ACPI_DEVFLAG_LINT1 0x80 |
72 | #define ACPI_DEVFLAG_ATSDIS 0x10000000 | 65 | #define ACPI_DEVFLAG_ATSDIS 0x10000000 |
73 | 66 | ||
67 | /* | ||
68 | * ACPI table definitions | ||
69 | * | ||
70 | * These data structures are laid over the table to parse the important values | ||
71 | * out of it. | ||
72 | */ | ||
73 | |||
74 | /* | ||
75 | * structure describing one IOMMU in the ACPI table. Typically followed by one | ||
76 | * or more ivhd_entrys. | ||
77 | */ | ||
74 | struct ivhd_header { | 78 | struct ivhd_header { |
75 | u8 type; | 79 | u8 type; |
76 | u8 flags; | 80 | u8 flags; |
@@ -83,6 +87,10 @@ struct ivhd_header { | |||
83 | u32 reserved; | 87 | u32 reserved; |
84 | } __attribute__((packed)); | 88 | } __attribute__((packed)); |
85 | 89 | ||
90 | /* | ||
91 | * A device entry describing which devices a specific IOMMU translates and | ||
92 | * which requestor ids they use. | ||
93 | */ | ||
86 | struct ivhd_entry { | 94 | struct ivhd_entry { |
87 | u8 type; | 95 | u8 type; |
88 | u16 devid; | 96 | u16 devid; |
@@ -90,6 +98,10 @@ struct ivhd_entry { | |||
90 | u32 ext; | 98 | u32 ext; |
91 | } __attribute__((packed)); | 99 | } __attribute__((packed)); |
92 | 100 | ||
101 | /* | ||
102 | * An AMD IOMMU memory definition structure. It defines things like exclusion | ||
103 | * ranges for devices and regions that should be unity mapped. | ||
104 | */ | ||
93 | struct ivmd_header { | 105 | struct ivmd_header { |
94 | u8 type; | 106 | u8 type; |
95 | u8 flags; | 107 | u8 flags; |
@@ -103,22 +115,80 @@ struct ivmd_header { | |||
103 | 115 | ||
104 | static int __initdata amd_iommu_detected; | 116 | static int __initdata amd_iommu_detected; |
105 | 117 | ||
106 | u16 amd_iommu_last_bdf; | 118 | u16 amd_iommu_last_bdf; /* largest PCI device id we have |
107 | struct list_head amd_iommu_unity_map; | 119 | to handle */ |
108 | unsigned amd_iommu_aperture_order = 26; | 120 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings |
109 | int amd_iommu_isolate; | 121 | we find in ACPI */ |
122 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ | ||
123 | int amd_iommu_isolate; /* if 1, device isolation is enabled */ | ||
124 | |||
125 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the | ||
126 | system */ | ||
110 | 127 | ||
111 | struct list_head amd_iommu_list; | 128 | /* |
129 | * Pointer to the device table which is shared by all AMD IOMMUs | ||
130 | * it is indexed by the PCI device id or the HT unit id and contains | ||
131 | * information about the domain the device belongs to as well as the | ||
132 | * page table root pointer. | ||
133 | */ | ||
112 | struct dev_table_entry *amd_iommu_dev_table; | 134 | struct dev_table_entry *amd_iommu_dev_table; |
135 | |||
136 | /* | ||
137 | * The alias table is a driver specific data structure which contains the | ||
138 | * mappings of the PCI device ids to the actual requestor ids on the IOMMU. | ||
139 | * More than one device can share the same requestor id. | ||
140 | */ | ||
113 | u16 *amd_iommu_alias_table; | 141 | u16 *amd_iommu_alias_table; |
142 | |||
143 | /* | ||
144 | * The rlookup table is used to find the IOMMU which is responsible | ||
145 | * for a specific device. It is also indexed by the PCI device id. | ||
146 | */ | ||
114 | struct amd_iommu **amd_iommu_rlookup_table; | 147 | struct amd_iommu **amd_iommu_rlookup_table; |
148 | |||
149 | /* | ||
150 | * The pd table (protection domain table) is used to find the protection domain | ||
151 | * data structure a device belongs to. Indexed with the PCI device id too. | ||
152 | */ | ||
115 | struct protection_domain **amd_iommu_pd_table; | 153 | struct protection_domain **amd_iommu_pd_table; |
154 | |||
155 | /* | ||
156 | * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap | ||
157 | * to know which ones are already in use. | ||
158 | */ | ||
116 | unsigned long *amd_iommu_pd_alloc_bitmap; | 159 | unsigned long *amd_iommu_pd_alloc_bitmap; |
117 | 160 | ||
118 | static u32 dev_table_size; | 161 | static u32 dev_table_size; /* size of the device table */ |
119 | static u32 alias_table_size; | 162 | static u32 alias_table_size; /* size of the alias table */ |
120 | static u32 rlookup_table_size; | 163 | static u32 rlookup_table_size; /* size of the rlookup table */ |
121 | 164 | ||
165 | static inline void update_last_devid(u16 devid) | ||
166 | { | ||
167 | if (devid > amd_iommu_last_bdf) | ||
168 | amd_iommu_last_bdf = devid; | ||
169 | } | ||
170 | |||
171 | static inline unsigned long tbl_size(int entry_size) | ||
172 | { | ||
173 | unsigned shift = PAGE_SHIFT + | ||
174 | get_order(amd_iommu_last_bdf * entry_size); | ||
175 | |||
176 | return 1UL << shift; | ||
177 | } | ||
178 | |||
179 | /**************************************************************************** | ||
180 | * | ||
181 | * AMD IOMMU MMIO register space handling functions | ||
182 | * | ||
183 | * These functions are used to program the IOMMU device registers in | ||
184 | * MMIO space required for that driver. | ||
185 | * | ||
186 | ****************************************************************************/ | ||
187 | |||
188 | /* | ||
189 | * This function sets the exclusion range in the IOMMU. DMA accesses to the | ||
190 | * exclusion range are passed through untranslated | ||
191 | */ | ||
122 | static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) | 192 | static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) |
123 | { | 193 | { |
124 | u64 start = iommu->exclusion_start & PAGE_MASK; | 194 | u64 start = iommu->exclusion_start & PAGE_MASK; |
@@ -137,6 +207,7 @@ static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) | |||
137 | &entry, sizeof(entry)); | 207 | &entry, sizeof(entry)); |
138 | } | 208 | } |
139 | 209 | ||
210 | /* Programs the physical address of the device table into the IOMMU hardware */ | ||
140 | static void __init iommu_set_device_table(struct amd_iommu *iommu) | 211 | static void __init iommu_set_device_table(struct amd_iommu *iommu) |
141 | { | 212 | { |
142 | u32 entry; | 213 | u32 entry; |
@@ -149,6 +220,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu) | |||
149 | &entry, sizeof(entry)); | 220 | &entry, sizeof(entry)); |
150 | } | 221 | } |
151 | 222 | ||
223 | /* Generic functions to enable/disable certain features of the IOMMU. */ | ||
152 | static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) | 224 | static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) |
153 | { | 225 | { |
154 | u32 ctrl; | 226 | u32 ctrl; |
@@ -167,6 +239,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
167 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); | 239 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); |
168 | } | 240 | } |
169 | 241 | ||
242 | /* Function to enable the hardware */ | ||
170 | void __init iommu_enable(struct amd_iommu *iommu) | 243 | void __init iommu_enable(struct amd_iommu *iommu) |
171 | { | 244 | { |
172 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); | 245 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); |
@@ -176,6 +249,10 @@ void __init iommu_enable(struct amd_iommu *iommu) | |||
176 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 249 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
177 | } | 250 | } |
178 | 251 | ||
252 | /* | ||
253 | * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in | ||
254 | * the system has one. | ||
255 | */ | ||
179 | static u8 * __init iommu_map_mmio_space(u64 address) | 256 | static u8 * __init iommu_map_mmio_space(u64 address) |
180 | { | 257 | { |
181 | u8 *ret; | 258 | u8 *ret; |
@@ -199,16 +276,33 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) | |||
199 | release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); | 276 | release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); |
200 | } | 277 | } |
201 | 278 | ||
279 | /**************************************************************************** | ||
280 | * | ||
281 | * The functions below belong to the first pass of AMD IOMMU ACPI table | ||
282 | * parsing. In this pass we try to find out the highest device id this | ||
283 | * code has to handle. Upon this information the size of the shared data | ||
284 | * structures is determined later. | ||
285 | * | ||
286 | ****************************************************************************/ | ||
287 | |||
288 | /* | ||
289 | * This function reads the last device id the IOMMU has to handle from the PCI | ||
290 | * capability header for this IOMMU | ||
291 | */ | ||
202 | static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) | 292 | static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) |
203 | { | 293 | { |
204 | u32 cap; | 294 | u32 cap; |
205 | 295 | ||
206 | cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); | 296 | cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); |
207 | UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); | 297 | update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); |
208 | 298 | ||
209 | return 0; | 299 | return 0; |
210 | } | 300 | } |
211 | 301 | ||
302 | /* | ||
303 | * After reading the highest device id from the IOMMU PCI capability header | ||
304 | * this function looks if there is a higher device id defined in the ACPI table | ||
305 | */ | ||
212 | static int __init find_last_devid_from_ivhd(struct ivhd_header *h) | 306 | static int __init find_last_devid_from_ivhd(struct ivhd_header *h) |
213 | { | 307 | { |
214 | u8 *p = (void *)h, *end = (void *)h; | 308 | u8 *p = (void *)h, *end = (void *)h; |
@@ -229,7 +323,8 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) | |||
229 | case IVHD_DEV_RANGE_END: | 323 | case IVHD_DEV_RANGE_END: |
230 | case IVHD_DEV_ALIAS: | 324 | case IVHD_DEV_ALIAS: |
231 | case IVHD_DEV_EXT_SELECT: | 325 | case IVHD_DEV_EXT_SELECT: |
232 | UPDATE_LAST_BDF(dev->devid); | 326 | /* all the above subfield types refer to device ids */ |
327 | update_last_devid(dev->devid); | ||
233 | break; | 328 | break; |
234 | default: | 329 | default: |
235 | break; | 330 | break; |
@@ -242,6 +337,11 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) | |||
242 | return 0; | 337 | return 0; |
243 | } | 338 | } |
244 | 339 | ||
340 | /* | ||
341 | * Iterate over all IVHD entries in the ACPI table and find the highest device | ||
342 | * id which we need to handle. This is the first of three functions which parse | ||
343 | * the ACPI table. So we check the checksum here. | ||
344 | */ | ||
245 | static int __init find_last_devid_acpi(struct acpi_table_header *table) | 345 | static int __init find_last_devid_acpi(struct acpi_table_header *table) |
246 | { | 346 | { |
247 | int i; | 347 | int i; |
@@ -277,19 +377,31 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) | |||
277 | return 0; | 377 | return 0; |
278 | } | 378 | } |
279 | 379 | ||
380 | /**************************************************************************** | ||
381 | * | ||
382 | * The following functions belong to the code path which parses the ACPI table | ||
383 | * the second time. In this ACPI parsing iteration we allocate IOMMU specific | ||
384 | * data structures, initialize the device/alias/rlookup table and also | ||
385 | * basically initialize the hardware. | ||
386 | * | ||
387 | ****************************************************************************/ | ||
388 | |||
389 | /* | ||
390 | * Allocates the command buffer. This buffer is per AMD IOMMU. We can | ||
391 | * write commands to that buffer later and the IOMMU will execute them | ||
392 | * asynchronously | ||
393 | */ | ||
280 | static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | 394 | static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) |
281 | { | 395 | { |
282 | u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL, | 396 | u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
283 | get_order(CMD_BUFFER_SIZE)); | 397 | get_order(CMD_BUFFER_SIZE)); |
284 | u64 entry = 0; | 398 | u64 entry; |
285 | 399 | ||
286 | if (cmd_buf == NULL) | 400 | if (cmd_buf == NULL) |
287 | return NULL; | 401 | return NULL; |
288 | 402 | ||
289 | iommu->cmd_buf_size = CMD_BUFFER_SIZE; | 403 | iommu->cmd_buf_size = CMD_BUFFER_SIZE; |
290 | 404 | ||
291 | memset(cmd_buf, 0, CMD_BUFFER_SIZE); | ||
292 | |||
293 | entry = (u64)virt_to_phys(cmd_buf); | 405 | entry = (u64)virt_to_phys(cmd_buf); |
294 | entry |= MMIO_CMD_SIZE_512; | 406 | entry |= MMIO_CMD_SIZE_512; |
295 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, | 407 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, |
@@ -302,11 +414,10 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
302 | 414 | ||
303 | static void __init free_command_buffer(struct amd_iommu *iommu) | 415 | static void __init free_command_buffer(struct amd_iommu *iommu) |
304 | { | 416 | { |
305 | if (iommu->cmd_buf) | 417 | free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); |
306 | free_pages((unsigned long)iommu->cmd_buf, | ||
307 | get_order(CMD_BUFFER_SIZE)); | ||
308 | } | 418 | } |
309 | 419 | ||
420 | /* sets a specific bit in the device table entry. */ | ||
310 | static void set_dev_entry_bit(u16 devid, u8 bit) | 421 | static void set_dev_entry_bit(u16 devid, u8 bit) |
311 | { | 422 | { |
312 | int i = (bit >> 5) & 0x07; | 423 | int i = (bit >> 5) & 0x07; |
@@ -315,7 +426,18 @@ static void set_dev_entry_bit(u16 devid, u8 bit) | |||
315 | amd_iommu_dev_table[devid].data[i] |= (1 << _bit); | 426 | amd_iommu_dev_table[devid].data[i] |= (1 << _bit); |
316 | } | 427 | } |
317 | 428 | ||
318 | static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) | 429 | /* Writes the specific IOMMU for a device into the rlookup table */ |
430 | static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) | ||
431 | { | ||
432 | amd_iommu_rlookup_table[devid] = iommu; | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * This function takes the device specific flags read from the ACPI | ||
437 | * table and sets up the device table entry with that information | ||
438 | */ | ||
439 | static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, | ||
440 | u16 devid, u32 flags, u32 ext_flags) | ||
319 | { | 441 | { |
320 | if (flags & ACPI_DEVFLAG_INITPASS) | 442 | if (flags & ACPI_DEVFLAG_INITPASS) |
321 | set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); | 443 | set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); |
@@ -331,13 +453,14 @@ static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) | |||
331 | set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); | 453 | set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); |
332 | if (flags & ACPI_DEVFLAG_LINT1) | 454 | if (flags & ACPI_DEVFLAG_LINT1) |
333 | set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); | 455 | set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); |
334 | } | ||
335 | 456 | ||
336 | static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) | 457 | set_iommu_for_device(iommu, devid); |
337 | { | ||
338 | amd_iommu_rlookup_table[devid] = iommu; | ||
339 | } | 458 | } |
340 | 459 | ||
460 | /* | ||
461 | * Reads the device exclusion range from ACPI and initializes the IOMMU with | ||
462 | * it | ||
463 | */ | ||
341 | static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) | 464 | static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) |
342 | { | 465 | { |
343 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; | 466 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; |
@@ -346,12 +469,22 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) | |||
346 | return; | 469 | return; |
347 | 470 | ||
348 | if (iommu) { | 471 | if (iommu) { |
472 | /* | ||
473 | * We only can configure exclusion ranges per IOMMU, not | ||
474 | * per device. But we can enable the exclusion range per | ||
475 | * device. This is done here | ||
476 | */ | ||
349 | set_dev_entry_bit(m->devid, DEV_ENTRY_EX); | 477 | set_dev_entry_bit(m->devid, DEV_ENTRY_EX); |
350 | iommu->exclusion_start = m->range_start; | 478 | iommu->exclusion_start = m->range_start; |
351 | iommu->exclusion_length = m->range_length; | 479 | iommu->exclusion_length = m->range_length; |
352 | } | 480 | } |
353 | } | 481 | } |
354 | 482 | ||
483 | /* | ||
484 | * This function reads some important data from the IOMMU PCI space and | ||
485 | * initializes the driver data structure with it. It reads the hardware | ||
486 | * capabilities and the first/last device entries | ||
487 | */ | ||
355 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) | 488 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) |
356 | { | 489 | { |
357 | int bus = PCI_BUS(iommu->devid); | 490 | int bus = PCI_BUS(iommu->devid); |
@@ -363,10 +496,16 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) | |||
363 | iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); | 496 | iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); |
364 | 497 | ||
365 | range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); | 498 | range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); |
366 | iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range)); | 499 | iommu->first_device = calc_devid(MMIO_GET_BUS(range), |
367 | iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range)); | 500 | MMIO_GET_FD(range)); |
501 | iommu->last_device = calc_devid(MMIO_GET_BUS(range), | ||
502 | MMIO_GET_LD(range)); | ||
368 | } | 503 | } |
369 | 504 | ||
505 | /* | ||
506 | * Takes a pointer to an AMD IOMMU entry in the ACPI table and | ||
507 | * initializes the hardware and our data structures with it. | ||
508 | */ | ||
370 | static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | 509 | static void __init init_iommu_from_acpi(struct amd_iommu *iommu, |
371 | struct ivhd_header *h) | 510 | struct ivhd_header *h) |
372 | { | 511 | { |
@@ -374,7 +513,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
374 | u8 *end = p, flags = 0; | 513 | u8 *end = p, flags = 0; |
375 | u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; | 514 | u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; |
376 | u32 ext_flags = 0; | 515 | u32 ext_flags = 0; |
377 | bool alias = 0; | 516 | bool alias = false; |
378 | struct ivhd_entry *e; | 517 | struct ivhd_entry *e; |
379 | 518 | ||
380 | /* | 519 | /* |
@@ -414,22 +553,23 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
414 | case IVHD_DEV_ALL: | 553 | case IVHD_DEV_ALL: |
415 | for (dev_i = iommu->first_device; | 554 | for (dev_i = iommu->first_device; |
416 | dev_i <= iommu->last_device; ++dev_i) | 555 | dev_i <= iommu->last_device; ++dev_i) |
417 | set_dev_entry_from_acpi(dev_i, e->flags, 0); | 556 | set_dev_entry_from_acpi(iommu, dev_i, |
557 | e->flags, 0); | ||
418 | break; | 558 | break; |
419 | case IVHD_DEV_SELECT: | 559 | case IVHD_DEV_SELECT: |
420 | devid = e->devid; | 560 | devid = e->devid; |
421 | set_dev_entry_from_acpi(devid, e->flags, 0); | 561 | set_dev_entry_from_acpi(iommu, devid, e->flags, 0); |
422 | break; | 562 | break; |
423 | case IVHD_DEV_SELECT_RANGE_START: | 563 | case IVHD_DEV_SELECT_RANGE_START: |
424 | devid_start = e->devid; | 564 | devid_start = e->devid; |
425 | flags = e->flags; | 565 | flags = e->flags; |
426 | ext_flags = 0; | 566 | ext_flags = 0; |
427 | alias = 0; | 567 | alias = false; |
428 | break; | 568 | break; |
429 | case IVHD_DEV_ALIAS: | 569 | case IVHD_DEV_ALIAS: |
430 | devid = e->devid; | 570 | devid = e->devid; |
431 | devid_to = e->ext >> 8; | 571 | devid_to = e->ext >> 8; |
432 | set_dev_entry_from_acpi(devid, e->flags, 0); | 572 | set_dev_entry_from_acpi(iommu, devid, e->flags, 0); |
433 | amd_iommu_alias_table[devid] = devid_to; | 573 | amd_iommu_alias_table[devid] = devid_to; |
434 | break; | 574 | break; |
435 | case IVHD_DEV_ALIAS_RANGE: | 575 | case IVHD_DEV_ALIAS_RANGE: |
@@ -437,24 +577,25 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
437 | flags = e->flags; | 577 | flags = e->flags; |
438 | devid_to = e->ext >> 8; | 578 | devid_to = e->ext >> 8; |
439 | ext_flags = 0; | 579 | ext_flags = 0; |
440 | alias = 1; | 580 | alias = true; |
441 | break; | 581 | break; |
442 | case IVHD_DEV_EXT_SELECT: | 582 | case IVHD_DEV_EXT_SELECT: |
443 | devid = e->devid; | 583 | devid = e->devid; |
444 | set_dev_entry_from_acpi(devid, e->flags, e->ext); | 584 | set_dev_entry_from_acpi(iommu, devid, e->flags, |
585 | e->ext); | ||
445 | break; | 586 | break; |
446 | case IVHD_DEV_EXT_SELECT_RANGE: | 587 | case IVHD_DEV_EXT_SELECT_RANGE: |
447 | devid_start = e->devid; | 588 | devid_start = e->devid; |
448 | flags = e->flags; | 589 | flags = e->flags; |
449 | ext_flags = e->ext; | 590 | ext_flags = e->ext; |
450 | alias = 0; | 591 | alias = false; |
451 | break; | 592 | break; |
452 | case IVHD_DEV_RANGE_END: | 593 | case IVHD_DEV_RANGE_END: |
453 | devid = e->devid; | 594 | devid = e->devid; |
454 | for (dev_i = devid_start; dev_i <= devid; ++dev_i) { | 595 | for (dev_i = devid_start; dev_i <= devid; ++dev_i) { |
455 | if (alias) | 596 | if (alias) |
456 | amd_iommu_alias_table[dev_i] = devid_to; | 597 | amd_iommu_alias_table[dev_i] = devid_to; |
457 | set_dev_entry_from_acpi( | 598 | set_dev_entry_from_acpi(iommu, |
458 | amd_iommu_alias_table[dev_i], | 599 | amd_iommu_alias_table[dev_i], |
459 | flags, ext_flags); | 600 | flags, ext_flags); |
460 | } | 601 | } |
@@ -467,6 +608,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
467 | } | 608 | } |
468 | } | 609 | } |
469 | 610 | ||
611 | /* Initializes the device->iommu mapping for the driver */ | ||
470 | static int __init init_iommu_devices(struct amd_iommu *iommu) | 612 | static int __init init_iommu_devices(struct amd_iommu *iommu) |
471 | { | 613 | { |
472 | u16 i; | 614 | u16 i; |
@@ -494,6 +636,11 @@ static void __init free_iommu_all(void) | |||
494 | } | 636 | } |
495 | } | 637 | } |
496 | 638 | ||
639 | /* | ||
640 | * This function glues the initialization function for one IOMMU | ||
641 | * together and also allocates the command buffer and programs the | ||
642 | * hardware. It does NOT enable the IOMMU. This is done afterwards. | ||
643 | */ | ||
497 | static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | 644 | static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) |
498 | { | 645 | { |
499 | spin_lock_init(&iommu->lock); | 646 | spin_lock_init(&iommu->lock); |
@@ -521,6 +668,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
521 | return 0; | 668 | return 0; |
522 | } | 669 | } |
523 | 670 | ||
671 | /* | ||
672 | * Iterates over all IOMMU entries in the ACPI table, allocates the | ||
673 | * IOMMU structure and initializes it with init_iommu_one() | ||
674 | */ | ||
524 | static int __init init_iommu_all(struct acpi_table_header *table) | 675 | static int __init init_iommu_all(struct acpi_table_header *table) |
525 | { | 676 | { |
526 | u8 *p = (u8 *)table, *end = (u8 *)table; | 677 | u8 *p = (u8 *)table, *end = (u8 *)table; |
@@ -528,8 +679,6 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
528 | struct amd_iommu *iommu; | 679 | struct amd_iommu *iommu; |
529 | int ret; | 680 | int ret; |
530 | 681 | ||
531 | INIT_LIST_HEAD(&amd_iommu_list); | ||
532 | |||
533 | end += table->length; | 682 | end += table->length; |
534 | p += IVRS_HEADER_LENGTH; | 683 | p += IVRS_HEADER_LENGTH; |
535 | 684 | ||
@@ -555,6 +704,14 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
555 | return 0; | 704 | return 0; |
556 | } | 705 | } |
557 | 706 | ||
707 | /**************************************************************************** | ||
708 | * | ||
709 | * The next functions belong to the third pass of parsing the ACPI | ||
710 | * table. In this last pass the memory mapping requirements are | ||
711 | * gathered (like exclusion and unity mapping ranges). | ||
712 | * | ||
713 | ****************************************************************************/ | ||
714 | |||
558 | static void __init free_unity_maps(void) | 715 | static void __init free_unity_maps(void) |
559 | { | 716 | { |
560 | struct unity_map_entry *entry, *next; | 717 | struct unity_map_entry *entry, *next; |
@@ -565,6 +722,7 @@ static void __init free_unity_maps(void) | |||
565 | } | 722 | } |
566 | } | 723 | } |
567 | 724 | ||
725 | /* called when we find an exclusion range definition in ACPI */ | ||
568 | static int __init init_exclusion_range(struct ivmd_header *m) | 726 | static int __init init_exclusion_range(struct ivmd_header *m) |
569 | { | 727 | { |
570 | int i; | 728 | int i; |
@@ -574,7 +732,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) | |||
574 | set_device_exclusion_range(m->devid, m); | 732 | set_device_exclusion_range(m->devid, m); |
575 | break; | 733 | break; |
576 | case ACPI_IVMD_TYPE_ALL: | 734 | case ACPI_IVMD_TYPE_ALL: |
577 | for (i = 0; i < amd_iommu_last_bdf; ++i) | 735 | for (i = 0; i <= amd_iommu_last_bdf; ++i) |
578 | set_device_exclusion_range(i, m); | 736 | set_device_exclusion_range(i, m); |
579 | break; | 737 | break; |
580 | case ACPI_IVMD_TYPE_RANGE: | 738 | case ACPI_IVMD_TYPE_RANGE: |
@@ -588,6 +746,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) | |||
588 | return 0; | 746 | return 0; |
589 | } | 747 | } |
590 | 748 | ||
749 | /* called for unity map ACPI definition */ | ||
591 | static int __init init_unity_map_range(struct ivmd_header *m) | 750 | static int __init init_unity_map_range(struct ivmd_header *m) |
592 | { | 751 | { |
593 | struct unity_map_entry *e = 0; | 752 | struct unity_map_entry *e = 0; |
@@ -619,13 +778,12 @@ static int __init init_unity_map_range(struct ivmd_header *m) | |||
619 | return 0; | 778 | return 0; |
620 | } | 779 | } |
621 | 780 | ||
781 | /* iterates over all memory definitions we find in the ACPI table */ | ||
622 | static int __init init_memory_definitions(struct acpi_table_header *table) | 782 | static int __init init_memory_definitions(struct acpi_table_header *table) |
623 | { | 783 | { |
624 | u8 *p = (u8 *)table, *end = (u8 *)table; | 784 | u8 *p = (u8 *)table, *end = (u8 *)table; |
625 | struct ivmd_header *m; | 785 | struct ivmd_header *m; |
626 | 786 | ||
627 | INIT_LIST_HEAD(&amd_iommu_unity_map); | ||
628 | |||
629 | end += table->length; | 787 | end += table->length; |
630 | p += IVRS_HEADER_LENGTH; | 788 | p += IVRS_HEADER_LENGTH; |
631 | 789 | ||
@@ -642,6 +800,10 @@ static int __init init_memory_definitions(struct acpi_table_header *table) | |||
642 | return 0; | 800 | return 0; |
643 | } | 801 | } |
644 | 802 | ||
803 | /* | ||
804 | * This function finally enables all IOMMUs found in the system after | ||
805 | * they have been initialized | ||
806 | */ | ||
645 | static void __init enable_iommus(void) | 807 | static void __init enable_iommus(void) |
646 | { | 808 | { |
647 | struct amd_iommu *iommu; | 809 | struct amd_iommu *iommu; |
@@ -678,6 +840,34 @@ static struct sys_device device_amd_iommu = { | |||
678 | .cls = &amd_iommu_sysdev_class, | 840 | .cls = &amd_iommu_sysdev_class, |
679 | }; | 841 | }; |
680 | 842 | ||
843 | /* | ||
844 | * This is the core init function for AMD IOMMU hardware in the system. | ||
845 | * This function is called from the generic x86 DMA layer initialization | ||
846 | * code. | ||
847 | * | ||
848 | * This function basically parses the ACPI table for AMD IOMMU (IVRS) | ||
849 | * three times: | ||
850 | * | ||
851 | * 1 pass) Find the highest PCI device id the driver has to handle. | ||
852 | * Upon this information the size of the data structures is | ||
853 | * determined that needs to be allocated. | ||
854 | * | ||
855 | * 2 pass) Initialize the data structures just allocated with the | ||
856 | * information in the ACPI table about available AMD IOMMUs | ||
857 | * in the system. It also maps the PCI devices in the | ||
858 | * system to specific IOMMUs | ||
859 | * | ||
860 | * 3 pass) After the basic data structures are allocated and | ||
861 | * initialized we update them with information about memory | ||
862 | * remapping requirements parsed out of the ACPI table in | ||
863 | * this last pass. | ||
864 | * | ||
865 | * After that the hardware is initialized and ready to go. In the last | ||
866 | * step we do some Linux specific things like registering the driver in | ||
867 | * the dma_ops interface and initializing the suspend/resume support | ||
868 | * functions. Finally it prints some information about AMD IOMMUs and | ||
869 | * the driver state and enables the hardware. | ||
870 | */ | ||
681 | int __init amd_iommu_init(void) | 871 | int __init amd_iommu_init(void) |
682 | { | 872 | { |
683 | int i, ret = 0; | 873 | int i, ret = 0; |
@@ -699,14 +889,14 @@ int __init amd_iommu_init(void) | |||
699 | if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) | 889 | if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) |
700 | return -ENODEV; | 890 | return -ENODEV; |
701 | 891 | ||
702 | dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE); | 892 | dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); |
703 | alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE); | 893 | alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); |
704 | rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE); | 894 | rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); |
705 | 895 | ||
706 | ret = -ENOMEM; | 896 | ret = -ENOMEM; |
707 | 897 | ||
708 | /* Device table - directly used by all IOMMUs */ | 898 | /* Device table - directly used by all IOMMUs */ |
709 | amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL, | 899 | amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
710 | get_order(dev_table_size)); | 900 | get_order(dev_table_size)); |
711 | if (amd_iommu_dev_table == NULL) | 901 | if (amd_iommu_dev_table == NULL) |
712 | goto out; | 902 | goto out; |
@@ -730,27 +920,23 @@ int __init amd_iommu_init(void) | |||
730 | * Protection Domain table - maps devices to protection domains | 920 | * Protection Domain table - maps devices to protection domains |
731 | * This table has the same size as the rlookup_table | 921 | * This table has the same size as the rlookup_table |
732 | */ | 922 | */ |
733 | amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL, | 923 | amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
734 | get_order(rlookup_table_size)); | 924 | get_order(rlookup_table_size)); |
735 | if (amd_iommu_pd_table == NULL) | 925 | if (amd_iommu_pd_table == NULL) |
736 | goto free; | 926 | goto free; |
737 | 927 | ||
738 | amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL, | 928 | amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( |
929 | GFP_KERNEL | __GFP_ZERO, | ||
739 | get_order(MAX_DOMAIN_ID/8)); | 930 | get_order(MAX_DOMAIN_ID/8)); |
740 | if (amd_iommu_pd_alloc_bitmap == NULL) | 931 | if (amd_iommu_pd_alloc_bitmap == NULL) |
741 | goto free; | 932 | goto free; |
742 | 933 | ||
743 | /* | 934 | /* |
744 | * memory is allocated now; initialize the device table with all zeroes | 935 | * let all alias entries point to itself |
745 | * and let all alias entries point to itself | ||
746 | */ | 936 | */ |
747 | memset(amd_iommu_dev_table, 0, dev_table_size); | 937 | for (i = 0; i <= amd_iommu_last_bdf; ++i) |
748 | for (i = 0; i < amd_iommu_last_bdf; ++i) | ||
749 | amd_iommu_alias_table[i] = i; | 938 | amd_iommu_alias_table[i] = i; |
750 | 939 | ||
751 | memset(amd_iommu_pd_table, 0, rlookup_table_size); | ||
752 | memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8); | ||
753 | |||
754 | /* | 940 | /* |
755 | * never allocate domain 0 because it's used as the non-allocated and | 941 | * never allocate domain 0 because it's used as the non-allocated and |
756 | * error value placeholder | 942 | * error value placeholder |
@@ -795,24 +981,19 @@ out: | |||
795 | return ret; | 981 | return ret; |
796 | 982 | ||
797 | free: | 983 | free: |
798 | if (amd_iommu_pd_alloc_bitmap) | 984 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); |
799 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); | ||
800 | 985 | ||
801 | if (amd_iommu_pd_table) | 986 | free_pages((unsigned long)amd_iommu_pd_table, |
802 | free_pages((unsigned long)amd_iommu_pd_table, | 987 | get_order(rlookup_table_size)); |
803 | get_order(rlookup_table_size)); | ||
804 | 988 | ||
805 | if (amd_iommu_rlookup_table) | 989 | free_pages((unsigned long)amd_iommu_rlookup_table, |
806 | free_pages((unsigned long)amd_iommu_rlookup_table, | 990 | get_order(rlookup_table_size)); |
807 | get_order(rlookup_table_size)); | ||
808 | 991 | ||
809 | if (amd_iommu_alias_table) | 992 | free_pages((unsigned long)amd_iommu_alias_table, |
810 | free_pages((unsigned long)amd_iommu_alias_table, | 993 | get_order(alias_table_size)); |
811 | get_order(alias_table_size)); | ||
812 | 994 | ||
813 | if (amd_iommu_dev_table) | 995 | free_pages((unsigned long)amd_iommu_dev_table, |
814 | free_pages((unsigned long)amd_iommu_dev_table, | 996 | get_order(dev_table_size)); |
815 | get_order(dev_table_size)); | ||
816 | 997 | ||
817 | free_iommu_all(); | 998 | free_iommu_all(); |
818 | 999 | ||
@@ -821,6 +1002,13 @@ free: | |||
821 | goto out; | 1002 | goto out; |
822 | } | 1003 | } |
823 | 1004 | ||
1005 | /**************************************************************************** | ||
1006 | * | ||
1007 | * Early detect code. This code runs at IOMMU detection time in the DMA | ||
1008 | * layer. It just looks if there is an IVRS ACPI table to detect AMD | ||
1009 | * IOMMUs | ||
1010 | * | ||
1011 | ****************************************************************************/ | ||
824 | static int __init early_amd_iommu_detect(struct acpi_table_header *table) | 1012 | static int __init early_amd_iommu_detect(struct acpi_table_header *table) |
825 | { | 1013 | { |
826 | return 0; | 1014 | return 0; |
@@ -828,7 +1016,7 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) | |||
828 | 1016 | ||
829 | void __init amd_iommu_detect(void) | 1017 | void __init amd_iommu_detect(void) |
830 | { | 1018 | { |
831 | if (swiotlb || no_iommu || iommu_detected) | 1019 | if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) |
832 | return; | 1020 | return; |
833 | 1021 | ||
834 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { | 1022 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { |
@@ -841,6 +1029,13 @@ void __init amd_iommu_detect(void) | |||
841 | } | 1029 | } |
842 | } | 1030 | } |
843 | 1031 | ||
1032 | /**************************************************************************** | ||
1033 | * | ||
1034 | * Parsing functions for the AMD IOMMU specific kernel command line | ||
1035 | * options. | ||
1036 | * | ||
1037 | ****************************************************************************/ | ||
1038 | |||
844 | static int __init parse_amd_iommu_options(char *str) | 1039 | static int __init parse_amd_iommu_options(char *str) |
845 | { | 1040 | { |
846 | for (; *str; ++str) { | 1041 | for (; *str; ++str) { |
@@ -853,20 +1048,10 @@ static int __init parse_amd_iommu_options(char *str) | |||
853 | 1048 | ||
854 | static int __init parse_amd_iommu_size_options(char *str) | 1049 | static int __init parse_amd_iommu_size_options(char *str) |
855 | { | 1050 | { |
856 | for (; *str; ++str) { | 1051 | unsigned order = PAGE_SHIFT + get_order(memparse(str, &str)); |
857 | if (strcmp(str, "32M") == 0) | 1052 | |
858 | amd_iommu_aperture_order = 25; | 1053 | if ((order > 24) && (order < 31)) |
859 | if (strcmp(str, "64M") == 0) | 1054 | amd_iommu_aperture_order = order; |
860 | amd_iommu_aperture_order = 26; | ||
861 | if (strcmp(str, "128M") == 0) | ||
862 | amd_iommu_aperture_order = 27; | ||
863 | if (strcmp(str, "256M") == 0) | ||
864 | amd_iommu_aperture_order = 28; | ||
865 | if (strcmp(str, "512M") == 0) | ||
866 | amd_iommu_aperture_order = 29; | ||
867 | if (strcmp(str, "1G") == 0) | ||
868 | amd_iommu_aperture_order = 30; | ||
869 | } | ||
870 | 1055 | ||
871 | return 1; | 1056 | return 1; |
872 | } | 1057 | } |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 9f907806c1a5..44e21826db11 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/suspend.h> | 21 | #include <linux/suspend.h> |
22 | #include <asm/e820.h> | 22 | #include <asm/e820.h> |
23 | #include <asm/io.h> | 23 | #include <asm/io.h> |
24 | #include <asm/iommu.h> | ||
24 | #include <asm/gart.h> | 25 | #include <asm/gart.h> |
25 | #include <asm/pci-direct.h> | 26 | #include <asm/pci-direct.h> |
26 | #include <asm/dma.h> | 27 | #include <asm/dma.h> |
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a437d027f20b..d6c898358371 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c | |||
@@ -75,7 +75,7 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; | |||
75 | /* | 75 | /* |
76 | * Debug level, exported for io_apic.c | 76 | * Debug level, exported for io_apic.c |
77 | */ | 77 | */ |
78 | int apic_verbosity; | 78 | unsigned int apic_verbosity; |
79 | 79 | ||
80 | int pic_mode; | 80 | int pic_mode; |
81 | 81 | ||
@@ -177,7 +177,7 @@ void __cpuinit enable_NMI_through_LVT0(void) | |||
177 | /* Level triggered for 82489DX */ | 177 | /* Level triggered for 82489DX */ |
178 | if (!lapic_is_integrated()) | 178 | if (!lapic_is_integrated()) |
179 | v |= APIC_LVT_LEVEL_TRIGGER; | 179 | v |= APIC_LVT_LEVEL_TRIGGER; |
180 | apic_write_around(APIC_LVT0, v); | 180 | apic_write(APIC_LVT0, v); |
181 | } | 181 | } |
182 | 182 | ||
183 | /** | 183 | /** |
@@ -212,9 +212,6 @@ int lapic_get_maxlvt(void) | |||
212 | * this function twice on the boot CPU, once with a bogus timeout | 212 | * this function twice on the boot CPU, once with a bogus timeout |
213 | * value, second time for real. The other (noncalibrating) CPUs | 213 | * value, second time for real. The other (noncalibrating) CPUs |
214 | * call this function only once, with the real, calibrated value. | 214 | * call this function only once, with the real, calibrated value. |
215 | * | ||
216 | * We do reads before writes even if unnecessary, to get around the | ||
217 | * P5 APIC double write bug. | ||
218 | */ | 215 | */ |
219 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | 216 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) |
220 | { | 217 | { |
@@ -229,18 +226,18 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | |||
229 | if (!irqen) | 226 | if (!irqen) |
230 | lvtt_value |= APIC_LVT_MASKED; | 227 | lvtt_value |= APIC_LVT_MASKED; |
231 | 228 | ||
232 | apic_write_around(APIC_LVTT, lvtt_value); | 229 | apic_write(APIC_LVTT, lvtt_value); |
233 | 230 | ||
234 | /* | 231 | /* |
235 | * Divide PICLK by 16 | 232 | * Divide PICLK by 16 |
236 | */ | 233 | */ |
237 | tmp_value = apic_read(APIC_TDCR); | 234 | tmp_value = apic_read(APIC_TDCR); |
238 | apic_write_around(APIC_TDCR, (tmp_value | 235 | apic_write(APIC_TDCR, |
239 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | 236 | (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | |
240 | | APIC_TDR_DIV_16); | 237 | APIC_TDR_DIV_16); |
241 | 238 | ||
242 | if (!oneshot) | 239 | if (!oneshot) |
243 | apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); | 240 | apic_write(APIC_TMICT, clocks / APIC_DIVISOR); |
244 | } | 241 | } |
245 | 242 | ||
246 | /* | 243 | /* |
@@ -249,7 +246,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | |||
249 | static int lapic_next_event(unsigned long delta, | 246 | static int lapic_next_event(unsigned long delta, |
250 | struct clock_event_device *evt) | 247 | struct clock_event_device *evt) |
251 | { | 248 | { |
252 | apic_write_around(APIC_TMICT, delta); | 249 | apic_write(APIC_TMICT, delta); |
253 | return 0; | 250 | return 0; |
254 | } | 251 | } |
255 | 252 | ||
@@ -278,7 +275,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
278 | case CLOCK_EVT_MODE_SHUTDOWN: | 275 | case CLOCK_EVT_MODE_SHUTDOWN: |
279 | v = apic_read(APIC_LVTT); | 276 | v = apic_read(APIC_LVTT); |
280 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 277 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); |
281 | apic_write_around(APIC_LVTT, v); | 278 | apic_write(APIC_LVTT, v); |
282 | break; | 279 | break; |
283 | case CLOCK_EVT_MODE_RESUME: | 280 | case CLOCK_EVT_MODE_RESUME: |
284 | /* Nothing to do here */ | 281 | /* Nothing to do here */ |
@@ -372,12 +369,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) | |||
372 | } | 369 | } |
373 | } | 370 | } |
374 | 371 | ||
375 | /* | 372 | static int __init calibrate_APIC_clock(void) |
376 | * Setup the boot APIC | ||
377 | * | ||
378 | * Calibrate and verify the result. | ||
379 | */ | ||
380 | void __init setup_boot_APIC_clock(void) | ||
381 | { | 373 | { |
382 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | 374 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
383 | const long pm_100ms = PMTMR_TICKS_PER_SEC/10; | 375 | const long pm_100ms = PMTMR_TICKS_PER_SEC/10; |
@@ -387,24 +379,6 @@ void __init setup_boot_APIC_clock(void) | |||
387 | long delta, deltapm; | 379 | long delta, deltapm; |
388 | int pm_referenced = 0; | 380 | int pm_referenced = 0; |
389 | 381 | ||
390 | /* | ||
391 | * The local apic timer can be disabled via the kernel | ||
392 | * commandline or from the CPU detection code. Register the lapic | ||
393 | * timer as a dummy clock event source on SMP systems, so the | ||
394 | * broadcast mechanism is used. On UP systems simply ignore it. | ||
395 | */ | ||
396 | if (local_apic_timer_disabled) { | ||
397 | /* No broadcast on UP ! */ | ||
398 | if (num_possible_cpus() > 1) { | ||
399 | lapic_clockevent.mult = 1; | ||
400 | setup_APIC_timer(); | ||
401 | } | ||
402 | return; | ||
403 | } | ||
404 | |||
405 | apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" | ||
406 | "calibrating APIC timer ...\n"); | ||
407 | |||
408 | local_irq_disable(); | 382 | local_irq_disable(); |
409 | 383 | ||
410 | /* Replace the global interrupt handler */ | 384 | /* Replace the global interrupt handler */ |
@@ -489,8 +463,6 @@ void __init setup_boot_APIC_clock(void) | |||
489 | calibration_result / (1000000 / HZ), | 463 | calibration_result / (1000000 / HZ), |
490 | calibration_result % (1000000 / HZ)); | 464 | calibration_result % (1000000 / HZ)); |
491 | 465 | ||
492 | local_apic_timer_verify_ok = 1; | ||
493 | |||
494 | /* | 466 | /* |
495 | * Do a sanity check on the APIC calibration result | 467 | * Do a sanity check on the APIC calibration result |
496 | */ | 468 | */ |
@@ -498,12 +470,11 @@ void __init setup_boot_APIC_clock(void) | |||
498 | local_irq_enable(); | 470 | local_irq_enable(); |
499 | printk(KERN_WARNING | 471 | printk(KERN_WARNING |
500 | "APIC frequency too slow, disabling apic timer\n"); | 472 | "APIC frequency too slow, disabling apic timer\n"); |
501 | /* No broadcast on UP ! */ | 473 | return -1; |
502 | if (num_possible_cpus() > 1) | ||
503 | setup_APIC_timer(); | ||
504 | return; | ||
505 | } | 474 | } |
506 | 475 | ||
476 | local_apic_timer_verify_ok = 1; | ||
477 | |||
507 | /* We trust the pm timer based calibration */ | 478 | /* We trust the pm timer based calibration */ |
508 | if (!pm_referenced) { | 479 | if (!pm_referenced) { |
509 | apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); | 480 | apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); |
@@ -543,22 +514,55 @@ void __init setup_boot_APIC_clock(void) | |||
543 | if (!local_apic_timer_verify_ok) { | 514 | if (!local_apic_timer_verify_ok) { |
544 | printk(KERN_WARNING | 515 | printk(KERN_WARNING |
545 | "APIC timer disabled due to verification failure.\n"); | 516 | "APIC timer disabled due to verification failure.\n"); |
517 | return -1; | ||
518 | } | ||
519 | |||
520 | return 0; | ||
521 | } | ||
522 | |||
523 | /* | ||
524 | * Setup the boot APIC | ||
525 | * | ||
526 | * Calibrate and verify the result. | ||
527 | */ | ||
528 | void __init setup_boot_APIC_clock(void) | ||
529 | { | ||
530 | /* | ||
531 | * The local apic timer can be disabled via the kernel | ||
532 | * commandline or from the CPU detection code. Register the lapic | ||
533 | * timer as a dummy clock event source on SMP systems, so the | ||
534 | * broadcast mechanism is used. On UP systems simply ignore it. | ||
535 | */ | ||
536 | if (local_apic_timer_disabled) { | ||
546 | /* No broadcast on UP ! */ | 537 | /* No broadcast on UP ! */ |
547 | if (num_possible_cpus() == 1) | 538 | if (num_possible_cpus() > 1) { |
548 | return; | 539 | lapic_clockevent.mult = 1; |
549 | } else { | 540 | setup_APIC_timer(); |
550 | /* | 541 | } |
551 | * If nmi_watchdog is set to IO_APIC, we need the | 542 | return; |
552 | * PIT/HPET going. Otherwise register lapic as a dummy | ||
553 | * device. | ||
554 | */ | ||
555 | if (nmi_watchdog != NMI_IO_APIC) | ||
556 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | ||
557 | else | ||
558 | printk(KERN_WARNING "APIC timer registered as dummy," | ||
559 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | ||
560 | } | 543 | } |
561 | 544 | ||
545 | apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" | ||
546 | "calibrating APIC timer ...\n"); | ||
547 | |||
548 | if (calibrate_APIC_clock()) { | ||
549 | /* No broadcast on UP ! */ | ||
550 | if (num_possible_cpus() > 1) | ||
551 | setup_APIC_timer(); | ||
552 | return; | ||
553 | } | ||
554 | |||
555 | /* | ||
556 | * If nmi_watchdog is set to IO_APIC, we need the | ||
557 | * PIT/HPET going. Otherwise register lapic as a dummy | ||
558 | * device. | ||
559 | */ | ||
560 | if (nmi_watchdog != NMI_IO_APIC) | ||
561 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | ||
562 | else | ||
563 | printk(KERN_WARNING "APIC timer registered as dummy," | ||
564 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | ||
565 | |||
562 | /* Setup the lapic or request the broadcast */ | 566 | /* Setup the lapic or request the broadcast */ |
563 | setup_APIC_timer(); | 567 | setup_APIC_timer(); |
564 | } | 568 | } |
@@ -693,44 +697,44 @@ void clear_local_APIC(void) | |||
693 | */ | 697 | */ |
694 | if (maxlvt >= 3) { | 698 | if (maxlvt >= 3) { |
695 | v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ | 699 | v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ |
696 | apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); | 700 | apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); |
697 | } | 701 | } |
698 | /* | 702 | /* |
699 | * Careful: we have to set masks only first to deassert | 703 | * Careful: we have to set masks only first to deassert |
700 | * any level-triggered sources. | 704 | * any level-triggered sources. |
701 | */ | 705 | */ |
702 | v = apic_read(APIC_LVTT); | 706 | v = apic_read(APIC_LVTT); |
703 | apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); | 707 | apic_write(APIC_LVTT, v | APIC_LVT_MASKED); |
704 | v = apic_read(APIC_LVT0); | 708 | v = apic_read(APIC_LVT0); |
705 | apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); | 709 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); |
706 | v = apic_read(APIC_LVT1); | 710 | v = apic_read(APIC_LVT1); |
707 | apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); | 711 | apic_write(APIC_LVT1, v | APIC_LVT_MASKED); |
708 | if (maxlvt >= 4) { | 712 | if (maxlvt >= 4) { |
709 | v = apic_read(APIC_LVTPC); | 713 | v = apic_read(APIC_LVTPC); |
710 | apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); | 714 | apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); |
711 | } | 715 | } |
712 | 716 | ||
713 | /* lets not touch this if we didn't frob it */ | 717 | /* lets not touch this if we didn't frob it */ |
714 | #ifdef CONFIG_X86_MCE_P4THERMAL | 718 | #ifdef CONFIG_X86_MCE_P4THERMAL |
715 | if (maxlvt >= 5) { | 719 | if (maxlvt >= 5) { |
716 | v = apic_read(APIC_LVTTHMR); | 720 | v = apic_read(APIC_LVTTHMR); |
717 | apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED); | 721 | apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); |
718 | } | 722 | } |
719 | #endif | 723 | #endif |
720 | /* | 724 | /* |
721 | * Clean APIC state for other OSs: | 725 | * Clean APIC state for other OSs: |
722 | */ | 726 | */ |
723 | apic_write_around(APIC_LVTT, APIC_LVT_MASKED); | 727 | apic_write(APIC_LVTT, APIC_LVT_MASKED); |
724 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED); | 728 | apic_write(APIC_LVT0, APIC_LVT_MASKED); |
725 | apic_write_around(APIC_LVT1, APIC_LVT_MASKED); | 729 | apic_write(APIC_LVT1, APIC_LVT_MASKED); |
726 | if (maxlvt >= 3) | 730 | if (maxlvt >= 3) |
727 | apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); | 731 | apic_write(APIC_LVTERR, APIC_LVT_MASKED); |
728 | if (maxlvt >= 4) | 732 | if (maxlvt >= 4) |
729 | apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); | 733 | apic_write(APIC_LVTPC, APIC_LVT_MASKED); |
730 | 734 | ||
731 | #ifdef CONFIG_X86_MCE_P4THERMAL | 735 | #ifdef CONFIG_X86_MCE_P4THERMAL |
732 | if (maxlvt >= 5) | 736 | if (maxlvt >= 5) |
733 | apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); | 737 | apic_write(APIC_LVTTHMR, APIC_LVT_MASKED); |
734 | #endif | 738 | #endif |
735 | /* Integrated APIC (!82489DX) ? */ | 739 | /* Integrated APIC (!82489DX) ? */ |
736 | if (lapic_is_integrated()) { | 740 | if (lapic_is_integrated()) { |
@@ -756,7 +760,7 @@ void disable_local_APIC(void) | |||
756 | */ | 760 | */ |
757 | value = apic_read(APIC_SPIV); | 761 | value = apic_read(APIC_SPIV); |
758 | value &= ~APIC_SPIV_APIC_ENABLED; | 762 | value &= ~APIC_SPIV_APIC_ENABLED; |
759 | apic_write_around(APIC_SPIV, value); | 763 | apic_write(APIC_SPIV, value); |
760 | 764 | ||
761 | /* | 765 | /* |
762 | * When LAPIC was disabled by the BIOS and enabled by the kernel, | 766 | * When LAPIC was disabled by the BIOS and enabled by the kernel, |
@@ -865,8 +869,8 @@ void __init sync_Arb_IDs(void) | |||
865 | apic_wait_icr_idle(); | 869 | apic_wait_icr_idle(); |
866 | 870 | ||
867 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); | 871 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); |
868 | apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | 872 | apic_write(APIC_ICR, |
869 | | APIC_DM_INIT); | 873 | APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); |
870 | } | 874 | } |
871 | 875 | ||
872 | /* | 876 | /* |
@@ -902,16 +906,16 @@ void __init init_bsp_APIC(void) | |||
902 | else | 906 | else |
903 | value |= APIC_SPIV_FOCUS_DISABLED; | 907 | value |= APIC_SPIV_FOCUS_DISABLED; |
904 | value |= SPURIOUS_APIC_VECTOR; | 908 | value |= SPURIOUS_APIC_VECTOR; |
905 | apic_write_around(APIC_SPIV, value); | 909 | apic_write(APIC_SPIV, value); |
906 | 910 | ||
907 | /* | 911 | /* |
908 | * Set up the virtual wire mode. | 912 | * Set up the virtual wire mode. |
909 | */ | 913 | */ |
910 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | 914 | apic_write(APIC_LVT0, APIC_DM_EXTINT); |
911 | value = APIC_DM_NMI; | 915 | value = APIC_DM_NMI; |
912 | if (!lapic_is_integrated()) /* 82489DX */ | 916 | if (!lapic_is_integrated()) /* 82489DX */ |
913 | value |= APIC_LVT_LEVEL_TRIGGER; | 917 | value |= APIC_LVT_LEVEL_TRIGGER; |
914 | apic_write_around(APIC_LVT1, value); | 918 | apic_write(APIC_LVT1, value); |
915 | } | 919 | } |
916 | 920 | ||
917 | static void __cpuinit lapic_setup_esr(void) | 921 | static void __cpuinit lapic_setup_esr(void) |
@@ -926,7 +930,7 @@ static void __cpuinit lapic_setup_esr(void) | |||
926 | 930 | ||
927 | /* enables sending errors */ | 931 | /* enables sending errors */ |
928 | value = ERROR_APIC_VECTOR; | 932 | value = ERROR_APIC_VECTOR; |
929 | apic_write_around(APIC_LVTERR, value); | 933 | apic_write(APIC_LVTERR, value); |
930 | /* | 934 | /* |
931 | * spec says clear errors after enabling vector. | 935 | * spec says clear errors after enabling vector. |
932 | */ | 936 | */ |
@@ -989,7 +993,7 @@ void __cpuinit setup_local_APIC(void) | |||
989 | */ | 993 | */ |
990 | value = apic_read(APIC_TASKPRI); | 994 | value = apic_read(APIC_TASKPRI); |
991 | value &= ~APIC_TPRI_MASK; | 995 | value &= ~APIC_TPRI_MASK; |
992 | apic_write_around(APIC_TASKPRI, value); | 996 | apic_write(APIC_TASKPRI, value); |
993 | 997 | ||
994 | /* | 998 | /* |
995 | * After a crash, we no longer service the interrupts and a pending | 999 | * After a crash, we no longer service the interrupts and a pending |
@@ -1047,7 +1051,7 @@ void __cpuinit setup_local_APIC(void) | |||
1047 | * Set spurious IRQ vector | 1051 | * Set spurious IRQ vector |
1048 | */ | 1052 | */ |
1049 | value |= SPURIOUS_APIC_VECTOR; | 1053 | value |= SPURIOUS_APIC_VECTOR; |
1050 | apic_write_around(APIC_SPIV, value); | 1054 | apic_write(APIC_SPIV, value); |
1051 | 1055 | ||
1052 | /* | 1056 | /* |
1053 | * Set up LVT0, LVT1: | 1057 | * Set up LVT0, LVT1: |
@@ -1069,7 +1073,7 @@ void __cpuinit setup_local_APIC(void) | |||
1069 | apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", | 1073 | apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", |
1070 | smp_processor_id()); | 1074 | smp_processor_id()); |
1071 | } | 1075 | } |
1072 | apic_write_around(APIC_LVT0, value); | 1076 | apic_write(APIC_LVT0, value); |
1073 | 1077 | ||
1074 | /* | 1078 | /* |
1075 | * only the BP should see the LINT1 NMI signal, obviously. | 1079 | * only the BP should see the LINT1 NMI signal, obviously. |
@@ -1080,7 +1084,7 @@ void __cpuinit setup_local_APIC(void) | |||
1080 | value = APIC_DM_NMI | APIC_LVT_MASKED; | 1084 | value = APIC_DM_NMI | APIC_LVT_MASKED; |
1081 | if (!integrated) /* 82489DX */ | 1085 | if (!integrated) /* 82489DX */ |
1082 | value |= APIC_LVT_LEVEL_TRIGGER; | 1086 | value |= APIC_LVT_LEVEL_TRIGGER; |
1083 | apic_write_around(APIC_LVT1, value); | 1087 | apic_write(APIC_LVT1, value); |
1084 | } | 1088 | } |
1085 | 1089 | ||
1086 | void __cpuinit end_local_APIC_setup(void) | 1090 | void __cpuinit end_local_APIC_setup(void) |
@@ -1091,7 +1095,7 @@ void __cpuinit end_local_APIC_setup(void) | |||
1091 | /* Disable the local apic timer */ | 1095 | /* Disable the local apic timer */ |
1092 | value = apic_read(APIC_LVTT); | 1096 | value = apic_read(APIC_LVTT); |
1093 | value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 1097 | value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); |
1094 | apic_write_around(APIC_LVTT, value); | 1098 | apic_write(APIC_LVTT, value); |
1095 | 1099 | ||
1096 | setup_apic_nmi_watchdog(NULL); | 1100 | setup_apic_nmi_watchdog(NULL); |
1097 | apic_pm_activate(); | 1101 | apic_pm_activate(); |
@@ -1214,9 +1218,6 @@ int apic_version[MAX_APICS]; | |||
1214 | 1218 | ||
1215 | int __init APIC_init_uniprocessor(void) | 1219 | int __init APIC_init_uniprocessor(void) |
1216 | { | 1220 | { |
1217 | if (disable_apic) | ||
1218 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | ||
1219 | |||
1220 | if (!smp_found_config && !cpu_has_apic) | 1221 | if (!smp_found_config && !cpu_has_apic) |
1221 | return -1; | 1222 | return -1; |
1222 | 1223 | ||
@@ -1419,7 +1420,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
1419 | value &= ~APIC_VECTOR_MASK; | 1420 | value &= ~APIC_VECTOR_MASK; |
1420 | value |= APIC_SPIV_APIC_ENABLED; | 1421 | value |= APIC_SPIV_APIC_ENABLED; |
1421 | value |= 0xf; | 1422 | value |= 0xf; |
1422 | apic_write_around(APIC_SPIV, value); | 1423 | apic_write(APIC_SPIV, value); |
1423 | 1424 | ||
1424 | if (!virt_wire_setup) { | 1425 | if (!virt_wire_setup) { |
1425 | /* | 1426 | /* |
@@ -1432,10 +1433,10 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
1432 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | 1433 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); |
1433 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | 1434 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; |
1434 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | 1435 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); |
1435 | apic_write_around(APIC_LVT0, value); | 1436 | apic_write(APIC_LVT0, value); |
1436 | } else { | 1437 | } else { |
1437 | /* Disable LVT0 */ | 1438 | /* Disable LVT0 */ |
1438 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED); | 1439 | apic_write(APIC_LVT0, APIC_LVT_MASKED); |
1439 | } | 1440 | } |
1440 | 1441 | ||
1441 | /* | 1442 | /* |
@@ -1449,7 +1450,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
1449 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | 1450 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); |
1450 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | 1451 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; |
1451 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | 1452 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); |
1452 | apic_write_around(APIC_LVT1, value); | 1453 | apic_write(APIC_LVT1, value); |
1453 | } | 1454 | } |
1454 | } | 1455 | } |
1455 | 1456 | ||
@@ -1700,7 +1701,7 @@ early_param("lapic", parse_lapic); | |||
1700 | static int __init parse_nolapic(char *arg) | 1701 | static int __init parse_nolapic(char *arg) |
1701 | { | 1702 | { |
1702 | disable_apic = 1; | 1703 | disable_apic = 1; |
1703 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1704 | setup_clear_cpu_cap(X86_FEATURE_APIC); |
1704 | return 0; | 1705 | return 0; |
1705 | } | 1706 | } |
1706 | early_param("nolapic", parse_nolapic); | 1707 | early_param("nolapic", parse_nolapic); |
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1e3d32e27c14..7f1f030da7ee 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c | |||
@@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); | |||
54 | /* | 54 | /* |
55 | * Debug level, exported for io_apic.c | 55 | * Debug level, exported for io_apic.c |
56 | */ | 56 | */ |
57 | int apic_verbosity; | 57 | unsigned int apic_verbosity; |
58 | 58 | ||
59 | /* Have we found an MP table */ | 59 | /* Have we found an MP table */ |
60 | int smp_found_config; | 60 | int smp_found_config; |
@@ -314,7 +314,7 @@ static void setup_APIC_timer(void) | |||
314 | 314 | ||
315 | #define TICK_COUNT 100000000 | 315 | #define TICK_COUNT 100000000 |
316 | 316 | ||
317 | static void __init calibrate_APIC_clock(void) | 317 | static int __init calibrate_APIC_clock(void) |
318 | { | 318 | { |
319 | unsigned apic, apic_start; | 319 | unsigned apic, apic_start; |
320 | unsigned long tsc, tsc_start; | 320 | unsigned long tsc, tsc_start; |
@@ -368,6 +368,17 @@ static void __init calibrate_APIC_clock(void) | |||
368 | clockevent_delta2ns(0xF, &lapic_clockevent); | 368 | clockevent_delta2ns(0xF, &lapic_clockevent); |
369 | 369 | ||
370 | calibration_result = result / HZ; | 370 | calibration_result = result / HZ; |
371 | |||
372 | /* | ||
373 | * Do a sanity check on the APIC calibration result | ||
374 | */ | ||
375 | if (calibration_result < (1000000 / HZ)) { | ||
376 | printk(KERN_WARNING | ||
377 | "APIC frequency too slow, disabling apic timer\n"); | ||
378 | return -1; | ||
379 | } | ||
380 | |||
381 | return 0; | ||
371 | } | 382 | } |
372 | 383 | ||
373 | /* | 384 | /* |
@@ -394,14 +405,7 @@ void __init setup_boot_APIC_clock(void) | |||
394 | } | 405 | } |
395 | 406 | ||
396 | printk(KERN_INFO "Using local APIC timer interrupts.\n"); | 407 | printk(KERN_INFO "Using local APIC timer interrupts.\n"); |
397 | calibrate_APIC_clock(); | 408 | if (calibrate_APIC_clock()) { |
398 | |||
399 | /* | ||
400 | * Do a sanity check on the APIC calibration result | ||
401 | */ | ||
402 | if (calibration_result < (1000000 / HZ)) { | ||
403 | printk(KERN_WARNING | ||
404 | "APIC frequency too slow, disabling apic timer\n"); | ||
405 | /* No broadcast on UP ! */ | 409 | /* No broadcast on UP ! */ |
406 | if (num_possible_cpus() > 1) | 410 | if (num_possible_cpus() > 1) |
407 | setup_APIC_timer(); | 411 | setup_APIC_timer(); |
@@ -1337,7 +1341,7 @@ early_param("apic", apic_set_verbosity); | |||
1337 | static __init int setup_disableapic(char *str) | 1341 | static __init int setup_disableapic(char *str) |
1338 | { | 1342 | { |
1339 | disable_apic = 1; | 1343 | disable_apic = 1; |
1340 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1344 | setup_clear_cpu_cap(X86_FEATURE_APIC); |
1341 | return 0; | 1345 | return 0; |
1342 | } | 1346 | } |
1343 | early_param("disableapic", setup_disableapic); | 1347 | early_param("disableapic", setup_disableapic); |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index bf9b441331e9..9ee24e6bc4b0 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -219,7 +219,6 @@ | |||
219 | #include <linux/time.h> | 219 | #include <linux/time.h> |
220 | #include <linux/sched.h> | 220 | #include <linux/sched.h> |
221 | #include <linux/pm.h> | 221 | #include <linux/pm.h> |
222 | #include <linux/pm_legacy.h> | ||
223 | #include <linux/capability.h> | 222 | #include <linux/capability.h> |
224 | #include <linux/device.h> | 223 | #include <linux/device.h> |
225 | #include <linux/kernel.h> | 224 | #include <linux/kernel.h> |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index bacf5deeec2d..aa89387006fe 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <asm/ia32.h> | 18 | #include <asm/ia32.h> |
19 | #include <asm/bootparam.h> | 19 | #include <asm/bootparam.h> |
20 | 20 | ||
21 | #include <xen/interface/xen.h> | ||
22 | |||
21 | #define __NO_STUBS 1 | 23 | #define __NO_STUBS 1 |
22 | #undef __SYSCALL | 24 | #undef __SYSCALL |
23 | #undef _ASM_X86_64_UNISTD_H_ | 25 | #undef _ASM_X86_64_UNISTD_H_ |
@@ -131,5 +133,14 @@ int main(void) | |||
131 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); | 133 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); |
132 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | 134 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); |
133 | OFFSET(BP_version, boot_params, hdr.version); | 135 | OFFSET(BP_version, boot_params, hdr.version); |
136 | |||
137 | BLANK(); | ||
138 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | ||
139 | #ifdef CONFIG_XEN | ||
140 | BLANK(); | ||
141 | OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); | ||
142 | OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); | ||
143 | #undef ENTRY | ||
144 | #endif | ||
134 | return 0; | 145 | return 0; |
135 | } | 146 | } |
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c new file mode 100644 index 000000000000..c639bd55391c --- /dev/null +++ b/arch/x86/kernel/bios_uv.c | |||
@@ -0,0 +1,48 @@ | |||
1 | /* | ||
2 | * BIOS run time interface routines. | ||
3 | * | ||
4 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
19 | */ | ||
20 | |||
21 | #include <asm/uv/bios.h> | ||
22 | |||
23 | const char * | ||
24 | x86_bios_strerror(long status) | ||
25 | { | ||
26 | const char *str; | ||
27 | switch (status) { | ||
28 | case 0: str = "Call completed without error"; break; | ||
29 | case -1: str = "Not implemented"; break; | ||
30 | case -2: str = "Invalid argument"; break; | ||
31 | case -3: str = "Call completed with error"; break; | ||
32 | default: str = "Unknown BIOS status code"; break; | ||
33 | } | ||
34 | return str; | ||
35 | } | ||
36 | |||
37 | long | ||
38 | x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, | ||
39 | unsigned long *drift_info) | ||
40 | { | ||
41 | struct uv_bios_retval isrv; | ||
42 | |||
43 | BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0); | ||
44 | *ticks_per_second = isrv.v0; | ||
45 | *drift_info = isrv.v1; | ||
46 | return isrv.status; | ||
47 | } | ||
48 | EXPORT_SYMBOL_GPL(x86_bios_freq_base); | ||
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 81a07ca65d44..cae9cabc3031 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -24,8 +24,6 @@ | |||
24 | extern void vide(void); | 24 | extern void vide(void); |
25 | __asm__(".align 4\nvide: ret"); | 25 | __asm__(".align 4\nvide: ret"); |
26 | 26 | ||
27 | int force_mwait __cpuinitdata; | ||
28 | |||
29 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | 27 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) |
30 | { | 28 | { |
31 | if (cpuid_eax(0x80000000) >= 0x80000007) { | 29 | if (cpuid_eax(0x80000000) >= 0x80000007) { |
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 7c36fb8a28d4..d1692b2a41ff 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c | |||
@@ -115,6 +115,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
115 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | 115 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ |
116 | if (c->x86_power & (1<<8)) | 116 | if (c->x86_power & (1<<8)) |
117 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 117 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
118 | |||
119 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | ||
118 | } | 120 | } |
119 | 121 | ||
120 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 122 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1b1c56bb338f..c9b58a806e85 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -131,13 +131,7 @@ static void __init check_popad(void) | |||
131 | * (for due to lack of "invlpg" and working WP on a i386) | 131 | * (for due to lack of "invlpg" and working WP on a i386) |
132 | * - In order to run on anything without a TSC, we need to be | 132 | * - In order to run on anything without a TSC, we need to be |
133 | * compiled for a i486. | 133 | * compiled for a i486. |
134 | * - In order to support the local APIC on a buggy Pentium machine, | 134 | */ |
135 | * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, | ||
136 | * which happens implicitly if compiled for a Pentium or lower | ||
137 | * (unless an advanced selection of CPU features is used) as an | ||
138 | * otherwise config implies a properly working local APIC without | ||
139 | * the need to do extra reads from the APIC. | ||
140 | */ | ||
141 | 135 | ||
142 | static void __init check_config(void) | 136 | static void __init check_config(void) |
143 | { | 137 | { |
@@ -151,21 +145,6 @@ static void __init check_config(void) | |||
151 | if (boot_cpu_data.x86 == 3) | 145 | if (boot_cpu_data.x86 == 3) |
152 | panic("Kernel requires i486+ for 'invlpg' and other features"); | 146 | panic("Kernel requires i486+ for 'invlpg' and other features"); |
153 | #endif | 147 | #endif |
154 | |||
155 | /* | ||
156 | * If we were told we had a good local APIC, check for buggy Pentia, | ||
157 | * i.e. all B steppings and the C2 stepping of P54C when using their | ||
158 | * integrated APIC (see 11AP erratum in "Pentium Processor | ||
159 | * Specification Update"). | ||
160 | */ | ||
161 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) | ||
162 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL | ||
163 | && cpu_has_apic | ||
164 | && boot_cpu_data.x86 == 5 | ||
165 | && boot_cpu_data.x86_model == 2 | ||
166 | && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) | ||
167 | panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); | ||
168 | #endif | ||
169 | } | 148 | } |
170 | 149 | ||
171 | 150 | ||
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 7b8cc72feb40..dd6e3f15017e 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c | |||
@@ -7,15 +7,13 @@ | |||
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/kgdb.h> | 8 | #include <linux/kgdb.h> |
9 | #include <linux/topology.h> | 9 | #include <linux/topology.h> |
10 | #include <linux/string.h> | ||
11 | #include <linux/delay.h> | 10 | #include <linux/delay.h> |
12 | #include <linux/smp.h> | 11 | #include <linux/smp.h> |
13 | #include <linux/module.h> | ||
14 | #include <linux/percpu.h> | 12 | #include <linux/percpu.h> |
15 | #include <asm/processor.h> | ||
16 | #include <asm/i387.h> | 13 | #include <asm/i387.h> |
17 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
18 | #include <asm/io.h> | 15 | #include <asm/io.h> |
16 | #include <asm/linkage.h> | ||
19 | #include <asm/mmu_context.h> | 17 | #include <asm/mmu_context.h> |
20 | #include <asm/mtrr.h> | 18 | #include <asm/mtrr.h> |
21 | #include <asm/mce.h> | 19 | #include <asm/mce.h> |
@@ -305,7 +303,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
305 | c->x86_capability[2] = cpuid_edx(0x80860001); | 303 | c->x86_capability[2] = cpuid_edx(0x80860001); |
306 | } | 304 | } |
307 | 305 | ||
308 | c->extended_cpuid_level = cpuid_eax(0x80000000); | ||
309 | if (c->extended_cpuid_level >= 0x80000007) | 306 | if (c->extended_cpuid_level >= 0x80000007) |
310 | c->x86_power = cpuid_edx(0x80000007); | 307 | c->x86_power = cpuid_edx(0x80000007); |
311 | 308 | ||
@@ -316,18 +313,11 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
316 | c->x86_phys_bits = eax & 0xff; | 313 | c->x86_phys_bits = eax & 0xff; |
317 | } | 314 | } |
318 | 315 | ||
319 | /* Assume all 64-bit CPUs support 32-bit syscall */ | ||
320 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | ||
321 | |||
322 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && | 316 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && |
323 | cpu_devs[c->x86_vendor]->c_early_init) | 317 | cpu_devs[c->x86_vendor]->c_early_init) |
324 | cpu_devs[c->x86_vendor]->c_early_init(c); | 318 | cpu_devs[c->x86_vendor]->c_early_init(c); |
325 | 319 | ||
326 | validate_pat_support(c); | 320 | validate_pat_support(c); |
327 | |||
328 | /* early_param could clear that, but recall get it set again */ | ||
329 | if (disable_apic) | ||
330 | clear_cpu_cap(c, X86_FEATURE_APIC); | ||
331 | } | 321 | } |
332 | 322 | ||
333 | /* | 323 | /* |
@@ -517,8 +507,7 @@ void pda_init(int cpu) | |||
517 | } | 507 | } |
518 | 508 | ||
519 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + | 509 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + |
520 | DEBUG_STKSZ] | 510 | DEBUG_STKSZ] __page_aligned_bss; |
521 | __attribute__((section(".bss.page_aligned"))); | ||
522 | 511 | ||
523 | extern asmlinkage void ignore_sysret(void); | 512 | extern asmlinkage void ignore_sysret(void); |
524 | 513 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index b0c8208df9fa..ff2fff56f0a8 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -200,10 +200,12 @@ static void drv_read(struct drv_cmd *cmd) | |||
200 | static void drv_write(struct drv_cmd *cmd) | 200 | static void drv_write(struct drv_cmd *cmd) |
201 | { | 201 | { |
202 | cpumask_t saved_mask = current->cpus_allowed; | 202 | cpumask_t saved_mask = current->cpus_allowed; |
203 | cpumask_of_cpu_ptr_declare(cpu_mask); | ||
203 | unsigned int i; | 204 | unsigned int i; |
204 | 205 | ||
205 | for_each_cpu_mask(i, cmd->mask) { | 206 | for_each_cpu_mask_nr(i, cmd->mask) { |
206 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); | 207 | cpumask_of_cpu_ptr_next(cpu_mask, i); |
208 | set_cpus_allowed_ptr(current, cpu_mask); | ||
207 | do_drv_write(cmd); | 209 | do_drv_write(cmd); |
208 | } | 210 | } |
209 | 211 | ||
@@ -267,11 +269,12 @@ static unsigned int get_measured_perf(unsigned int cpu) | |||
267 | } aperf_cur, mperf_cur; | 269 | } aperf_cur, mperf_cur; |
268 | 270 | ||
269 | cpumask_t saved_mask; | 271 | cpumask_t saved_mask; |
272 | cpumask_of_cpu_ptr(cpu_mask, cpu); | ||
270 | unsigned int perf_percent; | 273 | unsigned int perf_percent; |
271 | unsigned int retval; | 274 | unsigned int retval; |
272 | 275 | ||
273 | saved_mask = current->cpus_allowed; | 276 | saved_mask = current->cpus_allowed; |
274 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 277 | set_cpus_allowed_ptr(current, cpu_mask); |
275 | if (get_cpu() != cpu) { | 278 | if (get_cpu() != cpu) { |
276 | /* We were not able to run on requested processor */ | 279 | /* We were not able to run on requested processor */ |
277 | put_cpu(); | 280 | put_cpu(); |
@@ -337,6 +340,7 @@ static unsigned int get_measured_perf(unsigned int cpu) | |||
337 | 340 | ||
338 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | 341 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) |
339 | { | 342 | { |
343 | cpumask_of_cpu_ptr(cpu_mask, cpu); | ||
340 | struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); | 344 | struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); |
341 | unsigned int freq; | 345 | unsigned int freq; |
342 | unsigned int cached_freq; | 346 | unsigned int cached_freq; |
@@ -349,7 +353,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | |||
349 | } | 353 | } |
350 | 354 | ||
351 | cached_freq = data->freq_table[data->acpi_data->state].frequency; | 355 | cached_freq = data->freq_table[data->acpi_data->state].frequency; |
352 | freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); | 356 | freq = extract_freq(get_cur_val(cpu_mask), data); |
353 | if (freq != cached_freq) { | 357 | if (freq != cached_freq) { |
354 | /* | 358 | /* |
355 | * The dreaded BIOS frequency change behind our back. | 359 | * The dreaded BIOS frequency change behind our back. |
@@ -451,7 +455,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
451 | 455 | ||
452 | freqs.old = perf->states[perf->state].core_frequency * 1000; | 456 | freqs.old = perf->states[perf->state].core_frequency * 1000; |
453 | freqs.new = data->freq_table[next_state].frequency; | 457 | freqs.new = data->freq_table[next_state].frequency; |
454 | for_each_cpu_mask(i, cmd.mask) { | 458 | for_each_cpu_mask_nr(i, cmd.mask) { |
455 | freqs.cpu = i; | 459 | freqs.cpu = i; |
456 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 460 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
457 | } | 461 | } |
@@ -466,7 +470,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
466 | } | 470 | } |
467 | } | 471 | } |
468 | 472 | ||
469 | for_each_cpu_mask(i, cmd.mask) { | 473 | for_each_cpu_mask_nr(i, cmd.mask) { |
470 | freqs.cpu = i; | 474 | freqs.cpu = i; |
471 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 475 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
472 | } | 476 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 199e4e05e5dc..f1685fb91fbd 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | |||
@@ -122,7 +122,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, | |||
122 | return 0; | 122 | return 0; |
123 | 123 | ||
124 | /* notifiers */ | 124 | /* notifiers */ |
125 | for_each_cpu_mask(i, policy->cpus) { | 125 | for_each_cpu_mask_nr(i, policy->cpus) { |
126 | freqs.cpu = i; | 126 | freqs.cpu = i; |
127 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 127 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
128 | } | 128 | } |
@@ -130,11 +130,11 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, | |||
130 | /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software | 130 | /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software |
131 | * Developer's Manual, Volume 3 | 131 | * Developer's Manual, Volume 3 |
132 | */ | 132 | */ |
133 | for_each_cpu_mask(i, policy->cpus) | 133 | for_each_cpu_mask_nr(i, policy->cpus) |
134 | cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); | 134 | cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); |
135 | 135 | ||
136 | /* notifiers */ | 136 | /* notifiers */ |
137 | for_each_cpu_mask(i, policy->cpus) { | 137 | for_each_cpu_mask_nr(i, policy->cpus) { |
138 | freqs.cpu = i; | 138 | freqs.cpu = i; |
139 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 139 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
140 | } | 140 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h index f8a63b3664e3..35fb4eaf6e1c 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h | |||
@@ -1,5 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $ | ||
3 | * (C) 2003 Dave Jones. | 2 | * (C) 2003 Dave Jones. |
4 | * | 3 | * |
5 | * Licensed under the terms of the GNU GPL License version 2. | 4 | * Licensed under the terms of the GNU GPL License version 2. |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 206791eb46e3..53c7b6936973 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -479,11 +479,12 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi | |||
479 | static int check_supported_cpu(unsigned int cpu) | 479 | static int check_supported_cpu(unsigned int cpu) |
480 | { | 480 | { |
481 | cpumask_t oldmask; | 481 | cpumask_t oldmask; |
482 | cpumask_of_cpu_ptr(cpu_mask, cpu); | ||
482 | u32 eax, ebx, ecx, edx; | 483 | u32 eax, ebx, ecx, edx; |
483 | unsigned int rc = 0; | 484 | unsigned int rc = 0; |
484 | 485 | ||
485 | oldmask = current->cpus_allowed; | 486 | oldmask = current->cpus_allowed; |
486 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 487 | set_cpus_allowed_ptr(current, cpu_mask); |
487 | 488 | ||
488 | if (smp_processor_id() != cpu) { | 489 | if (smp_processor_id() != cpu) { |
489 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); | 490 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); |
@@ -966,7 +967,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i | |||
966 | freqs.old = find_khz_freq_from_fid(data->currfid); | 967 | freqs.old = find_khz_freq_from_fid(data->currfid); |
967 | freqs.new = find_khz_freq_from_fid(fid); | 968 | freqs.new = find_khz_freq_from_fid(fid); |
968 | 969 | ||
969 | for_each_cpu_mask(i, *(data->available_cores)) { | 970 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
970 | freqs.cpu = i; | 971 | freqs.cpu = i; |
971 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 972 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
972 | } | 973 | } |
@@ -974,7 +975,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i | |||
974 | res = transition_fid_vid(data, fid, vid); | 975 | res = transition_fid_vid(data, fid, vid); |
975 | freqs.new = find_khz_freq_from_fid(data->currfid); | 976 | freqs.new = find_khz_freq_from_fid(data->currfid); |
976 | 977 | ||
977 | for_each_cpu_mask(i, *(data->available_cores)) { | 978 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
978 | freqs.cpu = i; | 979 | freqs.cpu = i; |
979 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 980 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
980 | } | 981 | } |
@@ -997,7 +998,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i | |||
997 | freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); | 998 | freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); |
998 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | 999 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); |
999 | 1000 | ||
1000 | for_each_cpu_mask(i, *(data->available_cores)) { | 1001 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
1001 | freqs.cpu = i; | 1002 | freqs.cpu = i; |
1002 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 1003 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
1003 | } | 1004 | } |
@@ -1005,7 +1006,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i | |||
1005 | res = transition_pstate(data, pstate); | 1006 | res = transition_pstate(data, pstate); |
1006 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | 1007 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); |
1007 | 1008 | ||
1008 | for_each_cpu_mask(i, *(data->available_cores)) { | 1009 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
1009 | freqs.cpu = i; | 1010 | freqs.cpu = i; |
1010 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 1011 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
1011 | } | 1012 | } |
@@ -1016,6 +1017,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i | |||
1016 | static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) | 1017 | static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) |
1017 | { | 1018 | { |
1018 | cpumask_t oldmask; | 1019 | cpumask_t oldmask; |
1020 | cpumask_of_cpu_ptr(cpu_mask, pol->cpu); | ||
1019 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); | 1021 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); |
1020 | u32 checkfid; | 1022 | u32 checkfid; |
1021 | u32 checkvid; | 1023 | u32 checkvid; |
@@ -1030,7 +1032,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi | |||
1030 | 1032 | ||
1031 | /* only run on specific CPU from here on */ | 1033 | /* only run on specific CPU from here on */ |
1032 | oldmask = current->cpus_allowed; | 1034 | oldmask = current->cpus_allowed; |
1033 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); | 1035 | set_cpus_allowed_ptr(current, cpu_mask); |
1034 | 1036 | ||
1035 | if (smp_processor_id() != pol->cpu) { | 1037 | if (smp_processor_id() != pol->cpu) { |
1036 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); | 1038 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); |
@@ -1105,6 +1107,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1105 | { | 1107 | { |
1106 | struct powernow_k8_data *data; | 1108 | struct powernow_k8_data *data; |
1107 | cpumask_t oldmask; | 1109 | cpumask_t oldmask; |
1110 | cpumask_of_cpu_ptr_declare(newmask); | ||
1108 | int rc; | 1111 | int rc; |
1109 | 1112 | ||
1110 | if (!cpu_online(pol->cpu)) | 1113 | if (!cpu_online(pol->cpu)) |
@@ -1156,7 +1159,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1156 | 1159 | ||
1157 | /* only run on specific CPU from here on */ | 1160 | /* only run on specific CPU from here on */ |
1158 | oldmask = current->cpus_allowed; | 1161 | oldmask = current->cpus_allowed; |
1159 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); | 1162 | cpumask_of_cpu_ptr_next(newmask, pol->cpu); |
1163 | set_cpus_allowed_ptr(current, newmask); | ||
1160 | 1164 | ||
1161 | if (smp_processor_id() != pol->cpu) { | 1165 | if (smp_processor_id() != pol->cpu) { |
1162 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); | 1166 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); |
@@ -1178,7 +1182,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1178 | set_cpus_allowed_ptr(current, &oldmask); | 1182 | set_cpus_allowed_ptr(current, &oldmask); |
1179 | 1183 | ||
1180 | if (cpu_family == CPU_HW_PSTATE) | 1184 | if (cpu_family == CPU_HW_PSTATE) |
1181 | pol->cpus = cpumask_of_cpu(pol->cpu); | 1185 | pol->cpus = *newmask; |
1182 | else | 1186 | else |
1183 | pol->cpus = per_cpu(cpu_core_map, pol->cpu); | 1187 | pol->cpus = per_cpu(cpu_core_map, pol->cpu); |
1184 | data->available_cores = &(pol->cpus); | 1188 | data->available_cores = &(pol->cpus); |
@@ -1244,6 +1248,7 @@ static unsigned int powernowk8_get (unsigned int cpu) | |||
1244 | { | 1248 | { |
1245 | struct powernow_k8_data *data; | 1249 | struct powernow_k8_data *data; |
1246 | cpumask_t oldmask = current->cpus_allowed; | 1250 | cpumask_t oldmask = current->cpus_allowed; |
1251 | cpumask_of_cpu_ptr(newmask, cpu); | ||
1247 | unsigned int khz = 0; | 1252 | unsigned int khz = 0; |
1248 | unsigned int first; | 1253 | unsigned int first; |
1249 | 1254 | ||
@@ -1253,7 +1258,7 @@ static unsigned int powernowk8_get (unsigned int cpu) | |||
1253 | if (!data) | 1258 | if (!data) |
1254 | return -EINVAL; | 1259 | return -EINVAL; |
1255 | 1260 | ||
1256 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 1261 | set_cpus_allowed_ptr(current, newmask); |
1257 | if (smp_processor_id() != cpu) { | 1262 | if (smp_processor_id() != cpu) { |
1258 | printk(KERN_ERR PFX | 1263 | printk(KERN_ERR PFX |
1259 | "limiting to CPU %d failed in powernowk8_get\n", cpu); | 1264 | "limiting to CPU %d failed in powernowk8_get\n", cpu); |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 908dd347c67e..ca2ac13b7af2 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
@@ -28,7 +28,8 @@ | |||
28 | #define PFX "speedstep-centrino: " | 28 | #define PFX "speedstep-centrino: " |
29 | #define MAINTAINER "cpufreq@lists.linux.org.uk" | 29 | #define MAINTAINER "cpufreq@lists.linux.org.uk" |
30 | 30 | ||
31 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) | 31 | #define dprintk(msg...) \ |
32 | cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) | ||
32 | 33 | ||
33 | #define INTEL_MSR_RANGE (0xffff) | 34 | #define INTEL_MSR_RANGE (0xffff) |
34 | 35 | ||
@@ -66,11 +67,12 @@ struct cpu_model | |||
66 | 67 | ||
67 | struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ | 68 | struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ |
68 | }; | 69 | }; |
69 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x); | 70 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, |
71 | const struct cpu_id *x); | ||
70 | 72 | ||
71 | /* Operating points for current CPU */ | 73 | /* Operating points for current CPU */ |
72 | static struct cpu_model *centrino_model[NR_CPUS]; | 74 | static DEFINE_PER_CPU(struct cpu_model *, centrino_model); |
73 | static const struct cpu_id *centrino_cpu[NR_CPUS]; | 75 | static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu); |
74 | 76 | ||
75 | static struct cpufreq_driver centrino_driver; | 77 | static struct cpufreq_driver centrino_driver; |
76 | 78 | ||
@@ -255,7 +257,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) | |||
255 | return -ENOENT; | 257 | return -ENOENT; |
256 | } | 258 | } |
257 | 259 | ||
258 | centrino_model[policy->cpu] = model; | 260 | per_cpu(centrino_model, policy->cpu) = model; |
259 | 261 | ||
260 | dprintk("found \"%s\": max frequency: %dkHz\n", | 262 | dprintk("found \"%s\": max frequency: %dkHz\n", |
261 | model->model_name, model->max_freq); | 263 | model->model_name, model->max_freq); |
@@ -264,10 +266,14 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) | |||
264 | } | 266 | } |
265 | 267 | ||
266 | #else | 268 | #else |
267 | static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; } | 269 | static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) |
270 | { | ||
271 | return -ENODEV; | ||
272 | } | ||
268 | #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ | 273 | #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ |
269 | 274 | ||
270 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x) | 275 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, |
276 | const struct cpu_id *x) | ||
271 | { | 277 | { |
272 | if ((c->x86 == x->x86) && | 278 | if ((c->x86 == x->x86) && |
273 | (c->x86_model == x->x86_model) && | 279 | (c->x86_model == x->x86_model) && |
@@ -286,23 +292,28 @@ static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) | |||
286 | * for centrino, as some DSDTs are buggy. | 292 | * for centrino, as some DSDTs are buggy. |
287 | * Ideally, this can be done using the acpi_data structure. | 293 | * Ideally, this can be done using the acpi_data structure. |
288 | */ | 294 | */ |
289 | if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) || | 295 | if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) || |
290 | (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) || | 296 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) || |
291 | (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) { | 297 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) { |
292 | msr = (msr >> 8) & 0xff; | 298 | msr = (msr >> 8) & 0xff; |
293 | return msr * 100000; | 299 | return msr * 100000; |
294 | } | 300 | } |
295 | 301 | ||
296 | if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points)) | 302 | if ((!per_cpu(centrino_model, cpu)) || |
303 | (!per_cpu(centrino_model, cpu)->op_points)) | ||
297 | return 0; | 304 | return 0; |
298 | 305 | ||
299 | msr &= 0xffff; | 306 | msr &= 0xffff; |
300 | for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) { | 307 | for (i = 0; |
301 | if (msr == centrino_model[cpu]->op_points[i].index) | 308 | per_cpu(centrino_model, cpu)->op_points[i].frequency |
302 | return centrino_model[cpu]->op_points[i].frequency; | 309 | != CPUFREQ_TABLE_END; |
310 | i++) { | ||
311 | if (msr == per_cpu(centrino_model, cpu)->op_points[i].index) | ||
312 | return per_cpu(centrino_model, cpu)-> | ||
313 | op_points[i].frequency; | ||
303 | } | 314 | } |
304 | if (failsafe) | 315 | if (failsafe) |
305 | return centrino_model[cpu]->op_points[i-1].frequency; | 316 | return per_cpu(centrino_model, cpu)->op_points[i-1].frequency; |
306 | else | 317 | else |
307 | return 0; | 318 | return 0; |
308 | } | 319 | } |
@@ -313,9 +324,10 @@ static unsigned int get_cur_freq(unsigned int cpu) | |||
313 | unsigned l, h; | 324 | unsigned l, h; |
314 | unsigned clock_freq; | 325 | unsigned clock_freq; |
315 | cpumask_t saved_mask; | 326 | cpumask_t saved_mask; |
327 | cpumask_of_cpu_ptr(new_mask, cpu); | ||
316 | 328 | ||
317 | saved_mask = current->cpus_allowed; | 329 | saved_mask = current->cpus_allowed; |
318 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 330 | set_cpus_allowed_ptr(current, new_mask); |
319 | if (smp_processor_id() != cpu) | 331 | if (smp_processor_id() != cpu) |
320 | return 0; | 332 | return 0; |
321 | 333 | ||
@@ -347,7 +359,8 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) | |||
347 | int i; | 359 | int i; |
348 | 360 | ||
349 | /* Only Intel makes Enhanced Speedstep-capable CPUs */ | 361 | /* Only Intel makes Enhanced Speedstep-capable CPUs */ |
350 | if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) | 362 | if (cpu->x86_vendor != X86_VENDOR_INTEL || |
363 | !cpu_has(cpu, X86_FEATURE_EST)) | ||
351 | return -ENODEV; | 364 | return -ENODEV; |
352 | 365 | ||
353 | if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) | 366 | if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) |
@@ -361,9 +374,9 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) | |||
361 | break; | 374 | break; |
362 | 375 | ||
363 | if (i != N_IDS) | 376 | if (i != N_IDS) |
364 | centrino_cpu[policy->cpu] = &cpu_ids[i]; | 377 | per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i]; |
365 | 378 | ||
366 | if (!centrino_cpu[policy->cpu]) { | 379 | if (!per_cpu(centrino_cpu, policy->cpu)) { |
367 | dprintk("found unsupported CPU with " | 380 | dprintk("found unsupported CPU with " |
368 | "Enhanced SpeedStep: send /proc/cpuinfo to " | 381 | "Enhanced SpeedStep: send /proc/cpuinfo to " |
369 | MAINTAINER "\n"); | 382 | MAINTAINER "\n"); |
@@ -386,23 +399,26 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) | |||
386 | /* check to see if it stuck */ | 399 | /* check to see if it stuck */ |
387 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 400 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
388 | if (!(l & (1<<16))) { | 401 | if (!(l & (1<<16))) { |
389 | printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); | 402 | printk(KERN_INFO PFX |
403 | "couldn't enable Enhanced SpeedStep\n"); | ||
390 | return -ENODEV; | 404 | return -ENODEV; |
391 | } | 405 | } |
392 | } | 406 | } |
393 | 407 | ||
394 | freq = get_cur_freq(policy->cpu); | 408 | freq = get_cur_freq(policy->cpu); |
395 | 409 | policy->cpuinfo.transition_latency = 10000; | |
396 | policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */ | 410 | /* 10uS transition latency */ |
397 | policy->cur = freq; | 411 | policy->cur = freq; |
398 | 412 | ||
399 | dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); | 413 | dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); |
400 | 414 | ||
401 | ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points); | 415 | ret = cpufreq_frequency_table_cpuinfo(policy, |
416 | per_cpu(centrino_model, policy->cpu)->op_points); | ||
402 | if (ret) | 417 | if (ret) |
403 | return (ret); | 418 | return (ret); |
404 | 419 | ||
405 | cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu); | 420 | cpufreq_frequency_table_get_attr( |
421 | per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu); | ||
406 | 422 | ||
407 | return 0; | 423 | return 0; |
408 | } | 424 | } |
@@ -411,12 +427,12 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) | |||
411 | { | 427 | { |
412 | unsigned int cpu = policy->cpu; | 428 | unsigned int cpu = policy->cpu; |
413 | 429 | ||
414 | if (!centrino_model[cpu]) | 430 | if (!per_cpu(centrino_model, cpu)) |
415 | return -ENODEV; | 431 | return -ENODEV; |
416 | 432 | ||
417 | cpufreq_frequency_table_put_attr(cpu); | 433 | cpufreq_frequency_table_put_attr(cpu); |
418 | 434 | ||
419 | centrino_model[cpu] = NULL; | 435 | per_cpu(centrino_model, cpu) = NULL; |
420 | 436 | ||
421 | return 0; | 437 | return 0; |
422 | } | 438 | } |
@@ -430,17 +446,26 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) | |||
430 | */ | 446 | */ |
431 | static int centrino_verify (struct cpufreq_policy *policy) | 447 | static int centrino_verify (struct cpufreq_policy *policy) |
432 | { | 448 | { |
433 | return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points); | 449 | return cpufreq_frequency_table_verify(policy, |
450 | per_cpu(centrino_model, policy->cpu)->op_points); | ||
434 | } | 451 | } |
435 | 452 | ||
436 | /** | 453 | /** |
437 | * centrino_setpolicy - set a new CPUFreq policy | 454 | * centrino_setpolicy - set a new CPUFreq policy |
438 | * @policy: new policy | 455 | * @policy: new policy |
439 | * @target_freq: the target frequency | 456 | * @target_freq: the target frequency |
440 | * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | 457 | * @relation: how that frequency relates to achieved frequency |
458 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
441 | * | 459 | * |
442 | * Sets a new CPUFreq policy. | 460 | * Sets a new CPUFreq policy. |
443 | */ | 461 | */ |
462 | struct allmasks { | ||
463 | cpumask_t online_policy_cpus; | ||
464 | cpumask_t saved_mask; | ||
465 | cpumask_t set_mask; | ||
466 | cpumask_t covered_cpus; | ||
467 | }; | ||
468 | |||
444 | static int centrino_target (struct cpufreq_policy *policy, | 469 | static int centrino_target (struct cpufreq_policy *policy, |
445 | unsigned int target_freq, | 470 | unsigned int target_freq, |
446 | unsigned int relation) | 471 | unsigned int relation) |
@@ -448,48 +473,55 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
448 | unsigned int newstate = 0; | 473 | unsigned int newstate = 0; |
449 | unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; | 474 | unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; |
450 | struct cpufreq_freqs freqs; | 475 | struct cpufreq_freqs freqs; |
451 | cpumask_t online_policy_cpus; | ||
452 | cpumask_t saved_mask; | ||
453 | cpumask_t set_mask; | ||
454 | cpumask_t covered_cpus; | ||
455 | int retval = 0; | 476 | int retval = 0; |
456 | unsigned int j, k, first_cpu, tmp; | 477 | unsigned int j, k, first_cpu, tmp; |
457 | 478 | CPUMASK_ALLOC(allmasks); | |
458 | if (unlikely(centrino_model[cpu] == NULL)) | 479 | CPUMASK_PTR(online_policy_cpus, allmasks); |
459 | return -ENODEV; | 480 | CPUMASK_PTR(saved_mask, allmasks); |
481 | CPUMASK_PTR(set_mask, allmasks); | ||
482 | CPUMASK_PTR(covered_cpus, allmasks); | ||
483 | |||
484 | if (unlikely(allmasks == NULL)) | ||
485 | return -ENOMEM; | ||
486 | |||
487 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { | ||
488 | retval = -ENODEV; | ||
489 | goto out; | ||
490 | } | ||
460 | 491 | ||
461 | if (unlikely(cpufreq_frequency_table_target(policy, | 492 | if (unlikely(cpufreq_frequency_table_target(policy, |
462 | centrino_model[cpu]->op_points, | 493 | per_cpu(centrino_model, cpu)->op_points, |
463 | target_freq, | 494 | target_freq, |
464 | relation, | 495 | relation, |
465 | &newstate))) { | 496 | &newstate))) { |
466 | return -EINVAL; | 497 | retval = -EINVAL; |
498 | goto out; | ||
467 | } | 499 | } |
468 | 500 | ||
469 | #ifdef CONFIG_HOTPLUG_CPU | 501 | #ifdef CONFIG_HOTPLUG_CPU |
470 | /* cpufreq holds the hotplug lock, so we are safe from here on */ | 502 | /* cpufreq holds the hotplug lock, so we are safe from here on */ |
471 | cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); | 503 | cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus); |
472 | #else | 504 | #else |
473 | online_policy_cpus = policy->cpus; | 505 | *online_policy_cpus = policy->cpus; |
474 | #endif | 506 | #endif |
475 | 507 | ||
476 | saved_mask = current->cpus_allowed; | 508 | *saved_mask = current->cpus_allowed; |
477 | first_cpu = 1; | 509 | first_cpu = 1; |
478 | cpus_clear(covered_cpus); | 510 | cpus_clear(*covered_cpus); |
479 | for_each_cpu_mask(j, online_policy_cpus) { | 511 | for_each_cpu_mask_nr(j, *online_policy_cpus) { |
480 | /* | 512 | /* |
481 | * Support for SMP systems. | 513 | * Support for SMP systems. |
482 | * Make sure we are running on CPU that wants to change freq | 514 | * Make sure we are running on CPU that wants to change freq |
483 | */ | 515 | */ |
484 | cpus_clear(set_mask); | 516 | cpus_clear(*set_mask); |
485 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) | 517 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) |
486 | cpus_or(set_mask, set_mask, online_policy_cpus); | 518 | cpus_or(*set_mask, *set_mask, *online_policy_cpus); |
487 | else | 519 | else |
488 | cpu_set(j, set_mask); | 520 | cpu_set(j, *set_mask); |
489 | 521 | ||
490 | set_cpus_allowed_ptr(current, &set_mask); | 522 | set_cpus_allowed_ptr(current, set_mask); |
491 | preempt_disable(); | 523 | preempt_disable(); |
492 | if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { | 524 | if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) { |
493 | dprintk("couldn't limit to CPUs in this domain\n"); | 525 | dprintk("couldn't limit to CPUs in this domain\n"); |
494 | retval = -EAGAIN; | 526 | retval = -EAGAIN; |
495 | if (first_cpu) { | 527 | if (first_cpu) { |
@@ -500,7 +532,7 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
500 | break; | 532 | break; |
501 | } | 533 | } |
502 | 534 | ||
503 | msr = centrino_model[cpu]->op_points[newstate].index; | 535 | msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; |
504 | 536 | ||
505 | if (first_cpu) { | 537 | if (first_cpu) { |
506 | rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 538 | rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
@@ -517,7 +549,7 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
517 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", | 549 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", |
518 | target_freq, freqs.old, freqs.new, msr); | 550 | target_freq, freqs.old, freqs.new, msr); |
519 | 551 | ||
520 | for_each_cpu_mask(k, online_policy_cpus) { | 552 | for_each_cpu_mask_nr(k, *online_policy_cpus) { |
521 | freqs.cpu = k; | 553 | freqs.cpu = k; |
522 | cpufreq_notify_transition(&freqs, | 554 | cpufreq_notify_transition(&freqs, |
523 | CPUFREQ_PRECHANGE); | 555 | CPUFREQ_PRECHANGE); |
@@ -536,11 +568,11 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
536 | break; | 568 | break; |
537 | } | 569 | } |
538 | 570 | ||
539 | cpu_set(j, covered_cpus); | 571 | cpu_set(j, *covered_cpus); |
540 | preempt_enable(); | 572 | preempt_enable(); |
541 | } | 573 | } |
542 | 574 | ||
543 | for_each_cpu_mask(k, online_policy_cpus) { | 575 | for_each_cpu_mask_nr(k, *online_policy_cpus) { |
544 | freqs.cpu = k; | 576 | freqs.cpu = k; |
545 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 577 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
546 | } | 578 | } |
@@ -553,10 +585,12 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
553 | * Best effort undo.. | 585 | * Best effort undo.. |
554 | */ | 586 | */ |
555 | 587 | ||
556 | if (!cpus_empty(covered_cpus)) { | 588 | if (!cpus_empty(*covered_cpus)) { |
557 | for_each_cpu_mask(j, covered_cpus) { | 589 | cpumask_of_cpu_ptr_declare(new_mask); |
558 | set_cpus_allowed_ptr(current, | 590 | |
559 | &cpumask_of_cpu(j)); | 591 | for_each_cpu_mask_nr(j, *covered_cpus) { |
592 | cpumask_of_cpu_ptr_next(new_mask, j); | ||
593 | set_cpus_allowed_ptr(current, new_mask); | ||
560 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 594 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
561 | } | 595 | } |
562 | } | 596 | } |
@@ -564,19 +598,22 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
564 | tmp = freqs.new; | 598 | tmp = freqs.new; |
565 | freqs.new = freqs.old; | 599 | freqs.new = freqs.old; |
566 | freqs.old = tmp; | 600 | freqs.old = tmp; |
567 | for_each_cpu_mask(j, online_policy_cpus) { | 601 | for_each_cpu_mask_nr(j, *online_policy_cpus) { |
568 | freqs.cpu = j; | 602 | freqs.cpu = j; |
569 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 603 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
570 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 604 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
571 | } | 605 | } |
572 | } | 606 | } |
573 | set_cpus_allowed_ptr(current, &saved_mask); | 607 | set_cpus_allowed_ptr(current, saved_mask); |
574 | return 0; | 608 | retval = 0; |
609 | goto out; | ||
575 | 610 | ||
576 | migrate_end: | 611 | migrate_end: |
577 | preempt_enable(); | 612 | preempt_enable(); |
578 | set_cpus_allowed_ptr(current, &saved_mask); | 613 | set_cpus_allowed_ptr(current, saved_mask); |
579 | return 0; | 614 | out: |
615 | CPUMASK_FREE(allmasks); | ||
616 | return retval; | ||
580 | } | 617 | } |
581 | 618 | ||
582 | static struct freq_attr* centrino_attr[] = { | 619 | static struct freq_attr* centrino_attr[] = { |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 1b50244b1fdf..2f3728dc24f6 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | |||
@@ -244,7 +244,8 @@ static unsigned int _speedstep_get(const cpumask_t *cpus) | |||
244 | 244 | ||
245 | static unsigned int speedstep_get(unsigned int cpu) | 245 | static unsigned int speedstep_get(unsigned int cpu) |
246 | { | 246 | { |
247 | return _speedstep_get(&cpumask_of_cpu(cpu)); | 247 | cpumask_of_cpu_ptr(newmask, cpu); |
248 | return _speedstep_get(newmask); | ||
248 | } | 249 | } |
249 | 250 | ||
250 | /** | 251 | /** |
@@ -279,7 +280,7 @@ static int speedstep_target (struct cpufreq_policy *policy, | |||
279 | 280 | ||
280 | cpus_allowed = current->cpus_allowed; | 281 | cpus_allowed = current->cpus_allowed; |
281 | 282 | ||
282 | for_each_cpu_mask(i, policy->cpus) { | 283 | for_each_cpu_mask_nr(i, policy->cpus) { |
283 | freqs.cpu = i; | 284 | freqs.cpu = i; |
284 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 285 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
285 | } | 286 | } |
@@ -292,7 +293,7 @@ static int speedstep_target (struct cpufreq_policy *policy, | |||
292 | /* allow to be run on all CPUs */ | 293 | /* allow to be run on all CPUs */ |
293 | set_cpus_allowed_ptr(current, &cpus_allowed); | 294 | set_cpus_allowed_ptr(current, &cpus_allowed); |
294 | 295 | ||
295 | for_each_cpu_mask(i, policy->cpus) { | 296 | for_each_cpu_mask_nr(i, policy->cpus) { |
296 | freqs.cpu = i; | 297 | freqs.cpu = i; |
297 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 298 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
298 | } | 299 | } |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 70609efdf1da..b75f2569b8f8 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -227,6 +227,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
227 | if (cpu_has_bts) | 227 | if (cpu_has_bts) |
228 | ds_init_intel(c); | 228 | ds_init_intel(c); |
229 | 229 | ||
230 | /* | ||
231 | * See if we have a good local APIC by checking for buggy Pentia, | ||
232 | * i.e. all B steppings and the C2 stepping of P54C when using their | ||
233 | * integrated APIC (see 11AP erratum in "Pentium Processor | ||
234 | * Specification Update"). | ||
235 | */ | ||
236 | if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && | ||
237 | (c->x86_mask < 0x6 || c->x86_mask == 0xb)) | ||
238 | set_cpu_cap(c, X86_FEATURE_11AP); | ||
239 | |||
230 | #ifdef CONFIG_X86_NUMAQ | 240 | #ifdef CONFIG_X86_NUMAQ |
231 | numaq_tsc_disable(); | 241 | numaq_tsc_disable(); |
232 | #endif | 242 | #endif |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 2c8afafa18e8..650d40f7912b 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -489,7 +489,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) | |||
489 | int sibling; | 489 | int sibling; |
490 | 490 | ||
491 | this_leaf = CPUID4_INFO_IDX(cpu, index); | 491 | this_leaf = CPUID4_INFO_IDX(cpu, index); |
492 | for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { | 492 | for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { |
493 | sibling_leaf = CPUID4_INFO_IDX(sibling, index); | 493 | sibling_leaf = CPUID4_INFO_IDX(sibling, index); |
494 | cpu_clear(cpu, sibling_leaf->shared_cpu_map); | 494 | cpu_clear(cpu, sibling_leaf->shared_cpu_map); |
495 | } | 495 | } |
@@ -516,6 +516,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) | |||
516 | unsigned long j; | 516 | unsigned long j; |
517 | int retval; | 517 | int retval; |
518 | cpumask_t oldmask; | 518 | cpumask_t oldmask; |
519 | cpumask_of_cpu_ptr(newmask, cpu); | ||
519 | 520 | ||
520 | if (num_cache_leaves == 0) | 521 | if (num_cache_leaves == 0) |
521 | return -ENOENT; | 522 | return -ENOENT; |
@@ -526,7 +527,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) | |||
526 | return -ENOMEM; | 527 | return -ENOMEM; |
527 | 528 | ||
528 | oldmask = current->cpus_allowed; | 529 | oldmask = current->cpus_allowed; |
529 | retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 530 | retval = set_cpus_allowed_ptr(current, newmask); |
530 | if (retval) | 531 | if (retval) |
531 | goto out; | 532 | goto out; |
532 | 533 | ||
@@ -780,15 +781,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
780 | } | 781 | } |
781 | kobject_put(per_cpu(cache_kobject, cpu)); | 782 | kobject_put(per_cpu(cache_kobject, cpu)); |
782 | cpuid4_cache_sysfs_exit(cpu); | 783 | cpuid4_cache_sysfs_exit(cpu); |
783 | break; | 784 | return retval; |
784 | } | 785 | } |
785 | kobject_uevent(&(this_object->kobj), KOBJ_ADD); | 786 | kobject_uevent(&(this_object->kobj), KOBJ_ADD); |
786 | } | 787 | } |
787 | if (!retval) | 788 | cpu_set(cpu, cache_dev_map); |
788 | cpu_set(cpu, cache_dev_map); | ||
789 | 789 | ||
790 | kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); | 790 | kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); |
791 | return retval; | 791 | return 0; |
792 | } | 792 | } |
793 | 793 | ||
794 | static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) | 794 | static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index c4a7ec31394c..65a339678ece 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
@@ -580,7 +580,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
580 | char __user *buf = ubuf; | 580 | char __user *buf = ubuf; |
581 | int i, err; | 581 | int i, err; |
582 | 582 | ||
583 | cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL); | 583 | cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); |
584 | if (!cpu_tsc) | 584 | if (!cpu_tsc) |
585 | return -ENOMEM; | 585 | return -ENOMEM; |
586 | 586 | ||
@@ -762,10 +762,14 @@ DEFINE_PER_CPU(struct sys_device, device_mce); | |||
762 | 762 | ||
763 | /* Why are there no generic functions for this? */ | 763 | /* Why are there no generic functions for this? */ |
764 | #define ACCESSOR(name, var, start) \ | 764 | #define ACCESSOR(name, var, start) \ |
765 | static ssize_t show_ ## name(struct sys_device *s, char *buf) { \ | 765 | static ssize_t show_ ## name(struct sys_device *s, \ |
766 | struct sysdev_attribute *attr, \ | ||
767 | char *buf) { \ | ||
766 | return sprintf(buf, "%lx\n", (unsigned long)var); \ | 768 | return sprintf(buf, "%lx\n", (unsigned long)var); \ |
767 | } \ | 769 | } \ |
768 | static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \ | 770 | static ssize_t set_ ## name(struct sys_device *s, \ |
771 | struct sysdev_attribute *attr, \ | ||
772 | const char *buf, size_t siz) { \ | ||
769 | char *end; \ | 773 | char *end; \ |
770 | unsigned long new = simple_strtoul(buf, &end, 0); \ | 774 | unsigned long new = simple_strtoul(buf, &end, 0); \ |
771 | if (end == buf) return -EINVAL; \ | 775 | if (end == buf) return -EINVAL; \ |
@@ -786,14 +790,16 @@ ACCESSOR(bank3ctl,bank[3],mce_restart()) | |||
786 | ACCESSOR(bank4ctl,bank[4],mce_restart()) | 790 | ACCESSOR(bank4ctl,bank[4],mce_restart()) |
787 | ACCESSOR(bank5ctl,bank[5],mce_restart()) | 791 | ACCESSOR(bank5ctl,bank[5],mce_restart()) |
788 | 792 | ||
789 | static ssize_t show_trigger(struct sys_device *s, char *buf) | 793 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
794 | char *buf) | ||
790 | { | 795 | { |
791 | strcpy(buf, trigger); | 796 | strcpy(buf, trigger); |
792 | strcat(buf, "\n"); | 797 | strcat(buf, "\n"); |
793 | return strlen(trigger) + 1; | 798 | return strlen(trigger) + 1; |
794 | } | 799 | } |
795 | 800 | ||
796 | static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz) | 801 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
802 | const char *buf,size_t siz) | ||
797 | { | 803 | { |
798 | char *p; | 804 | char *p; |
799 | int len; | 805 | int len; |
@@ -806,12 +812,12 @@ static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz) | |||
806 | } | 812 | } |
807 | 813 | ||
808 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | 814 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); |
809 | ACCESSOR(tolerant,tolerant,) | 815 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); |
810 | ACCESSOR(check_interval,check_interval,mce_restart()) | 816 | ACCESSOR(check_interval,check_interval,mce_restart()) |
811 | static struct sysdev_attribute *mce_attributes[] = { | 817 | static struct sysdev_attribute *mce_attributes[] = { |
812 | &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, | 818 | &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, |
813 | &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, | 819 | &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, |
814 | &attr_tolerant, &attr_check_interval, &attr_trigger, | 820 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, |
815 | NULL | 821 | NULL |
816 | }; | 822 | }; |
817 | 823 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 7c9a813e1193..88736cadbaa6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | |||
@@ -527,7 +527,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
527 | if (err) | 527 | if (err) |
528 | goto out_free; | 528 | goto out_free; |
529 | 529 | ||
530 | for_each_cpu_mask(i, b->cpus) { | 530 | for_each_cpu_mask_nr(i, b->cpus) { |
531 | if (i == cpu) | 531 | if (i == cpu) |
532 | continue; | 532 | continue; |
533 | 533 | ||
@@ -617,7 +617,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
617 | #endif | 617 | #endif |
618 | 618 | ||
619 | /* remove all sibling symlinks before unregistering */ | 619 | /* remove all sibling symlinks before unregistering */ |
620 | for_each_cpu_mask(i, b->cpus) { | 620 | for_each_cpu_mask_nr(i, b->cpus) { |
621 | if (i == cpu) | 621 | if (i == cpu) |
622 | continue; | 622 | continue; |
623 | 623 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index eef001ad3bde..9b60fce09f75 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c | |||
@@ -102,7 +102,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
102 | /* The temperature transition interrupt handler setup */ | 102 | /* The temperature transition interrupt handler setup */ |
103 | h = THERMAL_APIC_VECTOR; /* our delivery vector */ | 103 | h = THERMAL_APIC_VECTOR; /* our delivery vector */ |
104 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ | 104 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ |
105 | apic_write_around(APIC_LVTTHMR, h); | 105 | apic_write(APIC_LVTTHMR, h); |
106 | 106 | ||
107 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | 107 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); |
108 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); | 108 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); |
@@ -114,7 +114,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
114 | wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); | 114 | wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); |
115 | 115 | ||
116 | l = apic_read(APIC_LVTTHMR); | 116 | l = apic_read(APIC_LVTTHMR); |
117 | apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | 117 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); |
118 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); | 118 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); |
119 | 119 | ||
120 | /* enable thermal throttle processing */ | 120 | /* enable thermal throttle processing */ |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 1f4cc48c14c6..d5ae2243f0b9 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -35,6 +35,7 @@ atomic_t therm_throt_en = ATOMIC_INIT(0); | |||
35 | 35 | ||
36 | #define define_therm_throt_sysdev_show_func(name) \ | 36 | #define define_therm_throt_sysdev_show_func(name) \ |
37 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ | 37 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ |
38 | struct sysdev_attribute *attr, \ | ||
38 | char *buf) \ | 39 | char *buf) \ |
39 | { \ | 40 | { \ |
40 | unsigned int cpu = dev->id; \ | 41 | unsigned int cpu = dev->id; \ |
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 6d4bdc02388a..de7439f82b92 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -250,7 +250,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr, | |||
250 | 250 | ||
251 | do_div(count, nmi_hz); | 251 | do_div(count, nmi_hz); |
252 | if(descr) | 252 | if(descr) |
253 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | 253 | pr_debug("setting %s to -0x%08Lx\n", descr, count); |
254 | wrmsrl(perfctr_msr, 0 - count); | 254 | wrmsrl(perfctr_msr, 0 - count); |
255 | } | 255 | } |
256 | 256 | ||
@@ -261,7 +261,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr, | |||
261 | 261 | ||
262 | do_div(count, nmi_hz); | 262 | do_div(count, nmi_hz); |
263 | if(descr) | 263 | if(descr) |
264 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | 264 | pr_debug("setting %s to -0x%08Lx\n", descr, count); |
265 | wrmsr(perfctr_msr, (u32)(-count), 0); | 265 | wrmsr(perfctr_msr, (u32)(-count), 0); |
266 | } | 266 | } |
267 | 267 | ||
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 0d0d9057e7c0..a26c480b9491 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
@@ -160,7 +160,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) | |||
160 | { | 160 | { |
161 | if (*pos == 0) /* just in case, cpu 0 is not the first */ | 161 | if (*pos == 0) /* just in case, cpu 0 is not the first */ |
162 | *pos = first_cpu(cpu_online_map); | 162 | *pos = first_cpu(cpu_online_map); |
163 | if ((*pos) < NR_CPUS && cpu_online(*pos)) | 163 | if ((*pos) < nr_cpu_ids && cpu_online(*pos)) |
164 | return &cpu_data(*pos); | 164 | return &cpu_data(*pos); |
165 | return NULL; | 165 | return NULL; |
166 | } | 166 | } |
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 2de5fa2bbf77..14b11b3be31c 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -141,8 +141,8 @@ static __cpuinit int cpuid_device_create(int cpu) | |||
141 | { | 141 | { |
142 | struct device *dev; | 142 | struct device *dev; |
143 | 143 | ||
144 | dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), | 144 | dev = device_create_drvdata(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), |
145 | "cpu%d", cpu); | 145 | NULL, "cpu%d", cpu); |
146 | return IS_ERR(dev) ? PTR_ERR(dev) : 0; | 146 | return IS_ERR(dev) ? PTR_ERR(dev) : 0; |
147 | } | 147 | } |
148 | 148 | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 28c29180b380..9af89078f7bb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -877,7 +877,8 @@ void __init early_res_to_bootmem(u64 start, u64 end) | |||
877 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) | 877 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) |
878 | count++; | 878 | count++; |
879 | 879 | ||
880 | printk(KERN_INFO "(%d early reservations) ==> bootmem\n", count); | 880 | printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", |
881 | count, start, end); | ||
881 | for (i = 0; i < count; i++) { | 882 | for (i = 0; i < count; i++) { |
882 | struct early_res *r = &early_res[i]; | 883 | struct early_res *r = &early_res[i]; |
883 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, | 884 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, |
@@ -1298,11 +1299,6 @@ void __init e820_reserve_resources(void) | |||
1298 | } | 1299 | } |
1299 | } | 1300 | } |
1300 | 1301 | ||
1301 | /* | ||
1302 | * Non-standard memory setup can be specified via this quirk: | ||
1303 | */ | ||
1304 | char * (*arch_memory_setup_quirk)(void); | ||
1305 | |||
1306 | char *__init default_machine_specific_memory_setup(void) | 1302 | char *__init default_machine_specific_memory_setup(void) |
1307 | { | 1303 | { |
1308 | char *who = "BIOS-e820"; | 1304 | char *who = "BIOS-e820"; |
@@ -1343,8 +1339,8 @@ char *__init default_machine_specific_memory_setup(void) | |||
1343 | 1339 | ||
1344 | char *__init __attribute__((weak)) machine_specific_memory_setup(void) | 1340 | char *__init __attribute__((weak)) machine_specific_memory_setup(void) |
1345 | { | 1341 | { |
1346 | if (arch_memory_setup_quirk) { | 1342 | if (x86_quirks->arch_memory_setup) { |
1347 | char *who = arch_memory_setup_quirk(); | 1343 | char *who = x86_quirks->arch_memory_setup(); |
1348 | 1344 | ||
1349 | if (who) | 1345 | if (who) |
1350 | return who; | 1346 | return who; |
@@ -1367,24 +1363,3 @@ void __init setup_memory_map(void) | |||
1367 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 1363 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
1368 | e820_print_map(who); | 1364 | e820_print_map(who); |
1369 | } | 1365 | } |
1370 | |||
1371 | #ifdef CONFIG_X86_64 | ||
1372 | int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) | ||
1373 | { | ||
1374 | int i; | ||
1375 | |||
1376 | if (slot < 0 || slot >= e820.nr_map) | ||
1377 | return -1; | ||
1378 | for (i = slot; i < e820.nr_map; i++) { | ||
1379 | if (e820.map[i].type != E820_RAM) | ||
1380 | continue; | ||
1381 | break; | ||
1382 | } | ||
1383 | if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) | ||
1384 | return -1; | ||
1385 | *addr = e820.map[i].addr; | ||
1386 | *size = min_t(u64, e820.map[i].size + e820.map[i].addr, | ||
1387 | max_pfn << PAGE_SHIFT) - *addr; | ||
1388 | return i + 1; | ||
1389 | } | ||
1390 | #endif | ||
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index a0e11c0cc872..4353cf5e6fac 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -16,10 +16,7 @@ | |||
16 | #include <asm/dma.h> | 16 | #include <asm/dma.h> |
17 | #include <asm/io_apic.h> | 17 | #include <asm/io_apic.h> |
18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
19 | 19 | #include <asm/iommu.h> | |
20 | #ifdef CONFIG_GART_IOMMU | ||
21 | #include <asm/gart.h> | ||
22 | #endif | ||
23 | 20 | ||
24 | static void __init fix_hypertransport_config(int num, int slot, int func) | 21 | static void __init fix_hypertransport_config(int num, int slot, int func) |
25 | { | 22 | { |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 6bc07f0f1202..109792bc7cfa 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -54,6 +54,16 @@ | |||
54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
55 | #include <asm/irq_vectors.h> | 55 | #include <asm/irq_vectors.h> |
56 | 56 | ||
57 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | ||
58 | #include <linux/elf-em.h> | ||
59 | #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) | ||
60 | #define __AUDIT_ARCH_LE 0x40000000 | ||
61 | |||
62 | #ifndef CONFIG_AUDITSYSCALL | ||
63 | #define sysenter_audit syscall_trace_entry | ||
64 | #define sysexit_audit syscall_exit_work | ||
65 | #endif | ||
66 | |||
57 | /* | 67 | /* |
58 | * We use macros for low-level operations which need to be overridden | 68 | * We use macros for low-level operations which need to be overridden |
59 | * for paravirtualization. The following will never clobber any registers: | 69 | * for paravirtualization. The following will never clobber any registers: |
@@ -332,8 +342,9 @@ sysenter_past_esp: | |||
332 | GET_THREAD_INFO(%ebp) | 342 | GET_THREAD_INFO(%ebp) |
333 | 343 | ||
334 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | 344 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ |
335 | testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | 345 | testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
336 | jnz syscall_trace_entry | 346 | jnz sysenter_audit |
347 | sysenter_do_call: | ||
337 | cmpl $(nr_syscalls), %eax | 348 | cmpl $(nr_syscalls), %eax |
338 | jae syscall_badsys | 349 | jae syscall_badsys |
339 | call *sys_call_table(,%eax,4) | 350 | call *sys_call_table(,%eax,4) |
@@ -343,7 +354,8 @@ sysenter_past_esp: | |||
343 | TRACE_IRQS_OFF | 354 | TRACE_IRQS_OFF |
344 | movl TI_flags(%ebp), %ecx | 355 | movl TI_flags(%ebp), %ecx |
345 | testw $_TIF_ALLWORK_MASK, %cx | 356 | testw $_TIF_ALLWORK_MASK, %cx |
346 | jne syscall_exit_work | 357 | jne sysexit_audit |
358 | sysenter_exit: | ||
347 | /* if something modifies registers it must also disable sysexit */ | 359 | /* if something modifies registers it must also disable sysexit */ |
348 | movl PT_EIP(%esp), %edx | 360 | movl PT_EIP(%esp), %edx |
349 | movl PT_OLDESP(%esp), %ecx | 361 | movl PT_OLDESP(%esp), %ecx |
@@ -351,6 +363,45 @@ sysenter_past_esp: | |||
351 | TRACE_IRQS_ON | 363 | TRACE_IRQS_ON |
352 | 1: mov PT_FS(%esp), %fs | 364 | 1: mov PT_FS(%esp), %fs |
353 | ENABLE_INTERRUPTS_SYSEXIT | 365 | ENABLE_INTERRUPTS_SYSEXIT |
366 | |||
367 | #ifdef CONFIG_AUDITSYSCALL | ||
368 | sysenter_audit: | ||
369 | testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | ||
370 | jnz syscall_trace_entry | ||
371 | addl $4,%esp | ||
372 | CFI_ADJUST_CFA_OFFSET -4 | ||
373 | /* %esi already in 8(%esp) 6th arg: 4th syscall arg */ | ||
374 | /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */ | ||
375 | /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */ | ||
376 | movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ | ||
377 | movl %eax,%edx /* 2nd arg: syscall number */ | ||
378 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ | ||
379 | call audit_syscall_entry | ||
380 | pushl %ebx | ||
381 | CFI_ADJUST_CFA_OFFSET 4 | ||
382 | movl PT_EAX(%esp),%eax /* reload syscall number */ | ||
383 | jmp sysenter_do_call | ||
384 | |||
385 | sysexit_audit: | ||
386 | testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx | ||
387 | jne syscall_exit_work | ||
388 | TRACE_IRQS_ON | ||
389 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
390 | movl %eax,%edx /* second arg, syscall return value */ | ||
391 | cmpl $0,%eax /* is it < 0? */ | ||
392 | setl %al /* 1 if so, 0 if not */ | ||
393 | movzbl %al,%eax /* zero-extend that */ | ||
394 | inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | ||
395 | call audit_syscall_exit | ||
396 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
397 | TRACE_IRQS_OFF | ||
398 | movl TI_flags(%ebp), %ecx | ||
399 | testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx | ||
400 | jne syscall_exit_work | ||
401 | movl PT_EAX(%esp),%eax /* reload syscall return value */ | ||
402 | jmp sysenter_exit | ||
403 | #endif | ||
404 | |||
354 | CFI_ENDPROC | 405 | CFI_ENDPROC |
355 | .pushsection .fixup,"ax" | 406 | .pushsection .fixup,"ax" |
356 | 2: movl $0,PT_FS(%esp) | 407 | 2: movl $0,PT_FS(%esp) |
@@ -370,7 +421,7 @@ ENTRY(system_call) | |||
370 | GET_THREAD_INFO(%ebp) | 421 | GET_THREAD_INFO(%ebp) |
371 | # system call tracing in operation / emulation | 422 | # system call tracing in operation / emulation |
372 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | 423 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ |
373 | testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | 424 | testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
374 | jnz syscall_trace_entry | 425 | jnz syscall_trace_entry |
375 | cmpl $(nr_syscalls), %eax | 426 | cmpl $(nr_syscalls), %eax |
376 | jae syscall_badsys | 427 | jae syscall_badsys |
@@ -383,10 +434,6 @@ syscall_exit: | |||
383 | # setting need_resched or sigpending | 434 | # setting need_resched or sigpending |
384 | # between sampling and the iret | 435 | # between sampling and the iret |
385 | TRACE_IRQS_OFF | 436 | TRACE_IRQS_OFF |
386 | testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit | ||
387 | jz no_singlestep | ||
388 | orl $_TIF_SINGLESTEP,TI_flags(%ebp) | ||
389 | no_singlestep: | ||
390 | movl TI_flags(%ebp), %ecx | 437 | movl TI_flags(%ebp), %ecx |
391 | testw $_TIF_ALLWORK_MASK, %cx # current->work | 438 | testw $_TIF_ALLWORK_MASK, %cx # current->work |
392 | jne syscall_exit_work | 439 | jne syscall_exit_work |
@@ -514,12 +561,8 @@ END(work_pending) | |||
514 | syscall_trace_entry: | 561 | syscall_trace_entry: |
515 | movl $-ENOSYS,PT_EAX(%esp) | 562 | movl $-ENOSYS,PT_EAX(%esp) |
516 | movl %esp, %eax | 563 | movl %esp, %eax |
517 | xorl %edx,%edx | 564 | call syscall_trace_enter |
518 | call do_syscall_trace | 565 | /* What it returned is what we'll actually use. */ |
519 | cmpl $0, %eax | ||
520 | jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, | ||
521 | # so must skip actual syscall | ||
522 | movl PT_ORIG_EAX(%esp), %eax | ||
523 | cmpl $(nr_syscalls), %eax | 566 | cmpl $(nr_syscalls), %eax |
524 | jnae syscall_call | 567 | jnae syscall_call |
525 | jmp syscall_exit | 568 | jmp syscall_exit |
@@ -528,14 +571,13 @@ END(syscall_trace_entry) | |||
528 | # perform syscall exit tracing | 571 | # perform syscall exit tracing |
529 | ALIGN | 572 | ALIGN |
530 | syscall_exit_work: | 573 | syscall_exit_work: |
531 | testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl | 574 | testb $_TIF_WORK_SYSCALL_EXIT, %cl |
532 | jz work_pending | 575 | jz work_pending |
533 | TRACE_IRQS_ON | 576 | TRACE_IRQS_ON |
534 | ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call | 577 | ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call |
535 | # schedule() instead | 578 | # schedule() instead |
536 | movl %esp, %eax | 579 | movl %esp, %eax |
537 | movl $1, %edx | 580 | call syscall_trace_leave |
538 | call do_syscall_trace | ||
539 | jmp resume_userspace | 581 | jmp resume_userspace |
540 | END(syscall_exit_work) | 582 | END(syscall_exit_work) |
541 | CFI_ENDPROC | 583 | CFI_ENDPROC |
@@ -1024,6 +1066,7 @@ ENDPROC(kernel_thread_helper) | |||
1024 | ENTRY(xen_sysenter_target) | 1066 | ENTRY(xen_sysenter_target) |
1025 | RING0_INT_FRAME | 1067 | RING0_INT_FRAME |
1026 | addl $5*4, %esp /* remove xen-provided frame */ | 1068 | addl $5*4, %esp /* remove xen-provided frame */ |
1069 | CFI_ADJUST_CFA_OFFSET -5*4 | ||
1027 | jmp sysenter_past_esp | 1070 | jmp sysenter_past_esp |
1028 | CFI_ENDPROC | 1071 | CFI_ENDPROC |
1029 | 1072 | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index ae63e584c340..89434d439605 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -53,6 +53,12 @@ | |||
53 | #include <asm/paravirt.h> | 53 | #include <asm/paravirt.h> |
54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
55 | 55 | ||
56 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | ||
57 | #include <linux/elf-em.h> | ||
58 | #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) | ||
59 | #define __AUDIT_ARCH_64BIT 0x80000000 | ||
60 | #define __AUDIT_ARCH_LE 0x40000000 | ||
61 | |||
56 | .code64 | 62 | .code64 |
57 | 63 | ||
58 | #ifdef CONFIG_FTRACE | 64 | #ifdef CONFIG_FTRACE |
@@ -349,9 +355,9 @@ ENTRY(system_call_after_swapgs) | |||
349 | movq %rcx,RIP-ARGOFFSET(%rsp) | 355 | movq %rcx,RIP-ARGOFFSET(%rsp) |
350 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 356 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
351 | GET_THREAD_INFO(%rcx) | 357 | GET_THREAD_INFO(%rcx) |
352 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ | 358 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) |
353 | TI_flags(%rcx) | ||
354 | jnz tracesys | 359 | jnz tracesys |
360 | system_call_fastpath: | ||
355 | cmpq $__NR_syscall_max,%rax | 361 | cmpq $__NR_syscall_max,%rax |
356 | ja badsys | 362 | ja badsys |
357 | movq %r10,%rcx | 363 | movq %r10,%rcx |
@@ -403,16 +409,16 @@ sysret_careful: | |||
403 | sysret_signal: | 409 | sysret_signal: |
404 | TRACE_IRQS_ON | 410 | TRACE_IRQS_ON |
405 | ENABLE_INTERRUPTS(CLBR_NONE) | 411 | ENABLE_INTERRUPTS(CLBR_NONE) |
406 | testl $_TIF_DO_NOTIFY_MASK,%edx | 412 | #ifdef CONFIG_AUDITSYSCALL |
407 | jz 1f | 413 | bt $TIF_SYSCALL_AUDIT,%edx |
408 | 414 | jc sysret_audit | |
409 | /* Really a signal */ | 415 | #endif |
410 | /* edx: work flags (arg3) */ | 416 | /* edx: work flags (arg3) */ |
411 | leaq do_notify_resume(%rip),%rax | 417 | leaq do_notify_resume(%rip),%rax |
412 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | 418 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 |
413 | xorl %esi,%esi # oldset -> arg2 | 419 | xorl %esi,%esi # oldset -> arg2 |
414 | call ptregscall_common | 420 | call ptregscall_common |
415 | 1: movl $_TIF_WORK_MASK,%edi | 421 | movl $_TIF_WORK_MASK,%edi |
416 | /* Use IRET because user could have changed frame. This | 422 | /* Use IRET because user could have changed frame. This |
417 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 423 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ |
418 | DISABLE_INTERRUPTS(CLBR_NONE) | 424 | DISABLE_INTERRUPTS(CLBR_NONE) |
@@ -423,14 +429,56 @@ badsys: | |||
423 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 429 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) |
424 | jmp ret_from_sys_call | 430 | jmp ret_from_sys_call |
425 | 431 | ||
432 | #ifdef CONFIG_AUDITSYSCALL | ||
433 | /* | ||
434 | * Fast path for syscall audit without full syscall trace. | ||
435 | * We just call audit_syscall_entry() directly, and then | ||
436 | * jump back to the normal fast path. | ||
437 | */ | ||
438 | auditsys: | ||
439 | movq %r10,%r9 /* 6th arg: 4th syscall arg */ | ||
440 | movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ | ||
441 | movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ | ||
442 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ | ||
443 | movq %rax,%rsi /* 2nd arg: syscall number */ | ||
444 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ | ||
445 | call audit_syscall_entry | ||
446 | LOAD_ARGS 0 /* reload call-clobbered registers */ | ||
447 | jmp system_call_fastpath | ||
448 | |||
449 | /* | ||
450 | * Return fast path for syscall audit. Call audit_syscall_exit() | ||
451 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT | ||
452 | * masked off. | ||
453 | */ | ||
454 | sysret_audit: | ||
455 | movq %rax,%rsi /* second arg, syscall return value */ | ||
456 | cmpq $0,%rax /* is it < 0? */ | ||
457 | setl %al /* 1 if so, 0 if not */ | ||
458 | movzbl %al,%edi /* zero-extend that into %edi */ | ||
459 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | ||
460 | call audit_syscall_exit | ||
461 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | ||
462 | jmp sysret_check | ||
463 | #endif /* CONFIG_AUDITSYSCALL */ | ||
464 | |||
426 | /* Do syscall tracing */ | 465 | /* Do syscall tracing */ |
427 | tracesys: | 466 | tracesys: |
467 | #ifdef CONFIG_AUDITSYSCALL | ||
468 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | ||
469 | jz auditsys | ||
470 | #endif | ||
428 | SAVE_REST | 471 | SAVE_REST |
429 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 472 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
430 | FIXUP_TOP_OF_STACK %rdi | 473 | FIXUP_TOP_OF_STACK %rdi |
431 | movq %rsp,%rdi | 474 | movq %rsp,%rdi |
432 | call syscall_trace_enter | 475 | call syscall_trace_enter |
433 | LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ | 476 | /* |
477 | * Reload arg registers from stack in case ptrace changed them. | ||
478 | * We don't reload %rax because syscall_trace_enter() returned | ||
479 | * the value it wants us to use in the table lookup. | ||
480 | */ | ||
481 | LOAD_ARGS ARGOFFSET, 1 | ||
434 | RESTORE_REST | 482 | RESTORE_REST |
435 | cmpq $__NR_syscall_max,%rax | 483 | cmpq $__NR_syscall_max,%rax |
436 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ | 484 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ |
@@ -444,6 +492,7 @@ tracesys: | |||
444 | * Has correct top of stack, but partial stack frame. | 492 | * Has correct top of stack, but partial stack frame. |
445 | */ | 493 | */ |
446 | .globl int_ret_from_sys_call | 494 | .globl int_ret_from_sys_call |
495 | .globl int_with_check | ||
447 | int_ret_from_sys_call: | 496 | int_ret_from_sys_call: |
448 | DISABLE_INTERRUPTS(CLBR_NONE) | 497 | DISABLE_INTERRUPTS(CLBR_NONE) |
449 | TRACE_IRQS_OFF | 498 | TRACE_IRQS_OFF |
@@ -483,7 +532,7 @@ int_very_careful: | |||
483 | ENABLE_INTERRUPTS(CLBR_NONE) | 532 | ENABLE_INTERRUPTS(CLBR_NONE) |
484 | SAVE_REST | 533 | SAVE_REST |
485 | /* Check for syscall exit trace */ | 534 | /* Check for syscall exit trace */ |
486 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx | 535 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
487 | jz int_signal | 536 | jz int_signal |
488 | pushq %rdi | 537 | pushq %rdi |
489 | CFI_ADJUST_CFA_OFFSET 8 | 538 | CFI_ADJUST_CFA_OFFSET 8 |
@@ -491,7 +540,7 @@ int_very_careful: | |||
491 | call syscall_trace_leave | 540 | call syscall_trace_leave |
492 | popq %rdi | 541 | popq %rdi |
493 | CFI_ADJUST_CFA_OFFSET -8 | 542 | CFI_ADJUST_CFA_OFFSET -8 |
494 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi | 543 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi |
495 | jmp int_restore_rest | 544 | jmp int_restore_rest |
496 | 545 | ||
497 | int_signal: | 546 | int_signal: |
@@ -1189,6 +1238,7 @@ END(device_not_available) | |||
1189 | /* runs on exception stack */ | 1238 | /* runs on exception stack */ |
1190 | KPROBE_ENTRY(debug) | 1239 | KPROBE_ENTRY(debug) |
1191 | INTR_FRAME | 1240 | INTR_FRAME |
1241 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1192 | pushq $0 | 1242 | pushq $0 |
1193 | CFI_ADJUST_CFA_OFFSET 8 | 1243 | CFI_ADJUST_CFA_OFFSET 8 |
1194 | paranoidentry do_debug, DEBUG_STACK | 1244 | paranoidentry do_debug, DEBUG_STACK |
@@ -1198,6 +1248,7 @@ KPROBE_END(debug) | |||
1198 | /* runs on exception stack */ | 1248 | /* runs on exception stack */ |
1199 | KPROBE_ENTRY(nmi) | 1249 | KPROBE_ENTRY(nmi) |
1200 | INTR_FRAME | 1250 | INTR_FRAME |
1251 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1201 | pushq $-1 | 1252 | pushq $-1 |
1202 | CFI_ADJUST_CFA_OFFSET 8 | 1253 | CFI_ADJUST_CFA_OFFSET 8 |
1203 | paranoidentry do_nmi, 0, 0 | 1254 | paranoidentry do_nmi, 0, 0 |
@@ -1211,6 +1262,7 @@ KPROBE_END(nmi) | |||
1211 | 1262 | ||
1212 | KPROBE_ENTRY(int3) | 1263 | KPROBE_ENTRY(int3) |
1213 | INTR_FRAME | 1264 | INTR_FRAME |
1265 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1214 | pushq $0 | 1266 | pushq $0 |
1215 | CFI_ADJUST_CFA_OFFSET 8 | 1267 | CFI_ADJUST_CFA_OFFSET 8 |
1216 | paranoidentry do_int3, DEBUG_STACK | 1268 | paranoidentry do_int3, DEBUG_STACK |
@@ -1237,6 +1289,7 @@ END(coprocessor_segment_overrun) | |||
1237 | /* runs on exception stack */ | 1289 | /* runs on exception stack */ |
1238 | ENTRY(double_fault) | 1290 | ENTRY(double_fault) |
1239 | XCPT_FRAME | 1291 | XCPT_FRAME |
1292 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1240 | paranoidentry do_double_fault | 1293 | paranoidentry do_double_fault |
1241 | jmp paranoid_exit1 | 1294 | jmp paranoid_exit1 |
1242 | CFI_ENDPROC | 1295 | CFI_ENDPROC |
@@ -1253,6 +1306,7 @@ END(segment_not_present) | |||
1253 | /* runs on exception stack */ | 1306 | /* runs on exception stack */ |
1254 | ENTRY(stack_segment) | 1307 | ENTRY(stack_segment) |
1255 | XCPT_FRAME | 1308 | XCPT_FRAME |
1309 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1256 | paranoidentry do_stack_segment | 1310 | paranoidentry do_stack_segment |
1257 | jmp paranoid_exit1 | 1311 | jmp paranoid_exit1 |
1258 | CFI_ENDPROC | 1312 | CFI_ENDPROC |
@@ -1278,6 +1332,7 @@ END(spurious_interrupt_bug) | |||
1278 | /* runs on exception stack */ | 1332 | /* runs on exception stack */ |
1279 | ENTRY(machine_check) | 1333 | ENTRY(machine_check) |
1280 | INTR_FRAME | 1334 | INTR_FRAME |
1335 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1281 | pushq $0 | 1336 | pushq $0 |
1282 | CFI_ADJUST_CFA_OFFSET 8 | 1337 | CFI_ADJUST_CFA_OFFSET 8 |
1283 | paranoidentry do_machine_check | 1338 | paranoidentry do_machine_check |
@@ -1312,3 +1367,103 @@ KPROBE_ENTRY(ignore_sysret) | |||
1312 | sysret | 1367 | sysret |
1313 | CFI_ENDPROC | 1368 | CFI_ENDPROC |
1314 | ENDPROC(ignore_sysret) | 1369 | ENDPROC(ignore_sysret) |
1370 | |||
1371 | #ifdef CONFIG_XEN | ||
1372 | ENTRY(xen_hypervisor_callback) | ||
1373 | zeroentry xen_do_hypervisor_callback | ||
1374 | END(xen_hypervisor_callback) | ||
1375 | |||
1376 | /* | ||
1377 | # A note on the "critical region" in our callback handler. | ||
1378 | # We want to avoid stacking callback handlers due to events occurring | ||
1379 | # during handling of the last event. To do this, we keep events disabled | ||
1380 | # until we've done all processing. HOWEVER, we must enable events before | ||
1381 | # popping the stack frame (can't be done atomically) and so it would still | ||
1382 | # be possible to get enough handler activations to overflow the stack. | ||
1383 | # Although unlikely, bugs of that kind are hard to track down, so we'd | ||
1384 | # like to avoid the possibility. | ||
1385 | # So, on entry to the handler we detect whether we interrupted an | ||
1386 | # existing activation in its critical region -- if so, we pop the current | ||
1387 | # activation and restart the handler using the previous one. | ||
1388 | */ | ||
1389 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) | ||
1390 | CFI_STARTPROC | ||
1391 | /* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will | ||
1392 | see the correct pointer to the pt_regs */ | ||
1393 | movq %rdi, %rsp # we don't return, adjust the stack frame | ||
1394 | CFI_ENDPROC | ||
1395 | CFI_DEFAULT_STACK | ||
1396 | 11: incl %gs:pda_irqcount | ||
1397 | movq %rsp,%rbp | ||
1398 | CFI_DEF_CFA_REGISTER rbp | ||
1399 | cmovzq %gs:pda_irqstackptr,%rsp | ||
1400 | pushq %rbp # backlink for old unwinder | ||
1401 | call xen_evtchn_do_upcall | ||
1402 | popq %rsp | ||
1403 | CFI_DEF_CFA_REGISTER rsp | ||
1404 | decl %gs:pda_irqcount | ||
1405 | jmp error_exit | ||
1406 | CFI_ENDPROC | ||
1407 | END(do_hypervisor_callback) | ||
1408 | |||
1409 | /* | ||
1410 | # Hypervisor uses this for application faults while it executes. | ||
1411 | # We get here for two reasons: | ||
1412 | # 1. Fault while reloading DS, ES, FS or GS | ||
1413 | # 2. Fault while executing IRET | ||
1414 | # Category 1 we do not need to fix up as Xen has already reloaded all segment | ||
1415 | # registers that could be reloaded and zeroed the others. | ||
1416 | # Category 2 we fix up by killing the current process. We cannot use the | ||
1417 | # normal Linux return path in this case because if we use the IRET hypercall | ||
1418 | # to pop the stack frame we end up in an infinite loop of failsafe callbacks. | ||
1419 | # We distinguish between categories by comparing each saved segment register | ||
1420 | # with its current contents: any discrepancy means we in category 1. | ||
1421 | */ | ||
1422 | ENTRY(xen_failsafe_callback) | ||
1423 | framesz = (RIP-0x30) /* workaround buggy gas */ | ||
1424 | _frame framesz | ||
1425 | CFI_REL_OFFSET rcx, 0 | ||
1426 | CFI_REL_OFFSET r11, 8 | ||
1427 | movw %ds,%cx | ||
1428 | cmpw %cx,0x10(%rsp) | ||
1429 | CFI_REMEMBER_STATE | ||
1430 | jne 1f | ||
1431 | movw %es,%cx | ||
1432 | cmpw %cx,0x18(%rsp) | ||
1433 | jne 1f | ||
1434 | movw %fs,%cx | ||
1435 | cmpw %cx,0x20(%rsp) | ||
1436 | jne 1f | ||
1437 | movw %gs,%cx | ||
1438 | cmpw %cx,0x28(%rsp) | ||
1439 | jne 1f | ||
1440 | /* All segments match their saved values => Category 2 (Bad IRET). */ | ||
1441 | movq (%rsp),%rcx | ||
1442 | CFI_RESTORE rcx | ||
1443 | movq 8(%rsp),%r11 | ||
1444 | CFI_RESTORE r11 | ||
1445 | addq $0x30,%rsp | ||
1446 | CFI_ADJUST_CFA_OFFSET -0x30 | ||
1447 | pushq $0 | ||
1448 | CFI_ADJUST_CFA_OFFSET 8 | ||
1449 | pushq %r11 | ||
1450 | CFI_ADJUST_CFA_OFFSET 8 | ||
1451 | pushq %rcx | ||
1452 | CFI_ADJUST_CFA_OFFSET 8 | ||
1453 | jmp general_protection | ||
1454 | CFI_RESTORE_STATE | ||
1455 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ | ||
1456 | movq (%rsp),%rcx | ||
1457 | CFI_RESTORE rcx | ||
1458 | movq 8(%rsp),%r11 | ||
1459 | CFI_RESTORE r11 | ||
1460 | addq $0x30,%rsp | ||
1461 | CFI_ADJUST_CFA_OFFSET -0x30 | ||
1462 | pushq $0 | ||
1463 | CFI_ADJUST_CFA_OFFSET 8 | ||
1464 | SAVE_ALL | ||
1465 | jmp error_exit | ||
1466 | CFI_ENDPROC | ||
1467 | END(xen_failsafe_callback) | ||
1468 | |||
1469 | #endif /* CONFIG_XEN */ | ||
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 1a9c68845ee8..786548a62d38 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c | |||
@@ -168,7 +168,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) | |||
168 | * May as well be the first. | 168 | * May as well be the first. |
169 | */ | 169 | */ |
170 | cpu = first_cpu(cpumask); | 170 | cpu = first_cpu(cpumask); |
171 | if ((unsigned)cpu < NR_CPUS) | 171 | if ((unsigned)cpu < nr_cpu_ids) |
172 | return per_cpu(x86_cpu_to_apicid, cpu); | 172 | return per_cpu(x86_cpu_to_apicid, cpu); |
173 | else | 173 | else |
174 | return BAD_APICID; | 174 | return BAD_APICID; |
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 711f11c30b06..2cfcbded888a 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <asm/pgtable.h> | 24 | #include <asm/pgtable.h> |
25 | #include <asm/uv/uv_mmrs.h> | 25 | #include <asm/uv/uv_mmrs.h> |
26 | #include <asm/uv/uv_hub.h> | 26 | #include <asm/uv/uv_hub.h> |
27 | #include <asm/uv/bios.h> | ||
27 | 28 | ||
28 | DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | 29 | DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); |
29 | EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); | 30 | EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); |
@@ -40,6 +41,9 @@ EXPORT_SYMBOL_GPL(uv_cpu_to_blade); | |||
40 | short uv_possible_blades; | 41 | short uv_possible_blades; |
41 | EXPORT_SYMBOL_GPL(uv_possible_blades); | 42 | EXPORT_SYMBOL_GPL(uv_possible_blades); |
42 | 43 | ||
44 | unsigned long sn_rtc_cycles_per_second; | ||
45 | EXPORT_SYMBOL(sn_rtc_cycles_per_second); | ||
46 | |||
43 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 47 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
44 | 48 | ||
45 | static cpumask_t uv_target_cpus(void) | 49 | static cpumask_t uv_target_cpus(void) |
@@ -94,7 +98,7 @@ static void uv_send_IPI_mask(cpumask_t mask, int vector) | |||
94 | { | 98 | { |
95 | unsigned int cpu; | 99 | unsigned int cpu; |
96 | 100 | ||
97 | for (cpu = 0; cpu < NR_CPUS; ++cpu) | 101 | for_each_possible_cpu(cpu) |
98 | if (cpu_isset(cpu, mask)) | 102 | if (cpu_isset(cpu, mask)) |
99 | uv_send_IPI_one(cpu, vector); | 103 | uv_send_IPI_one(cpu, vector); |
100 | } | 104 | } |
@@ -128,7 +132,7 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) | |||
128 | * May as well be the first. | 132 | * May as well be the first. |
129 | */ | 133 | */ |
130 | cpu = first_cpu(cpumask); | 134 | cpu = first_cpu(cpumask); |
131 | if ((unsigned)cpu < NR_CPUS) | 135 | if ((unsigned)cpu < nr_cpu_ids) |
132 | return per_cpu(x86_cpu_to_apicid, cpu); | 136 | return per_cpu(x86_cpu_to_apicid, cpu); |
133 | else | 137 | else |
134 | return BAD_APICID; | 138 | return BAD_APICID; |
@@ -272,6 +276,23 @@ static __init void map_mmioh_high(int max_pnode) | |||
272 | map_high("MMIOH", mmioh.s.base, shift, map_uc); | 276 | map_high("MMIOH", mmioh.s.base, shift, map_uc); |
273 | } | 277 | } |
274 | 278 | ||
279 | static __init void uv_rtc_init(void) | ||
280 | { | ||
281 | long status, ticks_per_sec, drift; | ||
282 | |||
283 | status = | ||
284 | x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, | ||
285 | &drift); | ||
286 | if (status != 0 || ticks_per_sec < 100000) { | ||
287 | printk(KERN_WARNING | ||
288 | "unable to determine platform RTC clock frequency, " | ||
289 | "guessing.\n"); | ||
290 | /* BIOS gives wrong value for clock freq. so guess */ | ||
291 | sn_rtc_cycles_per_second = 1000000000000UL / 30000UL; | ||
292 | } else | ||
293 | sn_rtc_cycles_per_second = ticks_per_sec; | ||
294 | } | ||
295 | |||
275 | static __init void uv_system_init(void) | 296 | static __init void uv_system_init(void) |
276 | { | 297 | { |
277 | union uvh_si_addr_map_config_u m_n_config; | 298 | union uvh_si_addr_map_config_u m_n_config; |
@@ -326,6 +347,8 @@ static __init void uv_system_init(void) | |||
326 | gnode_upper = (((unsigned long)node_id.s.node_id) & | 347 | gnode_upper = (((unsigned long)node_id.s.node_id) & |
327 | ~((1 << n_val) - 1)) << m_val; | 348 | ~((1 << n_val) - 1)) << m_val; |
328 | 349 | ||
350 | uv_rtc_init(); | ||
351 | |||
329 | for_each_present_cpu(cpu) { | 352 | for_each_present_cpu(cpu) { |
330 | nid = cpu_to_node(cpu); | 353 | nid = cpu_to_node(cpu); |
331 | pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); | 354 | pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index c97819829146..1b318e903bf6 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -39,6 +39,13 @@ static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; | |||
39 | static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; | 39 | static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; |
40 | #endif | 40 | #endif |
41 | 41 | ||
42 | void __init x86_64_init_pda(void) | ||
43 | { | ||
44 | _cpu_pda = __cpu_pda; | ||
45 | cpu_pda(0) = &_boot_cpu_pda; | ||
46 | pda_init(0); | ||
47 | } | ||
48 | |||
42 | static void __init zap_identity_mappings(void) | 49 | static void __init zap_identity_mappings(void) |
43 | { | 50 | { |
44 | pgd_t *pgd = pgd_offset_k(0UL); | 51 | pgd_t *pgd = pgd_offset_k(0UL); |
@@ -102,9 +109,7 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
102 | 109 | ||
103 | early_printk("Kernel alive\n"); | 110 | early_printk("Kernel alive\n"); |
104 | 111 | ||
105 | _cpu_pda = __cpu_pda; | 112 | x86_64_init_pda(); |
106 | cpu_pda(0) = &_boot_cpu_pda; | ||
107 | pda_init(0); | ||
108 | 113 | ||
109 | early_printk("Kernel really alive\n"); | 114 | early_printk("Kernel really alive\n"); |
110 | 115 | ||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b07ac7b217cb..db3280afe886 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -407,6 +407,7 @@ ENTRY(phys_base) | |||
407 | /* This must match the first entry in level2_kernel_pgt */ | 407 | /* This must match the first entry in level2_kernel_pgt */ |
408 | .quad 0x0000000000000000 | 408 | .quad 0x0000000000000000 |
409 | 409 | ||
410 | #include "../../x86/xen/xen-head.S" | ||
410 | 411 | ||
411 | .section .bss, "aw", @nobits | 412 | .section .bss, "aw", @nobits |
412 | .align L1_CACHE_BYTES | 413 | .align L1_CACHE_BYTES |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 0ea6a19bfdfe..ad2b15a1334d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -468,7 +468,7 @@ void hpet_disable(void) | |||
468 | #define RTC_NUM_INTS 1 | 468 | #define RTC_NUM_INTS 1 |
469 | 469 | ||
470 | static unsigned long hpet_rtc_flags; | 470 | static unsigned long hpet_rtc_flags; |
471 | static unsigned long hpet_prev_update_sec; | 471 | static int hpet_prev_update_sec; |
472 | static struct rtc_time hpet_alarm_time; | 472 | static struct rtc_time hpet_alarm_time; |
473 | static unsigned long hpet_pie_count; | 473 | static unsigned long hpet_pie_count; |
474 | static unsigned long hpet_t1_cmp; | 474 | static unsigned long hpet_t1_cmp; |
@@ -575,6 +575,9 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask) | |||
575 | 575 | ||
576 | hpet_rtc_flags |= bit_mask; | 576 | hpet_rtc_flags |= bit_mask; |
577 | 577 | ||
578 | if ((bit_mask & RTC_UIE) && !(oldbits & RTC_UIE)) | ||
579 | hpet_prev_update_sec = -1; | ||
580 | |||
578 | if (!oldbits) | 581 | if (!oldbits) |
579 | hpet_rtc_timer_init(); | 582 | hpet_rtc_timer_init(); |
580 | 583 | ||
@@ -652,7 +655,7 @@ static void hpet_rtc_timer_reinit(void) | |||
652 | if (hpet_rtc_flags & RTC_PIE) | 655 | if (hpet_rtc_flags & RTC_PIE) |
653 | hpet_pie_count += lost_ints; | 656 | hpet_pie_count += lost_ints; |
654 | if (printk_ratelimit()) | 657 | if (printk_ratelimit()) |
655 | printk(KERN_WARNING "rtc: lost %d interrupts\n", | 658 | printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n", |
656 | lost_ints); | 659 | lost_ints); |
657 | } | 660 | } |
658 | } | 661 | } |
@@ -670,7 +673,8 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) | |||
670 | 673 | ||
671 | if (hpet_rtc_flags & RTC_UIE && | 674 | if (hpet_rtc_flags & RTC_UIE && |
672 | curr_time.tm_sec != hpet_prev_update_sec) { | 675 | curr_time.tm_sec != hpet_prev_update_sec) { |
673 | rtc_int_flag = RTC_UF; | 676 | if (hpet_prev_update_sec >= 0) |
677 | rtc_int_flag = RTC_UF; | ||
674 | hpet_prev_update_sec = curr_time.tm_sec; | 678 | hpet_prev_update_sec = curr_time.tm_sec; |
675 | } | 679 | } |
676 | 680 | ||
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 558abf4c796a..de9aa0e3a9c5 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c | |||
@@ -756,7 +756,7 @@ void send_IPI_self(int vector) | |||
756 | /* | 756 | /* |
757 | * Send the IPI. The write to APIC_ICR fires this off. | 757 | * Send the IPI. The write to APIC_ICR fires this off. |
758 | */ | 758 | */ |
759 | apic_write_around(APIC_ICR, cfg); | 759 | apic_write(APIC_ICR, cfg); |
760 | } | 760 | } |
761 | #endif /* !CONFIG_SMP */ | 761 | #endif /* !CONFIG_SMP */ |
762 | 762 | ||
@@ -2030,7 +2030,7 @@ static void mask_lapic_irq(unsigned int irq) | |||
2030 | unsigned long v; | 2030 | unsigned long v; |
2031 | 2031 | ||
2032 | v = apic_read(APIC_LVT0); | 2032 | v = apic_read(APIC_LVT0); |
2033 | apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); | 2033 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); |
2034 | } | 2034 | } |
2035 | 2035 | ||
2036 | static void unmask_lapic_irq(unsigned int irq) | 2036 | static void unmask_lapic_irq(unsigned int irq) |
@@ -2038,7 +2038,7 @@ static void unmask_lapic_irq(unsigned int irq) | |||
2038 | unsigned long v; | 2038 | unsigned long v; |
2039 | 2039 | ||
2040 | v = apic_read(APIC_LVT0); | 2040 | v = apic_read(APIC_LVT0); |
2041 | apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); | 2041 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); |
2042 | } | 2042 | } |
2043 | 2043 | ||
2044 | static struct irq_chip lapic_chip __read_mostly = { | 2044 | static struct irq_chip lapic_chip __read_mostly = { |
@@ -2168,7 +2168,7 @@ static inline void __init check_timer(void) | |||
2168 | * The AEOI mode will finish them in the 8259A | 2168 | * The AEOI mode will finish them in the 8259A |
2169 | * automatically. | 2169 | * automatically. |
2170 | */ | 2170 | */ |
2171 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 2171 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
2172 | init_8259A(1); | 2172 | init_8259A(1); |
2173 | timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | 2173 | timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); |
2174 | 2174 | ||
@@ -2177,8 +2177,9 @@ static inline void __init check_timer(void) | |||
2177 | pin2 = ioapic_i8259.pin; | 2177 | pin2 = ioapic_i8259.pin; |
2178 | apic2 = ioapic_i8259.apic; | 2178 | apic2 = ioapic_i8259.apic; |
2179 | 2179 | ||
2180 | printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", | 2180 | apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " |
2181 | vector, apic1, pin1, apic2, pin2); | 2181 | "apic1=%d pin1=%d apic2=%d pin2=%d\n", |
2182 | vector, apic1, pin1, apic2, pin2); | ||
2182 | 2183 | ||
2183 | /* | 2184 | /* |
2184 | * Some BIOS writers are clueless and report the ExtINTA | 2185 | * Some BIOS writers are clueless and report the ExtINTA |
@@ -2216,12 +2217,13 @@ static inline void __init check_timer(void) | |||
2216 | } | 2217 | } |
2217 | clear_IO_APIC_pin(apic1, pin1); | 2218 | clear_IO_APIC_pin(apic1, pin1); |
2218 | if (!no_pin1) | 2219 | if (!no_pin1) |
2219 | printk(KERN_ERR "..MP-BIOS bug: " | 2220 | apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " |
2220 | "8254 timer not connected to IO-APIC\n"); | 2221 | "8254 timer not connected to IO-APIC\n"); |
2221 | 2222 | ||
2222 | printk(KERN_INFO "...trying to set up timer (IRQ0) " | 2223 | apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " |
2223 | "through the 8259A ... "); | 2224 | "(IRQ0) through the 8259A ...\n"); |
2224 | printk("\n..... (found pin %d) ...", pin2); | 2225 | apic_printk(APIC_QUIET, KERN_INFO |
2226 | "..... (found apic %d pin %d) ...\n", apic2, pin2); | ||
2225 | /* | 2227 | /* |
2226 | * legacy devices should be connected to IO APIC #0 | 2228 | * legacy devices should be connected to IO APIC #0 |
2227 | */ | 2229 | */ |
@@ -2230,7 +2232,7 @@ static inline void __init check_timer(void) | |||
2230 | unmask_IO_APIC_irq(0); | 2232 | unmask_IO_APIC_irq(0); |
2231 | enable_8259A_irq(0); | 2233 | enable_8259A_irq(0); |
2232 | if (timer_irq_works()) { | 2234 | if (timer_irq_works()) { |
2233 | printk("works.\n"); | 2235 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
2234 | timer_through_8259 = 1; | 2236 | timer_through_8259 = 1; |
2235 | if (nmi_watchdog == NMI_IO_APIC) { | 2237 | if (nmi_watchdog == NMI_IO_APIC) { |
2236 | disable_8259A_irq(0); | 2238 | disable_8259A_irq(0); |
@@ -2244,44 +2246,47 @@ static inline void __init check_timer(void) | |||
2244 | */ | 2246 | */ |
2245 | disable_8259A_irq(0); | 2247 | disable_8259A_irq(0); |
2246 | clear_IO_APIC_pin(apic2, pin2); | 2248 | clear_IO_APIC_pin(apic2, pin2); |
2247 | printk(" failed.\n"); | 2249 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); |
2248 | } | 2250 | } |
2249 | 2251 | ||
2250 | if (nmi_watchdog == NMI_IO_APIC) { | 2252 | if (nmi_watchdog == NMI_IO_APIC) { |
2251 | printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); | 2253 | apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " |
2254 | "through the IO-APIC - disabling NMI Watchdog!\n"); | ||
2252 | nmi_watchdog = NMI_NONE; | 2255 | nmi_watchdog = NMI_NONE; |
2253 | } | 2256 | } |
2254 | timer_ack = 0; | 2257 | timer_ack = 0; |
2255 | 2258 | ||
2256 | printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); | 2259 | apic_printk(APIC_QUIET, KERN_INFO |
2260 | "...trying to set up timer as Virtual Wire IRQ...\n"); | ||
2257 | 2261 | ||
2258 | lapic_register_intr(0, vector); | 2262 | lapic_register_intr(0, vector); |
2259 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ | 2263 | apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ |
2260 | enable_8259A_irq(0); | 2264 | enable_8259A_irq(0); |
2261 | 2265 | ||
2262 | if (timer_irq_works()) { | 2266 | if (timer_irq_works()) { |
2263 | printk(" works.\n"); | 2267 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
2264 | goto out; | 2268 | goto out; |
2265 | } | 2269 | } |
2266 | disable_8259A_irq(0); | 2270 | disable_8259A_irq(0); |
2267 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); | 2271 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); |
2268 | printk(" failed.\n"); | 2272 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); |
2269 | 2273 | ||
2270 | printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); | 2274 | apic_printk(APIC_QUIET, KERN_INFO |
2275 | "...trying to set up timer as ExtINT IRQ...\n"); | ||
2271 | 2276 | ||
2272 | init_8259A(0); | 2277 | init_8259A(0); |
2273 | make_8259A_irq(0); | 2278 | make_8259A_irq(0); |
2274 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | 2279 | apic_write(APIC_LVT0, APIC_DM_EXTINT); |
2275 | 2280 | ||
2276 | unlock_ExtINT_logic(); | 2281 | unlock_ExtINT_logic(); |
2277 | 2282 | ||
2278 | if (timer_irq_works()) { | 2283 | if (timer_irq_works()) { |
2279 | printk(" works.\n"); | 2284 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
2280 | goto out; | 2285 | goto out; |
2281 | } | 2286 | } |
2282 | printk(" failed :(.\n"); | 2287 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); |
2283 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " | 2288 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " |
2284 | "report. Then try booting with the 'noapic' option"); | 2289 | "report. Then try booting with the 'noapic' option.\n"); |
2285 | out: | 2290 | out: |
2286 | local_irq_restore(flags); | 2291 | local_irq_restore(flags); |
2287 | } | 2292 | } |
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 6510cde36b35..8269434d1707 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <asm/proto.h> | 45 | #include <asm/proto.h> |
46 | #include <asm/acpi.h> | 46 | #include <asm/acpi.h> |
47 | #include <asm/dma.h> | 47 | #include <asm/dma.h> |
48 | #include <asm/i8259.h> | ||
48 | #include <asm/nmi.h> | 49 | #include <asm/nmi.h> |
49 | #include <asm/msidef.h> | 50 | #include <asm/msidef.h> |
50 | #include <asm/hypertransport.h> | 51 | #include <asm/hypertransport.h> |
@@ -731,7 +732,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) | |||
731 | return 0; | 732 | return 0; |
732 | } | 733 | } |
733 | 734 | ||
734 | for_each_cpu_mask(cpu, mask) { | 735 | for_each_cpu_mask_nr(cpu, mask) { |
735 | cpumask_t domain, new_mask; | 736 | cpumask_t domain, new_mask; |
736 | int new_cpu; | 737 | int new_cpu; |
737 | int vector, offset; | 738 | int vector, offset; |
@@ -752,7 +753,7 @@ next: | |||
752 | continue; | 753 | continue; |
753 | if (vector == IA32_SYSCALL_VECTOR) | 754 | if (vector == IA32_SYSCALL_VECTOR) |
754 | goto next; | 755 | goto next; |
755 | for_each_cpu_mask(new_cpu, new_mask) | 756 | for_each_cpu_mask_nr(new_cpu, new_mask) |
756 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) | 757 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) |
757 | goto next; | 758 | goto next; |
758 | /* Found one! */ | 759 | /* Found one! */ |
@@ -762,7 +763,7 @@ next: | |||
762 | cfg->move_in_progress = 1; | 763 | cfg->move_in_progress = 1; |
763 | cfg->old_domain = cfg->domain; | 764 | cfg->old_domain = cfg->domain; |
764 | } | 765 | } |
765 | for_each_cpu_mask(new_cpu, new_mask) | 766 | for_each_cpu_mask_nr(new_cpu, new_mask) |
766 | per_cpu(vector_irq, new_cpu)[vector] = irq; | 767 | per_cpu(vector_irq, new_cpu)[vector] = irq; |
767 | cfg->vector = vector; | 768 | cfg->vector = vector; |
768 | cfg->domain = domain; | 769 | cfg->domain = domain; |
@@ -794,7 +795,7 @@ static void __clear_irq_vector(int irq) | |||
794 | 795 | ||
795 | vector = cfg->vector; | 796 | vector = cfg->vector; |
796 | cpus_and(mask, cfg->domain, cpu_online_map); | 797 | cpus_and(mask, cfg->domain, cpu_online_map); |
797 | for_each_cpu_mask(cpu, mask) | 798 | for_each_cpu_mask_nr(cpu, mask) |
798 | per_cpu(vector_irq, cpu)[vector] = -1; | 799 | per_cpu(vector_irq, cpu)[vector] = -1; |
799 | 800 | ||
800 | cfg->vector = 0; | 801 | cfg->vector = 0; |
@@ -1372,12 +1373,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
1372 | static int ioapic_retrigger_irq(unsigned int irq) | 1373 | static int ioapic_retrigger_irq(unsigned int irq) |
1373 | { | 1374 | { |
1374 | struct irq_cfg *cfg = &irq_cfg[irq]; | 1375 | struct irq_cfg *cfg = &irq_cfg[irq]; |
1375 | cpumask_t mask; | ||
1376 | unsigned long flags; | 1376 | unsigned long flags; |
1377 | 1377 | ||
1378 | spin_lock_irqsave(&vector_lock, flags); | 1378 | spin_lock_irqsave(&vector_lock, flags); |
1379 | mask = cpumask_of_cpu(first_cpu(cfg->domain)); | 1379 | send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); |
1380 | send_IPI_mask(mask, cfg->vector); | ||
1381 | spin_unlock_irqrestore(&vector_lock, flags); | 1380 | spin_unlock_irqrestore(&vector_lock, flags); |
1382 | 1381 | ||
1383 | return 1; | 1382 | return 1; |
@@ -1696,8 +1695,9 @@ static inline void __init check_timer(void) | |||
1696 | pin2 = ioapic_i8259.pin; | 1695 | pin2 = ioapic_i8259.pin; |
1697 | apic2 = ioapic_i8259.apic; | 1696 | apic2 = ioapic_i8259.apic; |
1698 | 1697 | ||
1699 | apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", | 1698 | apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " |
1700 | cfg->vector, apic1, pin1, apic2, pin2); | 1699 | "apic1=%d pin1=%d apic2=%d pin2=%d\n", |
1700 | cfg->vector, apic1, pin1, apic2, pin2); | ||
1701 | 1701 | ||
1702 | /* | 1702 | /* |
1703 | * Some BIOS writers are clueless and report the ExtINTA | 1703 | * Some BIOS writers are clueless and report the ExtINTA |
@@ -1735,14 +1735,13 @@ static inline void __init check_timer(void) | |||
1735 | } | 1735 | } |
1736 | clear_IO_APIC_pin(apic1, pin1); | 1736 | clear_IO_APIC_pin(apic1, pin1); |
1737 | if (!no_pin1) | 1737 | if (!no_pin1) |
1738 | apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " | 1738 | apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " |
1739 | "8254 timer not connected to IO-APIC\n"); | 1739 | "8254 timer not connected to IO-APIC\n"); |
1740 | 1740 | ||
1741 | apic_printk(APIC_VERBOSE,KERN_INFO | 1741 | apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " |
1742 | "...trying to set up timer (IRQ0) " | 1742 | "(IRQ0) through the 8259A ...\n"); |
1743 | "through the 8259A ... "); | 1743 | apic_printk(APIC_QUIET, KERN_INFO |
1744 | apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", | 1744 | "..... (found apic %d pin %d) ...\n", apic2, pin2); |
1745 | apic2, pin2); | ||
1746 | /* | 1745 | /* |
1747 | * legacy devices should be connected to IO APIC #0 | 1746 | * legacy devices should be connected to IO APIC #0 |
1748 | */ | 1747 | */ |
@@ -1751,7 +1750,7 @@ static inline void __init check_timer(void) | |||
1751 | unmask_IO_APIC_irq(0); | 1750 | unmask_IO_APIC_irq(0); |
1752 | enable_8259A_irq(0); | 1751 | enable_8259A_irq(0); |
1753 | if (timer_irq_works()) { | 1752 | if (timer_irq_works()) { |
1754 | apic_printk(APIC_VERBOSE," works.\n"); | 1753 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
1755 | timer_through_8259 = 1; | 1754 | timer_through_8259 = 1; |
1756 | if (nmi_watchdog == NMI_IO_APIC) { | 1755 | if (nmi_watchdog == NMI_IO_APIC) { |
1757 | disable_8259A_irq(0); | 1756 | disable_8259A_irq(0); |
@@ -1765,29 +1764,32 @@ static inline void __init check_timer(void) | |||
1765 | */ | 1764 | */ |
1766 | disable_8259A_irq(0); | 1765 | disable_8259A_irq(0); |
1767 | clear_IO_APIC_pin(apic2, pin2); | 1766 | clear_IO_APIC_pin(apic2, pin2); |
1768 | apic_printk(APIC_VERBOSE," failed.\n"); | 1767 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); |
1769 | } | 1768 | } |
1770 | 1769 | ||
1771 | if (nmi_watchdog == NMI_IO_APIC) { | 1770 | if (nmi_watchdog == NMI_IO_APIC) { |
1772 | printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); | 1771 | apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " |
1772 | "through the IO-APIC - disabling NMI Watchdog!\n"); | ||
1773 | nmi_watchdog = NMI_NONE; | 1773 | nmi_watchdog = NMI_NONE; |
1774 | } | 1774 | } |
1775 | 1775 | ||
1776 | apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); | 1776 | apic_printk(APIC_QUIET, KERN_INFO |
1777 | "...trying to set up timer as Virtual Wire IRQ...\n"); | ||
1777 | 1778 | ||
1778 | lapic_register_intr(0); | 1779 | lapic_register_intr(0); |
1779 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ | 1780 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ |
1780 | enable_8259A_irq(0); | 1781 | enable_8259A_irq(0); |
1781 | 1782 | ||
1782 | if (timer_irq_works()) { | 1783 | if (timer_irq_works()) { |
1783 | apic_printk(APIC_VERBOSE," works.\n"); | 1784 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
1784 | goto out; | 1785 | goto out; |
1785 | } | 1786 | } |
1786 | disable_8259A_irq(0); | 1787 | disable_8259A_irq(0); |
1787 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); | 1788 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); |
1788 | apic_printk(APIC_VERBOSE," failed.\n"); | 1789 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); |
1789 | 1790 | ||
1790 | apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ..."); | 1791 | apic_printk(APIC_QUIET, KERN_INFO |
1792 | "...trying to set up timer as ExtINT IRQ...\n"); | ||
1791 | 1793 | ||
1792 | init_8259A(0); | 1794 | init_8259A(0); |
1793 | make_8259A_irq(0); | 1795 | make_8259A_irq(0); |
@@ -1796,11 +1798,12 @@ static inline void __init check_timer(void) | |||
1796 | unlock_ExtINT_logic(); | 1798 | unlock_ExtINT_logic(); |
1797 | 1799 | ||
1798 | if (timer_irq_works()) { | 1800 | if (timer_irq_works()) { |
1799 | apic_printk(APIC_VERBOSE," works.\n"); | 1801 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
1800 | goto out; | 1802 | goto out; |
1801 | } | 1803 | } |
1802 | apic_printk(APIC_VERBOSE," failed :(.\n"); | 1804 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); |
1803 | panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n"); | 1805 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " |
1806 | "report. Then try booting with the 'noapic' option.\n"); | ||
1804 | out: | 1807 | out: |
1805 | local_irq_restore(flags); | 1808 | local_irq_restore(flags); |
1806 | } | 1809 | } |
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index 5921e5f0a640..1c3a66a67f83 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c | |||
@@ -103,6 +103,9 @@ void __init io_delay_init(void) | |||
103 | 103 | ||
104 | static int __init io_delay_param(char *s) | 104 | static int __init io_delay_param(char *s) |
105 | { | 105 | { |
106 | if (!s) | ||
107 | return -EINVAL; | ||
108 | |||
106 | if (!strcmp(s, "0x80")) | 109 | if (!strcmp(s, "0x80")) |
107 | io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; | 110 | io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; |
108 | else if (!strcmp(s, "0xed")) | 111 | else if (!strcmp(s, "0xed")) |
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 9d98cda39ad9..3f7537b669d3 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c | |||
@@ -70,7 +70,7 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector) | |||
70 | /* | 70 | /* |
71 | * Send the IPI. The write to APIC_ICR fires this off. | 71 | * Send the IPI. The write to APIC_ICR fires this off. |
72 | */ | 72 | */ |
73 | apic_write_around(APIC_ICR, cfg); | 73 | apic_write(APIC_ICR, cfg); |
74 | } | 74 | } |
75 | 75 | ||
76 | void send_IPI_self(int vector) | 76 | void send_IPI_self(int vector) |
@@ -98,7 +98,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) | |||
98 | * prepare target chip field | 98 | * prepare target chip field |
99 | */ | 99 | */ |
100 | cfg = __prepare_ICR2(mask); | 100 | cfg = __prepare_ICR2(mask); |
101 | apic_write_around(APIC_ICR2, cfg); | 101 | apic_write(APIC_ICR2, cfg); |
102 | 102 | ||
103 | /* | 103 | /* |
104 | * program the ICR | 104 | * program the ICR |
@@ -108,7 +108,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) | |||
108 | /* | 108 | /* |
109 | * Send the IPI. The write to APIC_ICR fires this off. | 109 | * Send the IPI. The write to APIC_ICR fires this off. |
110 | */ | 110 | */ |
111 | apic_write_around(APIC_ICR, cfg); | 111 | apic_write(APIC_ICR, cfg); |
112 | } | 112 | } |
113 | 113 | ||
114 | /* | 114 | /* |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 47a6f6f12478..1cf8c1fcc088 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -83,11 +83,8 @@ union irq_ctx { | |||
83 | static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; | 83 | static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; |
84 | static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; | 84 | static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; |
85 | 85 | ||
86 | static char softirq_stack[NR_CPUS * THREAD_SIZE] | 86 | static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; |
87 | __attribute__((__section__(".bss.page_aligned"))); | 87 | static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; |
88 | |||
89 | static char hardirq_stack[NR_CPUS * THREAD_SIZE] | ||
90 | __attribute__((__section__(".bss.page_aligned"))); | ||
91 | 88 | ||
92 | static void call_on_stack(void *func, void *stack) | 89 | static void call_on_stack(void *func, void *stack) |
93 | { | 90 | { |
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 0373e88de95a..1f26fd9ec4f4 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
@@ -43,10 +43,11 @@ | |||
43 | 43 | ||
44 | #define BUILD_IRQ(nr) \ | 44 | #define BUILD_IRQ(nr) \ |
45 | asmlinkage void IRQ_NAME(nr); \ | 45 | asmlinkage void IRQ_NAME(nr); \ |
46 | asm("\n.p2align\n" \ | 46 | asm("\n.text\n.p2align\n" \ |
47 | "IRQ" #nr "_interrupt:\n\t" \ | 47 | "IRQ" #nr "_interrupt:\n\t" \ |
48 | "push $~(" #nr ") ; " \ | 48 | "push $~(" #nr ") ; " \ |
49 | "jmp common_interrupt"); | 49 | "jmp common_interrupt\n" \ |
50 | ".previous"); | ||
50 | 51 | ||
51 | #define BI(x,y) \ | 52 | #define BI(x,y) \ |
52 | BUILD_IRQ(x##y) | 53 | BUILD_IRQ(x##y) |
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index c03205991718..f2d43bc75514 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c | |||
@@ -12,9 +12,13 @@ | |||
12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
13 | #include <linux/io.h> | 13 | #include <linux/io.h> |
14 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
15 | #include <linux/module.h> | ||
15 | 16 | ||
16 | #include <asm/setup.h> | 17 | #include <asm/setup.h> |
17 | 18 | ||
19 | struct dentry *arch_debugfs_dir; | ||
20 | EXPORT_SYMBOL(arch_debugfs_dir); | ||
21 | |||
18 | #ifdef CONFIG_DEBUG_BOOT_PARAMS | 22 | #ifdef CONFIG_DEBUG_BOOT_PARAMS |
19 | struct setup_data_node { | 23 | struct setup_data_node { |
20 | u64 paddr; | 24 | u64 paddr; |
@@ -209,6 +213,10 @@ static int __init arch_kdebugfs_init(void) | |||
209 | { | 213 | { |
210 | int error = 0; | 214 | int error = 0; |
211 | 215 | ||
216 | arch_debugfs_dir = debugfs_create_dir("x86", NULL); | ||
217 | if (!arch_debugfs_dir) | ||
218 | return -ENOMEM; | ||
219 | |||
212 | #ifdef CONFIG_DEBUG_BOOT_PARAMS | 220 | #ifdef CONFIG_DEBUG_BOOT_PARAMS |
213 | error = boot_params_kdebugfs_init(); | 221 | error = boot_params_kdebugfs_init(); |
214 | #endif | 222 | #endif |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index b8c6743a13da..6c27679ec6aa 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | |||
431 | regs->ip = (unsigned long)p->ainsn.insn; | 431 | regs->ip = (unsigned long)p->ainsn.insn; |
432 | } | 432 | } |
433 | 433 | ||
434 | /* Called with kretprobe_lock held */ | ||
435 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 434 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, |
436 | struct pt_regs *regs) | 435 | struct pt_regs *regs) |
437 | { | 436 | { |
@@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
682 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; | 681 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; |
683 | 682 | ||
684 | INIT_HLIST_HEAD(&empty_rp); | 683 | INIT_HLIST_HEAD(&empty_rp); |
685 | spin_lock_irqsave(&kretprobe_lock, flags); | 684 | kretprobe_hash_lock(current, &head, &flags); |
686 | head = kretprobe_inst_table_head(current); | ||
687 | /* fixup registers */ | 685 | /* fixup registers */ |
688 | #ifdef CONFIG_X86_64 | 686 | #ifdef CONFIG_X86_64 |
689 | regs->cs = __KERNEL_CS; | 687 | regs->cs = __KERNEL_CS; |
@@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
732 | 730 | ||
733 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | 731 | kretprobe_assert(ri, orig_ret_address, trampoline_address); |
734 | 732 | ||
735 | spin_unlock_irqrestore(&kretprobe_lock, flags); | 733 | kretprobe_hash_unlock(current, &flags); |
736 | 734 | ||
737 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { | 735 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { |
738 | hlist_del(&ri->hlist); | 736 | hlist_del(&ri->hlist); |
@@ -860,7 +858,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) | |||
860 | 858 | ||
861 | resume_execution(cur, regs, kcb); | 859 | resume_execution(cur, regs, kcb); |
862 | regs->flags |= kcb->kprobe_saved_flags; | 860 | regs->flags |= kcb->kprobe_saved_flags; |
863 | trace_hardirqs_fixup_flags(regs->flags); | ||
864 | 861 | ||
865 | if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { | 862 | if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { |
866 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | 863 | kcb->kprobe_status = KPROBE_HIT_SSDONE; |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 87edf1ceb1df..d02def06ca91 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -113,7 +113,7 @@ static void kvm_setup_secondary_clock(void) | |||
113 | #endif | 113 | #endif |
114 | 114 | ||
115 | #ifdef CONFIG_SMP | 115 | #ifdef CONFIG_SMP |
116 | void __init kvm_smp_prepare_boot_cpu(void) | 116 | static void __init kvm_smp_prepare_boot_cpu(void) |
117 | { | 117 | { |
118 | WARN_ON(kvm_register_clock("primary cpu clock")); | 118 | WARN_ON(kvm_register_clock("primary cpu clock")); |
119 | native_smp_prepare_boot_cpu(); | 119 | native_smp_prepare_boot_cpu(); |
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index a8449571858a..3fee2aa50f3f 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -62,12 +62,12 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
62 | 62 | ||
63 | if (reload) { | 63 | if (reload) { |
64 | #ifdef CONFIG_SMP | 64 | #ifdef CONFIG_SMP |
65 | cpumask_t mask; | 65 | cpumask_of_cpu_ptr_declare(mask); |
66 | 66 | ||
67 | preempt_disable(); | 67 | preempt_disable(); |
68 | load_LDT(pc); | 68 | load_LDT(pc); |
69 | mask = cpumask_of_cpu(smp_processor_id()); | 69 | cpumask_of_cpu_ptr_next(mask, smp_processor_id()); |
70 | if (!cpus_equal(current->mm->cpu_vm_mask, mask)) | 70 | if (!cpus_equal(current->mm->cpu_vm_mask, *mask)) |
71 | smp_call_function(flush_ldt, current->mm, 1); | 71 | smp_call_function(flush_ldt, current->mm, 1); |
72 | preempt_enable(); | 72 | preempt_enable(); |
73 | #else | 73 | #else |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 8864230d55af..9fe478d98406 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <asm/cpufeature.h> | 22 | #include <asm/cpufeature.h> |
23 | #include <asm/desc.h> | 23 | #include <asm/desc.h> |
24 | #include <asm/system.h> | 24 | #include <asm/system.h> |
25 | #include <asm/cacheflush.h> | ||
25 | 26 | ||
26 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | 27 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) |
27 | static u32 kexec_pgd[1024] PAGE_ALIGNED; | 28 | static u32 kexec_pgd[1024] PAGE_ALIGNED; |
@@ -85,10 +86,12 @@ static void load_segments(void) | |||
85 | * reboot code buffer to allow us to avoid allocations | 86 | * reboot code buffer to allow us to avoid allocations |
86 | * later. | 87 | * later. |
87 | * | 88 | * |
88 | * Currently nothing. | 89 | * Make control page executable. |
89 | */ | 90 | */ |
90 | int machine_kexec_prepare(struct kimage *image) | 91 | int machine_kexec_prepare(struct kimage *image) |
91 | { | 92 | { |
93 | if (nx_enabled) | ||
94 | set_pages_x(image->control_code_page, 1); | ||
92 | return 0; | 95 | return 0; |
93 | } | 96 | } |
94 | 97 | ||
@@ -98,27 +101,48 @@ int machine_kexec_prepare(struct kimage *image) | |||
98 | */ | 101 | */ |
99 | void machine_kexec_cleanup(struct kimage *image) | 102 | void machine_kexec_cleanup(struct kimage *image) |
100 | { | 103 | { |
104 | if (nx_enabled) | ||
105 | set_pages_nx(image->control_code_page, 1); | ||
101 | } | 106 | } |
102 | 107 | ||
103 | /* | 108 | /* |
104 | * Do not allocate memory (or fail in any way) in machine_kexec(). | 109 | * Do not allocate memory (or fail in any way) in machine_kexec(). |
105 | * We are past the point of no return, committed to rebooting now. | 110 | * We are past the point of no return, committed to rebooting now. |
106 | */ | 111 | */ |
107 | NORET_TYPE void machine_kexec(struct kimage *image) | 112 | void machine_kexec(struct kimage *image) |
108 | { | 113 | { |
109 | unsigned long page_list[PAGES_NR]; | 114 | unsigned long page_list[PAGES_NR]; |
110 | void *control_page; | 115 | void *control_page; |
116 | asmlinkage unsigned long | ||
117 | (*relocate_kernel_ptr)(unsigned long indirection_page, | ||
118 | unsigned long control_page, | ||
119 | unsigned long start_address, | ||
120 | unsigned int has_pae, | ||
121 | unsigned int preserve_context); | ||
111 | 122 | ||
112 | tracer_disable(); | 123 | tracer_disable(); |
113 | 124 | ||
114 | /* Interrupts aren't acceptable while we reboot */ | 125 | /* Interrupts aren't acceptable while we reboot */ |
115 | local_irq_disable(); | 126 | local_irq_disable(); |
116 | 127 | ||
128 | if (image->preserve_context) { | ||
129 | #ifdef CONFIG_X86_IO_APIC | ||
130 | /* We need to put APICs in legacy mode so that we can | ||
131 | * get timer interrupts in second kernel. kexec/kdump | ||
132 | * paths already have calls to disable_IO_APIC() in | ||
133 | * one form or other. kexec jump path also need | ||
134 | * one. | ||
135 | */ | ||
136 | disable_IO_APIC(); | ||
137 | #endif | ||
138 | } | ||
139 | |||
117 | control_page = page_address(image->control_code_page); | 140 | control_page = page_address(image->control_code_page); |
118 | memcpy(control_page, relocate_kernel, PAGE_SIZE); | 141 | memcpy(control_page, relocate_kernel, PAGE_SIZE/2); |
119 | 142 | ||
143 | relocate_kernel_ptr = control_page; | ||
120 | page_list[PA_CONTROL_PAGE] = __pa(control_page); | 144 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
121 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | 145 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; |
122 | page_list[PA_PGD] = __pa(kexec_pgd); | 146 | page_list[PA_PGD] = __pa(kexec_pgd); |
123 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | 147 | page_list[VA_PGD] = (unsigned long)kexec_pgd; |
124 | #ifdef CONFIG_X86_PAE | 148 | #ifdef CONFIG_X86_PAE |
@@ -131,6 +155,7 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
131 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | 155 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; |
132 | page_list[PA_PTE_1] = __pa(kexec_pte1); | 156 | page_list[PA_PTE_1] = __pa(kexec_pte1); |
133 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | 157 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; |
158 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); | ||
134 | 159 | ||
135 | /* The segment registers are funny things, they have both a | 160 | /* The segment registers are funny things, they have both a |
136 | * visible and an invisible part. Whenever the visible part is | 161 | * visible and an invisible part. Whenever the visible part is |
@@ -149,8 +174,10 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
149 | set_idt(phys_to_virt(0),0); | 174 | set_idt(phys_to_virt(0),0); |
150 | 175 | ||
151 | /* now call it */ | 176 | /* now call it */ |
152 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, | 177 | image->start = relocate_kernel_ptr((unsigned long)image->head, |
153 | image->start, cpu_has_pae); | 178 | (unsigned long)page_list, |
179 | image->start, cpu_has_pae, | ||
180 | image->preserve_context); | ||
154 | } | 181 | } |
155 | 182 | ||
156 | void arch_crash_save_vmcoreinfo(void) | 183 | void arch_crash_save_vmcoreinfo(void) |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 9dd9262693a3..c43caa3a91f3 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image) | |||
181 | * Do not allocate memory (or fail in any way) in machine_kexec(). | 181 | * Do not allocate memory (or fail in any way) in machine_kexec(). |
182 | * We are past the point of no return, committed to rebooting now. | 182 | * We are past the point of no return, committed to rebooting now. |
183 | */ | 183 | */ |
184 | NORET_TYPE void machine_kexec(struct kimage *image) | 184 | void machine_kexec(struct kimage *image) |
185 | { | 185 | { |
186 | unsigned long page_list[PAGES_NR]; | 186 | unsigned long page_list[PAGES_NR]; |
187 | void *control_page; | 187 | void *control_page; |
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 56b933119a04..6994c751590e 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c | |||
@@ -388,6 +388,7 @@ static int do_microcode_update (void) | |||
388 | void *new_mc = NULL; | 388 | void *new_mc = NULL; |
389 | int cpu; | 389 | int cpu; |
390 | cpumask_t old; | 390 | cpumask_t old; |
391 | cpumask_of_cpu_ptr_declare(newmask); | ||
391 | 392 | ||
392 | old = current->cpus_allowed; | 393 | old = current->cpus_allowed; |
393 | 394 | ||
@@ -404,7 +405,8 @@ static int do_microcode_update (void) | |||
404 | 405 | ||
405 | if (!uci->valid) | 406 | if (!uci->valid) |
406 | continue; | 407 | continue; |
407 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 408 | cpumask_of_cpu_ptr_next(newmask, cpu); |
409 | set_cpus_allowed_ptr(current, newmask); | ||
408 | error = get_maching_microcode(new_mc, cpu); | 410 | error = get_maching_microcode(new_mc, cpu); |
409 | if (error < 0) | 411 | if (error < 0) |
410 | goto out; | 412 | goto out; |
@@ -574,6 +576,7 @@ static int apply_microcode_check_cpu(int cpu) | |||
574 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 576 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
575 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 577 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
576 | cpumask_t old; | 578 | cpumask_t old; |
579 | cpumask_of_cpu_ptr(newmask, cpu); | ||
577 | unsigned int val[2]; | 580 | unsigned int val[2]; |
578 | int err = 0; | 581 | int err = 0; |
579 | 582 | ||
@@ -582,7 +585,7 @@ static int apply_microcode_check_cpu(int cpu) | |||
582 | return 0; | 585 | return 0; |
583 | 586 | ||
584 | old = current->cpus_allowed; | 587 | old = current->cpus_allowed; |
585 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 588 | set_cpus_allowed_ptr(current, newmask); |
586 | 589 | ||
587 | /* Check if the microcode we have in memory matches the CPU */ | 590 | /* Check if the microcode we have in memory matches the CPU */ |
588 | if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || | 591 | if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || |
@@ -620,11 +623,12 @@ static int apply_microcode_check_cpu(int cpu) | |||
620 | static void microcode_init_cpu(int cpu, int resume) | 623 | static void microcode_init_cpu(int cpu, int resume) |
621 | { | 624 | { |
622 | cpumask_t old; | 625 | cpumask_t old; |
626 | cpumask_of_cpu_ptr(newmask, cpu); | ||
623 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 627 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
624 | 628 | ||
625 | old = current->cpus_allowed; | 629 | old = current->cpus_allowed; |
626 | 630 | ||
627 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 631 | set_cpus_allowed_ptr(current, newmask); |
628 | mutex_lock(&microcode_mutex); | 632 | mutex_lock(&microcode_mutex); |
629 | collect_cpu_info(cpu); | 633 | collect_cpu_info(cpu); |
630 | if (uci->valid && system_state == SYSTEM_RUNNING && !resume) | 634 | if (uci->valid && system_state == SYSTEM_RUNNING && !resume) |
@@ -644,7 +648,9 @@ static void microcode_fini_cpu(int cpu) | |||
644 | mutex_unlock(&microcode_mutex); | 648 | mutex_unlock(&microcode_mutex); |
645 | } | 649 | } |
646 | 650 | ||
647 | static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) | 651 | static ssize_t reload_store(struct sys_device *dev, |
652 | struct sysdev_attribute *attr, | ||
653 | const char *buf, size_t sz) | ||
648 | { | 654 | { |
649 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | 655 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; |
650 | char *end; | 656 | char *end; |
@@ -656,11 +662,12 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) | |||
656 | return -EINVAL; | 662 | return -EINVAL; |
657 | if (val == 1) { | 663 | if (val == 1) { |
658 | cpumask_t old; | 664 | cpumask_t old; |
665 | cpumask_of_cpu_ptr(newmask, cpu); | ||
659 | 666 | ||
660 | old = current->cpus_allowed; | 667 | old = current->cpus_allowed; |
661 | 668 | ||
662 | get_online_cpus(); | 669 | get_online_cpus(); |
663 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 670 | set_cpus_allowed_ptr(current, newmask); |
664 | 671 | ||
665 | mutex_lock(&microcode_mutex); | 672 | mutex_lock(&microcode_mutex); |
666 | if (uci->valid) | 673 | if (uci->valid) |
@@ -674,14 +681,16 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) | |||
674 | return sz; | 681 | return sz; |
675 | } | 682 | } |
676 | 683 | ||
677 | static ssize_t version_show(struct sys_device *dev, char *buf) | 684 | static ssize_t version_show(struct sys_device *dev, |
685 | struct sysdev_attribute *attr, char *buf) | ||
678 | { | 686 | { |
679 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | 687 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; |
680 | 688 | ||
681 | return sprintf(buf, "0x%x\n", uci->rev); | 689 | return sprintf(buf, "0x%x\n", uci->rev); |
682 | } | 690 | } |
683 | 691 | ||
684 | static ssize_t pf_show(struct sys_device *dev, char *buf) | 692 | static ssize_t pf_show(struct sys_device *dev, |
693 | struct sysdev_attribute *attr, char *buf) | ||
685 | { | 694 | { |
686 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | 695 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; |
687 | 696 | ||
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c index a888e67f5874..6ba87830d4b1 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module_64.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
23 | #include <linux/string.h> | 23 | #include <linux/string.h> |
24 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
25 | #include <linux/mm.h> | ||
25 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
26 | #include <linux/bug.h> | 27 | #include <linux/bug.h> |
27 | 28 | ||
@@ -150,7 +151,8 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
150 | const Elf_Shdr *sechdrs, | 151 | const Elf_Shdr *sechdrs, |
151 | struct module *me) | 152 | struct module *me) |
152 | { | 153 | { |
153 | const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; | 154 | const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, |
155 | *para = NULL; | ||
154 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; | 156 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; |
155 | 157 | ||
156 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { | 158 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { |
@@ -160,6 +162,8 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
160 | alt = s; | 162 | alt = s; |
161 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) | 163 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) |
162 | locks= s; | 164 | locks= s; |
165 | if (!strcmp(".parainstructions", secstrings + s->sh_name)) | ||
166 | para = s; | ||
163 | } | 167 | } |
164 | 168 | ||
165 | if (alt) { | 169 | if (alt) { |
@@ -175,6 +179,11 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
175 | tseg, tseg + text->sh_size); | 179 | tseg, tseg + text->sh_size); |
176 | } | 180 | } |
177 | 181 | ||
182 | if (para) { | ||
183 | void *pseg = (void *)para->sh_addr; | ||
184 | apply_paravirt(pseg, pseg + para->sh_size); | ||
185 | } | ||
186 | |||
178 | return module_bug_finalize(hdr, sechdrs, me); | 187 | return module_bug_finalize(hdr, sechdrs, me); |
179 | } | 188 | } |
180 | 189 | ||
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3b25e49380c6..6ae005ccaed8 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <asm/bios_ebda.h> | 27 | #include <asm/bios_ebda.h> |
28 | #include <asm/e820.h> | 28 | #include <asm/e820.h> |
29 | #include <asm/trampoline.h> | 29 | #include <asm/trampoline.h> |
30 | #include <asm/setup.h> | ||
30 | 31 | ||
31 | #include <mach_apic.h> | 32 | #include <mach_apic.h> |
32 | #ifdef CONFIG_X86_32 | 33 | #ifdef CONFIG_X86_32 |
@@ -48,76 +49,6 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
48 | return sum & 0xFF; | 49 | return sum & 0xFF; |
49 | } | 50 | } |
50 | 51 | ||
51 | #ifdef CONFIG_X86_NUMAQ | ||
52 | int found_numaq; | ||
53 | /* | ||
54 | * Have to match translation table entries to main table entries by counter | ||
55 | * hence the mpc_record variable .... can't see a less disgusting way of | ||
56 | * doing this .... | ||
57 | */ | ||
58 | struct mpc_config_translation { | ||
59 | unsigned char mpc_type; | ||
60 | unsigned char trans_len; | ||
61 | unsigned char trans_type; | ||
62 | unsigned char trans_quad; | ||
63 | unsigned char trans_global; | ||
64 | unsigned char trans_local; | ||
65 | unsigned short trans_reserved; | ||
66 | }; | ||
67 | |||
68 | |||
69 | static int mpc_record; | ||
70 | static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] | ||
71 | __cpuinitdata; | ||
72 | |||
73 | static inline int generate_logical_apicid(int quad, int phys_apicid) | ||
74 | { | ||
75 | return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); | ||
76 | } | ||
77 | |||
78 | |||
79 | static inline int mpc_apic_id(struct mpc_config_processor *m, | ||
80 | struct mpc_config_translation *translation_record) | ||
81 | { | ||
82 | int quad = translation_record->trans_quad; | ||
83 | int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); | ||
84 | |||
85 | printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", | ||
86 | m->mpc_apicid, | ||
87 | (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, | ||
88 | (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, | ||
89 | m->mpc_apicver, quad, logical_apicid); | ||
90 | return logical_apicid; | ||
91 | } | ||
92 | |||
93 | int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
94 | |||
95 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
96 | |||
97 | static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, | ||
98 | struct mpc_config_translation *translation) | ||
99 | { | ||
100 | int quad = translation->trans_quad; | ||
101 | int local = translation->trans_local; | ||
102 | |||
103 | mp_bus_id_to_node[m->mpc_busid] = quad; | ||
104 | mp_bus_id_to_local[m->mpc_busid] = local; | ||
105 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", | ||
106 | m->mpc_busid, name, quad); | ||
107 | } | ||
108 | |||
109 | int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | ||
110 | static void mpc_oem_pci_bus(struct mpc_config_bus *m, | ||
111 | struct mpc_config_translation *translation) | ||
112 | { | ||
113 | int quad = translation->trans_quad; | ||
114 | int local = translation->trans_local; | ||
115 | |||
116 | quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; | ||
117 | } | ||
118 | |||
119 | #endif | ||
120 | |||
121 | static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | 52 | static void __cpuinit MP_processor_info(struct mpc_config_processor *m) |
122 | { | 53 | { |
123 | int apicid; | 54 | int apicid; |
@@ -127,14 +58,12 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | |||
127 | disabled_cpus++; | 58 | disabled_cpus++; |
128 | return; | 59 | return; |
129 | } | 60 | } |
130 | #ifdef CONFIG_X86_NUMAQ | 61 | |
131 | if (found_numaq) | 62 | if (x86_quirks->mpc_apic_id) |
132 | apicid = mpc_apic_id(m, translation_table[mpc_record]); | 63 | apicid = x86_quirks->mpc_apic_id(m); |
133 | else | 64 | else |
134 | apicid = m->mpc_apicid; | 65 | apicid = m->mpc_apicid; |
135 | #else | 66 | |
136 | apicid = m->mpc_apicid; | ||
137 | #endif | ||
138 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { | 67 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { |
139 | bootup_cpu = " (Bootup-CPU)"; | 68 | bootup_cpu = " (Bootup-CPU)"; |
140 | boot_cpu_physical_apicid = m->mpc_apicid; | 69 | boot_cpu_physical_apicid = m->mpc_apicid; |
@@ -151,12 +80,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m) | |||
151 | memcpy(str, m->mpc_bustype, 6); | 80 | memcpy(str, m->mpc_bustype, 6); |
152 | str[6] = 0; | 81 | str[6] = 0; |
153 | 82 | ||
154 | #ifdef CONFIG_X86_NUMAQ | 83 | if (x86_quirks->mpc_oem_bus_info) |
155 | if (found_numaq) | 84 | x86_quirks->mpc_oem_bus_info(m, str); |
156 | mpc_oem_bus_info(m, str, translation_table[mpc_record]); | 85 | else |
157 | #else | 86 | printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); |
158 | printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); | ||
159 | #endif | ||
160 | 87 | ||
161 | #if MAX_MP_BUSSES < 256 | 88 | #if MAX_MP_BUSSES < 256 |
162 | if (m->mpc_busid >= MAX_MP_BUSSES) { | 89 | if (m->mpc_busid >= MAX_MP_BUSSES) { |
@@ -173,10 +100,9 @@ static void __init MP_bus_info(struct mpc_config_bus *m) | |||
173 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; | 100 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; |
174 | #endif | 101 | #endif |
175 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { | 102 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { |
176 | #ifdef CONFIG_X86_NUMAQ | 103 | if (x86_quirks->mpc_oem_pci_bus) |
177 | if (found_numaq) | 104 | x86_quirks->mpc_oem_pci_bus(m); |
178 | mpc_oem_pci_bus(m, translation_table[mpc_record]); | 105 | |
179 | #endif | ||
180 | clear_bit(m->mpc_busid, mp_bus_not_pci); | 106 | clear_bit(m->mpc_busid, mp_bus_not_pci); |
181 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) | 107 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) |
182 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; | 108 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; |
@@ -316,83 +242,6 @@ static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) | |||
316 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); | 242 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); |
317 | } | 243 | } |
318 | 244 | ||
319 | #ifdef CONFIG_X86_NUMAQ | ||
320 | static void __init MP_translation_info(struct mpc_config_translation *m) | ||
321 | { | ||
322 | printk(KERN_INFO | ||
323 | "Translation: record %d, type %d, quad %d, global %d, local %d\n", | ||
324 | mpc_record, m->trans_type, m->trans_quad, m->trans_global, | ||
325 | m->trans_local); | ||
326 | |||
327 | if (mpc_record >= MAX_MPC_ENTRY) | ||
328 | printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); | ||
329 | else | ||
330 | translation_table[mpc_record] = m; /* stash this for later */ | ||
331 | if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) | ||
332 | node_set_online(m->trans_quad); | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | * Read/parse the MPC oem tables | ||
337 | */ | ||
338 | |||
339 | static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, | ||
340 | unsigned short oemsize) | ||
341 | { | ||
342 | int count = sizeof(*oemtable); /* the header size */ | ||
343 | unsigned char *oemptr = ((unsigned char *)oemtable) + count; | ||
344 | |||
345 | mpc_record = 0; | ||
346 | printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", | ||
347 | oemtable); | ||
348 | if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { | ||
349 | printk(KERN_WARNING | ||
350 | "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", | ||
351 | oemtable->oem_signature[0], oemtable->oem_signature[1], | ||
352 | oemtable->oem_signature[2], oemtable->oem_signature[3]); | ||
353 | return; | ||
354 | } | ||
355 | if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { | ||
356 | printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); | ||
357 | return; | ||
358 | } | ||
359 | while (count < oemtable->oem_length) { | ||
360 | switch (*oemptr) { | ||
361 | case MP_TRANSLATION: | ||
362 | { | ||
363 | struct mpc_config_translation *m = | ||
364 | (struct mpc_config_translation *)oemptr; | ||
365 | MP_translation_info(m); | ||
366 | oemptr += sizeof(*m); | ||
367 | count += sizeof(*m); | ||
368 | ++mpc_record; | ||
369 | break; | ||
370 | } | ||
371 | default: | ||
372 | { | ||
373 | printk(KERN_WARNING | ||
374 | "Unrecognised OEM table entry type! - %d\n", | ||
375 | (int)*oemptr); | ||
376 | return; | ||
377 | } | ||
378 | } | ||
379 | } | ||
380 | } | ||
381 | |||
382 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, | ||
383 | char *productid) | ||
384 | { | ||
385 | if (strncmp(oem, "IBM NUMA", 8)) | ||
386 | printk("Warning! Not a NUMA-Q system!\n"); | ||
387 | else | ||
388 | found_numaq = 1; | ||
389 | |||
390 | if (mpc->mpc_oemptr) | ||
391 | smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, | ||
392 | mpc->mpc_oemsize); | ||
393 | } | ||
394 | #endif /* CONFIG_X86_NUMAQ */ | ||
395 | |||
396 | /* | 245 | /* |
397 | * Read/parse the MPC | 246 | * Read/parse the MPC |
398 | */ | 247 | */ |
@@ -457,7 +306,6 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
457 | } else | 306 | } else |
458 | mps_oem_check(mpc, oem, str); | 307 | mps_oem_check(mpc, oem, str); |
459 | #endif | 308 | #endif |
460 | |||
461 | /* save the local APIC address, it might be non-default */ | 309 | /* save the local APIC address, it might be non-default */ |
462 | if (!acpi_lapic) | 310 | if (!acpi_lapic) |
463 | mp_lapic_addr = mpc->mpc_lapic; | 311 | mp_lapic_addr = mpc->mpc_lapic; |
@@ -465,12 +313,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
465 | if (early) | 313 | if (early) |
466 | return 1; | 314 | return 1; |
467 | 315 | ||
316 | if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) { | ||
317 | struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr; | ||
318 | x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize); | ||
319 | } | ||
320 | |||
468 | /* | 321 | /* |
469 | * Now process the configuration blocks. | 322 | * Now process the configuration blocks. |
470 | */ | 323 | */ |
471 | #ifdef CONFIG_X86_NUMAQ | 324 | if (x86_quirks->mpc_record) |
472 | mpc_record = 0; | 325 | *x86_quirks->mpc_record = 0; |
473 | #endif | 326 | |
474 | while (count < mpc->mpc_length) { | 327 | while (count < mpc->mpc_length) { |
475 | switch (*mpt) { | 328 | switch (*mpt) { |
476 | case MP_PROCESSOR: | 329 | case MP_PROCESSOR: |
@@ -536,9 +389,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
536 | count = mpc->mpc_length; | 389 | count = mpc->mpc_length; |
537 | break; | 390 | break; |
538 | } | 391 | } |
539 | #ifdef CONFIG_X86_NUMAQ | 392 | if (x86_quirks->mpc_record) |
540 | ++mpc_record; | 393 | (*x86_quirks->mpc_record)++; |
541 | #endif | ||
542 | } | 394 | } |
543 | 395 | ||
544 | #ifdef CONFIG_X86_GENERICARCH | 396 | #ifdef CONFIG_X86_GENERICARCH |
@@ -726,20 +578,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
726 | static struct intel_mp_floating *mpf_found; | 578 | static struct intel_mp_floating *mpf_found; |
727 | 579 | ||
728 | /* | 580 | /* |
729 | * Machine specific quirk for finding the SMP config before other setup | ||
730 | * activities destroy the table: | ||
731 | */ | ||
732 | int (*mach_get_smp_config_quirk)(unsigned int early); | ||
733 | |||
734 | /* | ||
735 | * Scan the memory blocks for an SMP configuration block. | 581 | * Scan the memory blocks for an SMP configuration block. |
736 | */ | 582 | */ |
737 | static void __init __get_smp_config(unsigned int early) | 583 | static void __init __get_smp_config(unsigned int early) |
738 | { | 584 | { |
739 | struct intel_mp_floating *mpf = mpf_found; | 585 | struct intel_mp_floating *mpf = mpf_found; |
740 | 586 | ||
741 | if (mach_get_smp_config_quirk) { | 587 | if (x86_quirks->mach_get_smp_config) { |
742 | if (mach_get_smp_config_quirk(early)) | 588 | if (x86_quirks->mach_get_smp_config(early)) |
743 | return; | 589 | return; |
744 | } | 590 | } |
745 | if (acpi_lapic && early) | 591 | if (acpi_lapic && early) |
@@ -899,14 +745,12 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
899 | return 0; | 745 | return 0; |
900 | } | 746 | } |
901 | 747 | ||
902 | int (*mach_find_smp_config_quirk)(unsigned int reserve); | ||
903 | |||
904 | static void __init __find_smp_config(unsigned int reserve) | 748 | static void __init __find_smp_config(unsigned int reserve) |
905 | { | 749 | { |
906 | unsigned int address; | 750 | unsigned int address; |
907 | 751 | ||
908 | if (mach_find_smp_config_quirk) { | 752 | if (x86_quirks->mach_find_smp_config) { |
909 | if (mach_find_smp_config_quirk(reserve)) | 753 | if (x86_quirks->mach_find_smp_config(reserve)) |
910 | return; | 754 | return; |
911 | } | 755 | } |
912 | /* | 756 | /* |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index a153b3905f60..9fd809552447 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -149,8 +149,8 @@ static int __cpuinit msr_device_create(int cpu) | |||
149 | { | 149 | { |
150 | struct device *dev; | 150 | struct device *dev; |
151 | 151 | ||
152 | dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), | 152 | dev = device_create_drvdata(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), |
153 | "msr%d", cpu); | 153 | NULL, "msr%d", cpu); |
154 | return IS_ERR(dev) ? PTR_ERR(dev) : 0; | 154 | return IS_ERR(dev) ? PTR_ERR(dev) : 0; |
155 | } | 155 | } |
156 | 156 | ||
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ec024b3baad0..ac6d51222e7d 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -263,7 +263,7 @@ late_initcall(init_lapic_nmi_sysfs); | |||
263 | 263 | ||
264 | static void __acpi_nmi_enable(void *__unused) | 264 | static void __acpi_nmi_enable(void *__unused) |
265 | { | 265 | { |
266 | apic_write_around(APIC_LVT0, APIC_DM_NMI); | 266 | apic_write(APIC_LVT0, APIC_DM_NMI); |
267 | } | 267 | } |
268 | 268 | ||
269 | /* | 269 | /* |
@@ -277,7 +277,7 @@ void acpi_nmi_enable(void) | |||
277 | 277 | ||
278 | static void __acpi_nmi_disable(void *__unused) | 278 | static void __acpi_nmi_disable(void *__unused) |
279 | { | 279 | { |
280 | apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | 280 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); |
281 | } | 281 | } |
282 | 282 | ||
283 | /* | 283 | /* |
@@ -448,6 +448,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
448 | 448 | ||
449 | #ifdef CONFIG_SYSCTL | 449 | #ifdef CONFIG_SYSCTL |
450 | 450 | ||
451 | static int __init setup_unknown_nmi_panic(char *str) | ||
452 | { | ||
453 | unknown_nmi_panic = 1; | ||
454 | return 1; | ||
455 | } | ||
456 | __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | ||
457 | |||
451 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | 458 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) |
452 | { | 459 | { |
453 | unsigned char reason = get_nmi_reason(); | 460 | unsigned char reason = get_nmi_reason(); |
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index a23e8233b9ac..b8c45610b20a 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/processor.h> | 33 | #include <asm/processor.h> |
34 | #include <asm/mpspec.h> | 34 | #include <asm/mpspec.h> |
35 | #include <asm/e820.h> | 35 | #include <asm/e820.h> |
36 | #include <asm/setup.h> | ||
36 | 37 | ||
37 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) | 38 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) |
38 | 39 | ||
@@ -71,6 +72,188 @@ static void __init smp_dump_qct(void) | |||
71 | } | 72 | } |
72 | } | 73 | } |
73 | 74 | ||
75 | |||
76 | void __init numaq_tsc_disable(void) | ||
77 | { | ||
78 | if (!found_numaq) | ||
79 | return; | ||
80 | |||
81 | if (num_online_nodes() > 1) { | ||
82 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); | ||
83 | setup_clear_cpu_cap(X86_FEATURE_TSC); | ||
84 | } | ||
85 | } | ||
86 | |||
87 | static int __init numaq_pre_time_init(void) | ||
88 | { | ||
89 | numaq_tsc_disable(); | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | int found_numaq; | ||
94 | /* | ||
95 | * Have to match translation table entries to main table entries by counter | ||
96 | * hence the mpc_record variable .... can't see a less disgusting way of | ||
97 | * doing this .... | ||
98 | */ | ||
99 | struct mpc_config_translation { | ||
100 | unsigned char mpc_type; | ||
101 | unsigned char trans_len; | ||
102 | unsigned char trans_type; | ||
103 | unsigned char trans_quad; | ||
104 | unsigned char trans_global; | ||
105 | unsigned char trans_local; | ||
106 | unsigned short trans_reserved; | ||
107 | }; | ||
108 | |||
109 | /* x86_quirks member */ | ||
110 | static int mpc_record; | ||
111 | static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] | ||
112 | __cpuinitdata; | ||
113 | |||
114 | static inline int generate_logical_apicid(int quad, int phys_apicid) | ||
115 | { | ||
116 | return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); | ||
117 | } | ||
118 | |||
119 | /* x86_quirks member */ | ||
120 | static int mpc_apic_id(struct mpc_config_processor *m) | ||
121 | { | ||
122 | int quad = translation_table[mpc_record]->trans_quad; | ||
123 | int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); | ||
124 | |||
125 | printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", | ||
126 | m->mpc_apicid, | ||
127 | (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, | ||
128 | (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, | ||
129 | m->mpc_apicver, quad, logical_apicid); | ||
130 | return logical_apicid; | ||
131 | } | ||
132 | |||
133 | int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
134 | |||
135 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
136 | |||
137 | /* x86_quirks member */ | ||
138 | static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name) | ||
139 | { | ||
140 | int quad = translation_table[mpc_record]->trans_quad; | ||
141 | int local = translation_table[mpc_record]->trans_local; | ||
142 | |||
143 | mp_bus_id_to_node[m->mpc_busid] = quad; | ||
144 | mp_bus_id_to_local[m->mpc_busid] = local; | ||
145 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", | ||
146 | m->mpc_busid, name, quad); | ||
147 | } | ||
148 | |||
149 | int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | ||
150 | |||
151 | /* x86_quirks member */ | ||
152 | static void mpc_oem_pci_bus(struct mpc_config_bus *m) | ||
153 | { | ||
154 | int quad = translation_table[mpc_record]->trans_quad; | ||
155 | int local = translation_table[mpc_record]->trans_local; | ||
156 | |||
157 | quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; | ||
158 | } | ||
159 | |||
160 | static void __init MP_translation_info(struct mpc_config_translation *m) | ||
161 | { | ||
162 | printk(KERN_INFO | ||
163 | "Translation: record %d, type %d, quad %d, global %d, local %d\n", | ||
164 | mpc_record, m->trans_type, m->trans_quad, m->trans_global, | ||
165 | m->trans_local); | ||
166 | |||
167 | if (mpc_record >= MAX_MPC_ENTRY) | ||
168 | printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); | ||
169 | else | ||
170 | translation_table[mpc_record] = m; /* stash this for later */ | ||
171 | if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) | ||
172 | node_set_online(m->trans_quad); | ||
173 | } | ||
174 | |||
175 | static int __init mpf_checksum(unsigned char *mp, int len) | ||
176 | { | ||
177 | int sum = 0; | ||
178 | |||
179 | while (len--) | ||
180 | sum += *mp++; | ||
181 | |||
182 | return sum & 0xFF; | ||
183 | } | ||
184 | |||
185 | /* | ||
186 | * Read/parse the MPC oem tables | ||
187 | */ | ||
188 | |||
189 | static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, | ||
190 | unsigned short oemsize) | ||
191 | { | ||
192 | int count = sizeof(*oemtable); /* the header size */ | ||
193 | unsigned char *oemptr = ((unsigned char *)oemtable) + count; | ||
194 | |||
195 | mpc_record = 0; | ||
196 | printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", | ||
197 | oemtable); | ||
198 | if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { | ||
199 | printk(KERN_WARNING | ||
200 | "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", | ||
201 | oemtable->oem_signature[0], oemtable->oem_signature[1], | ||
202 | oemtable->oem_signature[2], oemtable->oem_signature[3]); | ||
203 | return; | ||
204 | } | ||
205 | if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { | ||
206 | printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); | ||
207 | return; | ||
208 | } | ||
209 | while (count < oemtable->oem_length) { | ||
210 | switch (*oemptr) { | ||
211 | case MP_TRANSLATION: | ||
212 | { | ||
213 | struct mpc_config_translation *m = | ||
214 | (struct mpc_config_translation *)oemptr; | ||
215 | MP_translation_info(m); | ||
216 | oemptr += sizeof(*m); | ||
217 | count += sizeof(*m); | ||
218 | ++mpc_record; | ||
219 | break; | ||
220 | } | ||
221 | default: | ||
222 | { | ||
223 | printk(KERN_WARNING | ||
224 | "Unrecognised OEM table entry type! - %d\n", | ||
225 | (int)*oemptr); | ||
226 | return; | ||
227 | } | ||
228 | } | ||
229 | } | ||
230 | } | ||
231 | |||
232 | static struct x86_quirks numaq_x86_quirks __initdata = { | ||
233 | .arch_pre_time_init = numaq_pre_time_init, | ||
234 | .arch_time_init = NULL, | ||
235 | .arch_pre_intr_init = NULL, | ||
236 | .arch_memory_setup = NULL, | ||
237 | .arch_intr_init = NULL, | ||
238 | .arch_trap_init = NULL, | ||
239 | .mach_get_smp_config = NULL, | ||
240 | .mach_find_smp_config = NULL, | ||
241 | .mpc_record = &mpc_record, | ||
242 | .mpc_apic_id = mpc_apic_id, | ||
243 | .mpc_oem_bus_info = mpc_oem_bus_info, | ||
244 | .mpc_oem_pci_bus = mpc_oem_pci_bus, | ||
245 | .smp_read_mpc_oem = smp_read_mpc_oem, | ||
246 | }; | ||
247 | |||
248 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, | ||
249 | char *productid) | ||
250 | { | ||
251 | if (strncmp(oem, "IBM NUMA", 8)) | ||
252 | printk("Warning! Not a NUMA-Q system!\n"); | ||
253 | else | ||
254 | found_numaq = 1; | ||
255 | } | ||
256 | |||
74 | static __init void early_check_numaq(void) | 257 | static __init void early_check_numaq(void) |
75 | { | 258 | { |
76 | /* | 259 | /* |
@@ -82,6 +265,9 @@ static __init void early_check_numaq(void) | |||
82 | */ | 265 | */ |
83 | if (smp_found_config) | 266 | if (smp_found_config) |
84 | early_get_smp_config(); | 267 | early_get_smp_config(); |
268 | |||
269 | if (found_numaq) | ||
270 | x86_quirks = &numaq_x86_quirks; | ||
85 | } | 271 | } |
86 | 272 | ||
87 | int __init get_memcfg_numaq(void) | 273 | int __init get_memcfg_numaq(void) |
@@ -92,14 +278,3 @@ int __init get_memcfg_numaq(void) | |||
92 | smp_dump_qct(); | 278 | smp_dump_qct(); |
93 | return 1; | 279 | return 1; |
94 | } | 280 | } |
95 | |||
96 | void __init numaq_tsc_disable(void) | ||
97 | { | ||
98 | if (!found_numaq) | ||
99 | return; | ||
100 | |||
101 | if (num_online_nodes() > 1) { | ||
102 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); | ||
103 | setup_clear_cpu_cap(X86_FEATURE_TSC); | ||
104 | } | ||
105 | } | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e0f571d58c19..94da4d52d798 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <asm/desc.h> | 29 | #include <asm/desc.h> |
30 | #include <asm/setup.h> | 30 | #include <asm/setup.h> |
31 | #include <asm/arch_hooks.h> | 31 | #include <asm/arch_hooks.h> |
32 | #include <asm/pgtable.h> | ||
32 | #include <asm/time.h> | 33 | #include <asm/time.h> |
33 | #include <asm/pgalloc.h> | 34 | #include <asm/pgalloc.h> |
34 | #include <asm/irq.h> | 35 | #include <asm/irq.h> |
@@ -123,6 +124,7 @@ static void *get_call_destination(u8 type) | |||
123 | .pv_irq_ops = pv_irq_ops, | 124 | .pv_irq_ops = pv_irq_ops, |
124 | .pv_apic_ops = pv_apic_ops, | 125 | .pv_apic_ops = pv_apic_ops, |
125 | .pv_mmu_ops = pv_mmu_ops, | 126 | .pv_mmu_ops = pv_mmu_ops, |
127 | .pv_lock_ops = pv_lock_ops, | ||
126 | }; | 128 | }; |
127 | return *((void **)&tmpl + type); | 129 | return *((void **)&tmpl + type); |
128 | } | 130 | } |
@@ -266,6 +268,17 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | |||
266 | return __get_cpu_var(paravirt_lazy_mode); | 268 | return __get_cpu_var(paravirt_lazy_mode); |
267 | } | 269 | } |
268 | 270 | ||
271 | void __init paravirt_use_bytelocks(void) | ||
272 | { | ||
273 | #ifdef CONFIG_SMP | ||
274 | pv_lock_ops.spin_is_locked = __byte_spin_is_locked; | ||
275 | pv_lock_ops.spin_is_contended = __byte_spin_is_contended; | ||
276 | pv_lock_ops.spin_lock = __byte_spin_lock; | ||
277 | pv_lock_ops.spin_trylock = __byte_spin_trylock; | ||
278 | pv_lock_ops.spin_unlock = __byte_spin_unlock; | ||
279 | #endif | ||
280 | } | ||
281 | |||
269 | struct pv_info pv_info = { | 282 | struct pv_info pv_info = { |
270 | .name = "bare hardware", | 283 | .name = "bare hardware", |
271 | .paravirt_enabled = 0, | 284 | .paravirt_enabled = 0, |
@@ -361,7 +374,6 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
361 | struct pv_apic_ops pv_apic_ops = { | 374 | struct pv_apic_ops pv_apic_ops = { |
362 | #ifdef CONFIG_X86_LOCAL_APIC | 375 | #ifdef CONFIG_X86_LOCAL_APIC |
363 | .apic_write = native_apic_write, | 376 | .apic_write = native_apic_write, |
364 | .apic_write_atomic = native_apic_write_atomic, | ||
365 | .apic_read = native_apic_read, | 377 | .apic_read = native_apic_read, |
366 | .setup_boot_clock = setup_boot_APIC_clock, | 378 | .setup_boot_clock = setup_boot_APIC_clock, |
367 | .setup_secondary_clock = setup_secondary_APIC_clock, | 379 | .setup_secondary_clock = setup_secondary_APIC_clock, |
@@ -373,6 +385,9 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
373 | #ifndef CONFIG_X86_64 | 385 | #ifndef CONFIG_X86_64 |
374 | .pagetable_setup_start = native_pagetable_setup_start, | 386 | .pagetable_setup_start = native_pagetable_setup_start, |
375 | .pagetable_setup_done = native_pagetable_setup_done, | 387 | .pagetable_setup_done = native_pagetable_setup_done, |
388 | #else | ||
389 | .pagetable_setup_start = paravirt_nop, | ||
390 | .pagetable_setup_done = paravirt_nop, | ||
376 | #endif | 391 | #endif |
377 | 392 | ||
378 | .read_cr2 = native_read_cr2, | 393 | .read_cr2 = native_read_cr2, |
@@ -428,7 +443,7 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
428 | #endif /* PAGETABLE_LEVELS >= 3 */ | 443 | #endif /* PAGETABLE_LEVELS >= 3 */ |
429 | 444 | ||
430 | .pte_val = native_pte_val, | 445 | .pte_val = native_pte_val, |
431 | .pte_flags = native_pte_val, | 446 | .pte_flags = native_pte_flags, |
432 | .pgd_val = native_pgd_val, | 447 | .pgd_val = native_pgd_val, |
433 | 448 | ||
434 | .make_pte = native_make_pte, | 449 | .make_pte = native_make_pte, |
@@ -446,6 +461,18 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
446 | .set_fixmap = native_set_fixmap, | 461 | .set_fixmap = native_set_fixmap, |
447 | }; | 462 | }; |
448 | 463 | ||
464 | struct pv_lock_ops pv_lock_ops = { | ||
465 | #ifdef CONFIG_SMP | ||
466 | .spin_is_locked = __ticket_spin_is_locked, | ||
467 | .spin_is_contended = __ticket_spin_is_contended, | ||
468 | |||
469 | .spin_lock = __ticket_spin_lock, | ||
470 | .spin_trylock = __ticket_spin_trylock, | ||
471 | .spin_unlock = __ticket_spin_unlock, | ||
472 | #endif | ||
473 | }; | ||
474 | EXPORT_SYMBOL_GPL(pv_lock_ops); | ||
475 | |||
449 | EXPORT_SYMBOL_GPL(pv_time_ops); | 476 | EXPORT_SYMBOL_GPL(pv_time_ops); |
450 | EXPORT_SYMBOL (pv_cpu_ops); | 477 | EXPORT_SYMBOL (pv_cpu_ops); |
451 | EXPORT_SYMBOL (pv_mmu_ops); | 478 | EXPORT_SYMBOL (pv_mmu_ops); |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 6959b5c45df4..b67a4b1d4eae 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
30 | #include <linux/spinlock.h> | 30 | #include <linux/spinlock.h> |
31 | #include <linux/string.h> | 31 | #include <linux/string.h> |
32 | #include <linux/crash_dump.h> | ||
32 | #include <linux/dma-mapping.h> | 33 | #include <linux/dma-mapping.h> |
33 | #include <linux/bitops.h> | 34 | #include <linux/bitops.h> |
34 | #include <linux/pci_ids.h> | 35 | #include <linux/pci_ids.h> |
@@ -36,7 +37,8 @@ | |||
36 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
37 | #include <linux/scatterlist.h> | 38 | #include <linux/scatterlist.h> |
38 | #include <linux/iommu-helper.h> | 39 | #include <linux/iommu-helper.h> |
39 | #include <asm/gart.h> | 40 | |
41 | #include <asm/iommu.h> | ||
40 | #include <asm/calgary.h> | 42 | #include <asm/calgary.h> |
41 | #include <asm/tce.h> | 43 | #include <asm/tce.h> |
42 | #include <asm/pci-direct.h> | 44 | #include <asm/pci-direct.h> |
@@ -167,6 +169,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl); | |||
167 | static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); | 169 | static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); |
168 | static void calioc2_tce_cache_blast(struct iommu_table *tbl); | 170 | static void calioc2_tce_cache_blast(struct iommu_table *tbl); |
169 | static void calioc2_dump_error_regs(struct iommu_table *tbl); | 171 | static void calioc2_dump_error_regs(struct iommu_table *tbl); |
172 | static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl); | ||
173 | static void get_tce_space_from_tar(void); | ||
170 | 174 | ||
171 | static struct cal_chipset_ops calgary_chip_ops = { | 175 | static struct cal_chipset_ops calgary_chip_ops = { |
172 | .handle_quirks = calgary_handle_quirks, | 176 | .handle_quirks = calgary_handle_quirks, |
@@ -410,22 +414,6 @@ static void calgary_unmap_sg(struct device *dev, | |||
410 | } | 414 | } |
411 | } | 415 | } |
412 | 416 | ||
413 | static int calgary_nontranslate_map_sg(struct device* dev, | ||
414 | struct scatterlist *sg, int nelems, int direction) | ||
415 | { | ||
416 | struct scatterlist *s; | ||
417 | int i; | ||
418 | |||
419 | for_each_sg(sg, s, nelems, i) { | ||
420 | struct page *p = sg_page(s); | ||
421 | |||
422 | BUG_ON(!p); | ||
423 | s->dma_address = virt_to_bus(sg_virt(s)); | ||
424 | s->dma_length = s->length; | ||
425 | } | ||
426 | return nelems; | ||
427 | } | ||
428 | |||
429 | static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | 417 | static int calgary_map_sg(struct device *dev, struct scatterlist *sg, |
430 | int nelems, int direction) | 418 | int nelems, int direction) |
431 | { | 419 | { |
@@ -436,9 +424,6 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | |||
436 | unsigned long entry; | 424 | unsigned long entry; |
437 | int i; | 425 | int i; |
438 | 426 | ||
439 | if (!translation_enabled(tbl)) | ||
440 | return calgary_nontranslate_map_sg(dev, sg, nelems, direction); | ||
441 | |||
442 | for_each_sg(sg, s, nelems, i) { | 427 | for_each_sg(sg, s, nelems, i) { |
443 | BUG_ON(!sg_page(s)); | 428 | BUG_ON(!sg_page(s)); |
444 | 429 | ||
@@ -474,7 +459,6 @@ error: | |||
474 | static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, | 459 | static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, |
475 | size_t size, int direction) | 460 | size_t size, int direction) |
476 | { | 461 | { |
477 | dma_addr_t dma_handle = bad_dma_address; | ||
478 | void *vaddr = phys_to_virt(paddr); | 462 | void *vaddr = phys_to_virt(paddr); |
479 | unsigned long uaddr; | 463 | unsigned long uaddr; |
480 | unsigned int npages; | 464 | unsigned int npages; |
@@ -483,12 +467,7 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, | |||
483 | uaddr = (unsigned long)vaddr; | 467 | uaddr = (unsigned long)vaddr; |
484 | npages = num_dma_pages(uaddr, size); | 468 | npages = num_dma_pages(uaddr, size); |
485 | 469 | ||
486 | if (translation_enabled(tbl)) | 470 | return iommu_alloc(dev, tbl, vaddr, npages, direction); |
487 | dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction); | ||
488 | else | ||
489 | dma_handle = virt_to_bus(vaddr); | ||
490 | |||
491 | return dma_handle; | ||
492 | } | 471 | } |
493 | 472 | ||
494 | static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, | 473 | static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, |
@@ -497,9 +476,6 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, | |||
497 | struct iommu_table *tbl = find_iommu_table(dev); | 476 | struct iommu_table *tbl = find_iommu_table(dev); |
498 | unsigned int npages; | 477 | unsigned int npages; |
499 | 478 | ||
500 | if (!translation_enabled(tbl)) | ||
501 | return; | ||
502 | |||
503 | npages = num_dma_pages(dma_handle, size); | 479 | npages = num_dma_pages(dma_handle, size); |
504 | iommu_free(tbl, dma_handle, npages); | 480 | iommu_free(tbl, dma_handle, npages); |
505 | } | 481 | } |
@@ -522,18 +498,12 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, | |||
522 | goto error; | 498 | goto error; |
523 | memset(ret, 0, size); | 499 | memset(ret, 0, size); |
524 | 500 | ||
525 | if (translation_enabled(tbl)) { | 501 | /* set up tces to cover the allocated range */ |
526 | /* set up tces to cover the allocated range */ | 502 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); |
527 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); | 503 | if (mapping == bad_dma_address) |
528 | if (mapping == bad_dma_address) | 504 | goto free; |
529 | goto free; | 505 | *dma_handle = mapping; |
530 | |||
531 | *dma_handle = mapping; | ||
532 | } else /* non translated slot */ | ||
533 | *dma_handle = virt_to_bus(ret); | ||
534 | |||
535 | return ret; | 506 | return ret; |
536 | |||
537 | free: | 507 | free: |
538 | free_pages((unsigned long)ret, get_order(size)); | 508 | free_pages((unsigned long)ret, get_order(size)); |
539 | ret = NULL; | 509 | ret = NULL; |
@@ -541,7 +511,7 @@ error: | |||
541 | return ret; | 511 | return ret; |
542 | } | 512 | } |
543 | 513 | ||
544 | static const struct dma_mapping_ops calgary_dma_ops = { | 514 | static struct dma_mapping_ops calgary_dma_ops = { |
545 | .alloc_coherent = calgary_alloc_coherent, | 515 | .alloc_coherent = calgary_alloc_coherent, |
546 | .map_single = calgary_map_single, | 516 | .map_single = calgary_map_single, |
547 | .unmap_single = calgary_unmap_single, | 517 | .unmap_single = calgary_unmap_single, |
@@ -830,7 +800,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar) | |||
830 | 800 | ||
831 | tbl = pci_iommu(dev->bus); | 801 | tbl = pci_iommu(dev->bus); |
832 | tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; | 802 | tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; |
833 | tce_free(tbl, 0, tbl->it_size); | 803 | |
804 | if (is_kdump_kernel()) | ||
805 | calgary_init_bitmap_from_tce_table(tbl); | ||
806 | else | ||
807 | tce_free(tbl, 0, tbl->it_size); | ||
834 | 808 | ||
835 | if (is_calgary(dev->device)) | 809 | if (is_calgary(dev->device)) |
836 | tbl->chip_ops = &calgary_chip_ops; | 810 | tbl->chip_ops = &calgary_chip_ops; |
@@ -1209,6 +1183,10 @@ static int __init calgary_init(void) | |||
1209 | if (ret) | 1183 | if (ret) |
1210 | return ret; | 1184 | return ret; |
1211 | 1185 | ||
1186 | /* Purely for kdump kernel case */ | ||
1187 | if (is_kdump_kernel()) | ||
1188 | get_tce_space_from_tar(); | ||
1189 | |||
1212 | do { | 1190 | do { |
1213 | dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); | 1191 | dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); |
1214 | if (!dev) | 1192 | if (!dev) |
@@ -1230,6 +1208,16 @@ static int __init calgary_init(void) | |||
1230 | goto error; | 1208 | goto error; |
1231 | } while (1); | 1209 | } while (1); |
1232 | 1210 | ||
1211 | dev = NULL; | ||
1212 | for_each_pci_dev(dev) { | ||
1213 | struct iommu_table *tbl; | ||
1214 | |||
1215 | tbl = find_iommu_table(&dev->dev); | ||
1216 | |||
1217 | if (translation_enabled(tbl)) | ||
1218 | dev->dev.archdata.dma_ops = &calgary_dma_ops; | ||
1219 | } | ||
1220 | |||
1233 | return ret; | 1221 | return ret; |
1234 | 1222 | ||
1235 | error: | 1223 | error: |
@@ -1251,6 +1239,7 @@ error: | |||
1251 | calgary_disable_translation(dev); | 1239 | calgary_disable_translation(dev); |
1252 | calgary_free_bus(dev); | 1240 | calgary_free_bus(dev); |
1253 | pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ | 1241 | pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ |
1242 | dev->dev.archdata.dma_ops = NULL; | ||
1254 | } while (1); | 1243 | } while (1); |
1255 | 1244 | ||
1256 | return ret; | 1245 | return ret; |
@@ -1339,6 +1328,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev) | |||
1339 | return (val != 0xffffffff); | 1328 | return (val != 0xffffffff); |
1340 | } | 1329 | } |
1341 | 1330 | ||
1331 | /* | ||
1332 | * calgary_init_bitmap_from_tce_table(): | ||
1333 | * Funtion for kdump case. In the second/kdump kernel initialize | ||
1334 | * the bitmap based on the tce table entries obtained from first kernel | ||
1335 | */ | ||
1336 | static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) | ||
1337 | { | ||
1338 | u64 *tp; | ||
1339 | unsigned int index; | ||
1340 | tp = ((u64 *)tbl->it_base); | ||
1341 | for (index = 0 ; index < tbl->it_size; index++) { | ||
1342 | if (*tp != 0x0) | ||
1343 | set_bit(index, tbl->it_map); | ||
1344 | tp++; | ||
1345 | } | ||
1346 | } | ||
1347 | |||
1348 | /* | ||
1349 | * get_tce_space_from_tar(): | ||
1350 | * Function for kdump case. Get the tce tables from first kernel | ||
1351 | * by reading the contents of the base adress register of calgary iommu | ||
1352 | */ | ||
1353 | static void get_tce_space_from_tar() | ||
1354 | { | ||
1355 | int bus; | ||
1356 | void __iomem *target; | ||
1357 | unsigned long tce_space; | ||
1358 | |||
1359 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { | ||
1360 | struct calgary_bus_info *info = &bus_info[bus]; | ||
1361 | unsigned short pci_device; | ||
1362 | u32 val; | ||
1363 | |||
1364 | val = read_pci_config(bus, 0, 0, 0); | ||
1365 | pci_device = (val & 0xFFFF0000) >> 16; | ||
1366 | |||
1367 | if (!is_cal_pci_dev(pci_device)) | ||
1368 | continue; | ||
1369 | if (info->translation_disabled) | ||
1370 | continue; | ||
1371 | |||
1372 | if (calgary_bus_has_devices(bus, pci_device) || | ||
1373 | translate_empty_slots) { | ||
1374 | target = calgary_reg(bus_info[bus].bbar, | ||
1375 | tar_offset(bus)); | ||
1376 | tce_space = be64_to_cpu(readq(target)); | ||
1377 | tce_space = tce_space & TAR_SW_BITS; | ||
1378 | |||
1379 | tce_space = tce_space & (~specified_table_size); | ||
1380 | info->tce_space = (u64 *)__va(tce_space); | ||
1381 | } | ||
1382 | } | ||
1383 | return; | ||
1384 | } | ||
1385 | |||
1342 | void __init detect_calgary(void) | 1386 | void __init detect_calgary(void) |
1343 | { | 1387 | { |
1344 | int bus; | 1388 | int bus; |
@@ -1394,7 +1438,8 @@ void __init detect_calgary(void) | |||
1394 | return; | 1438 | return; |
1395 | } | 1439 | } |
1396 | 1440 | ||
1397 | specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE); | 1441 | specified_table_size = determine_tce_table_size((is_kdump_kernel() ? |
1442 | saved_max_pfn : max_pfn) * PAGE_SIZE); | ||
1398 | 1443 | ||
1399 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { | 1444 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { |
1400 | struct calgary_bus_info *info = &bus_info[bus]; | 1445 | struct calgary_bus_info *info = &bus_info[bus]; |
@@ -1412,10 +1457,16 @@ void __init detect_calgary(void) | |||
1412 | 1457 | ||
1413 | if (calgary_bus_has_devices(bus, pci_device) || | 1458 | if (calgary_bus_has_devices(bus, pci_device) || |
1414 | translate_empty_slots) { | 1459 | translate_empty_slots) { |
1415 | tbl = alloc_tce_table(); | 1460 | /* |
1416 | if (!tbl) | 1461 | * If it is kdump kernel, find and use tce tables |
1417 | goto cleanup; | 1462 | * from first kernel, else allocate tce tables here |
1418 | info->tce_space = tbl; | 1463 | */ |
1464 | if (!is_kdump_kernel()) { | ||
1465 | tbl = alloc_tce_table(); | ||
1466 | if (!tbl) | ||
1467 | goto cleanup; | ||
1468 | info->tce_space = tbl; | ||
1469 | } | ||
1419 | calgary_found = 1; | 1470 | calgary_found = 1; |
1420 | } | 1471 | } |
1421 | } | 1472 | } |
@@ -1430,6 +1481,10 @@ void __init detect_calgary(void) | |||
1430 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " | 1481 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " |
1431 | "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, | 1482 | "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, |
1432 | debugging ? "enabled" : "disabled"); | 1483 | debugging ? "enabled" : "disabled"); |
1484 | |||
1485 | /* swiotlb for devices that aren't behind the Calgary. */ | ||
1486 | if (max_pfn > MAX_DMA32_PFN) | ||
1487 | swiotlb = 1; | ||
1433 | } | 1488 | } |
1434 | return; | 1489 | return; |
1435 | 1490 | ||
@@ -1446,7 +1501,7 @@ int __init calgary_iommu_init(void) | |||
1446 | { | 1501 | { |
1447 | int ret; | 1502 | int ret; |
1448 | 1503 | ||
1449 | if (no_iommu || swiotlb) | 1504 | if (no_iommu || (swiotlb && !calgary_detected)) |
1450 | return -ENODEV; | 1505 | return -ENODEV; |
1451 | 1506 | ||
1452 | if (!calgary_detected) | 1507 | if (!calgary_detected) |
@@ -1459,15 +1514,14 @@ int __init calgary_iommu_init(void) | |||
1459 | if (ret) { | 1514 | if (ret) { |
1460 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " | 1515 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " |
1461 | "falling back to no_iommu\n", ret); | 1516 | "falling back to no_iommu\n", ret); |
1462 | if (max_pfn > MAX_DMA32_PFN) | ||
1463 | printk(KERN_ERR "WARNING more than 4GB of memory, " | ||
1464 | "32bit PCI may malfunction.\n"); | ||
1465 | return ret; | 1517 | return ret; |
1466 | } | 1518 | } |
1467 | 1519 | ||
1468 | force_iommu = 1; | 1520 | force_iommu = 1; |
1469 | bad_dma_address = 0x0; | 1521 | bad_dma_address = 0x0; |
1470 | dma_ops = &calgary_dma_ops; | 1522 | /* dma_ops is set to swiotlb or nommu */ |
1523 | if (!dma_ops) | ||
1524 | dma_ops = &nommu_dma_ops; | ||
1471 | 1525 | ||
1472 | return 0; | 1526 | return 0; |
1473 | } | 1527 | } |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 8467ec2320f1..37544123896d 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -5,14 +5,13 @@ | |||
5 | 5 | ||
6 | #include <asm/proto.h> | 6 | #include <asm/proto.h> |
7 | #include <asm/dma.h> | 7 | #include <asm/dma.h> |
8 | #include <asm/gart.h> | 8 | #include <asm/iommu.h> |
9 | #include <asm/calgary.h> | 9 | #include <asm/calgary.h> |
10 | #include <asm/amd_iommu.h> | 10 | #include <asm/amd_iommu.h> |
11 | 11 | ||
12 | int forbid_dac __read_mostly; | 12 | static int forbid_dac __read_mostly; |
13 | EXPORT_SYMBOL(forbid_dac); | ||
14 | 13 | ||
15 | const struct dma_mapping_ops *dma_ops; | 14 | struct dma_mapping_ops *dma_ops; |
16 | EXPORT_SYMBOL(dma_ops); | 15 | EXPORT_SYMBOL(dma_ops); |
17 | 16 | ||
18 | static int iommu_sac_force __read_mostly; | 17 | static int iommu_sac_force __read_mostly; |
@@ -114,21 +113,15 @@ void __init pci_iommu_alloc(void) | |||
114 | * The order of these functions is important for | 113 | * The order of these functions is important for |
115 | * fall-back/fail-over reasons | 114 | * fall-back/fail-over reasons |
116 | */ | 115 | */ |
117 | #ifdef CONFIG_GART_IOMMU | ||
118 | gart_iommu_hole_init(); | 116 | gart_iommu_hole_init(); |
119 | #endif | ||
120 | 117 | ||
121 | #ifdef CONFIG_CALGARY_IOMMU | ||
122 | detect_calgary(); | 118 | detect_calgary(); |
123 | #endif | ||
124 | 119 | ||
125 | detect_intel_iommu(); | 120 | detect_intel_iommu(); |
126 | 121 | ||
127 | amd_iommu_detect(); | 122 | amd_iommu_detect(); |
128 | 123 | ||
129 | #ifdef CONFIG_SWIOTLB | ||
130 | pci_swiotlb_init(); | 124 | pci_swiotlb_init(); |
131 | #endif | ||
132 | } | 125 | } |
133 | #endif | 126 | #endif |
134 | 127 | ||
@@ -184,9 +177,7 @@ static __init int iommu_setup(char *p) | |||
184 | swiotlb = 1; | 177 | swiotlb = 1; |
185 | #endif | 178 | #endif |
186 | 179 | ||
187 | #ifdef CONFIG_GART_IOMMU | ||
188 | gart_parse_options(p); | 180 | gart_parse_options(p); |
189 | #endif | ||
190 | 181 | ||
191 | #ifdef CONFIG_CALGARY_IOMMU | 182 | #ifdef CONFIG_CALGARY_IOMMU |
192 | if (!strncmp(p, "calgary", 7)) | 183 | if (!strncmp(p, "calgary", 7)) |
@@ -321,16 +312,17 @@ static int dma_release_coherent(struct device *dev, int order, void *vaddr) | |||
321 | 312 | ||
322 | int dma_supported(struct device *dev, u64 mask) | 313 | int dma_supported(struct device *dev, u64 mask) |
323 | { | 314 | { |
315 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
316 | |||
324 | #ifdef CONFIG_PCI | 317 | #ifdef CONFIG_PCI |
325 | if (mask > 0xffffffff && forbid_dac > 0) { | 318 | if (mask > 0xffffffff && forbid_dac > 0) { |
326 | printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", | 319 | dev_info(dev, "PCI: Disallowing DAC for device\n"); |
327 | dev->bus_id); | ||
328 | return 0; | 320 | return 0; |
329 | } | 321 | } |
330 | #endif | 322 | #endif |
331 | 323 | ||
332 | if (dma_ops->dma_supported) | 324 | if (ops->dma_supported) |
333 | return dma_ops->dma_supported(dev, mask); | 325 | return ops->dma_supported(dev, mask); |
334 | 326 | ||
335 | /* Copied from i386. Doesn't make much sense, because it will | 327 | /* Copied from i386. Doesn't make much sense, because it will |
336 | only work for pci_alloc_coherent. | 328 | only work for pci_alloc_coherent. |
@@ -351,8 +343,7 @@ int dma_supported(struct device *dev, u64 mask) | |||
351 | type. Normally this doesn't make any difference, but gives | 343 | type. Normally this doesn't make any difference, but gives |
352 | more gentle handling of IOMMU overflow. */ | 344 | more gentle handling of IOMMU overflow. */ |
353 | if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { | 345 | if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { |
354 | printk(KERN_INFO "%s: Force SAC with mask %Lx\n", | 346 | dev_info(dev, "Force SAC with mask %Lx\n", mask); |
355 | dev->bus_id, mask); | ||
356 | return 0; | 347 | return 0; |
357 | } | 348 | } |
358 | 349 | ||
@@ -378,6 +369,7 @@ void * | |||
378 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | 369 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, |
379 | gfp_t gfp) | 370 | gfp_t gfp) |
380 | { | 371 | { |
372 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
381 | void *memory = NULL; | 373 | void *memory = NULL; |
382 | struct page *page; | 374 | struct page *page; |
383 | unsigned long dma_mask = 0; | 375 | unsigned long dma_mask = 0; |
@@ -446,8 +438,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
446 | /* Let low level make its own zone decisions */ | 438 | /* Let low level make its own zone decisions */ |
447 | gfp &= ~(GFP_DMA32|GFP_DMA); | 439 | gfp &= ~(GFP_DMA32|GFP_DMA); |
448 | 440 | ||
449 | if (dma_ops->alloc_coherent) | 441 | if (ops->alloc_coherent) |
450 | return dma_ops->alloc_coherent(dev, size, | 442 | return ops->alloc_coherent(dev, size, |
451 | dma_handle, gfp); | 443 | dma_handle, gfp); |
452 | return NULL; | 444 | return NULL; |
453 | } | 445 | } |
@@ -459,14 +451,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
459 | } | 451 | } |
460 | } | 452 | } |
461 | 453 | ||
462 | if (dma_ops->alloc_coherent) { | 454 | if (ops->alloc_coherent) { |
463 | free_pages((unsigned long)memory, get_order(size)); | 455 | free_pages((unsigned long)memory, get_order(size)); |
464 | gfp &= ~(GFP_DMA|GFP_DMA32); | 456 | gfp &= ~(GFP_DMA|GFP_DMA32); |
465 | return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); | 457 | return ops->alloc_coherent(dev, size, dma_handle, gfp); |
466 | } | 458 | } |
467 | 459 | ||
468 | if (dma_ops->map_simple) { | 460 | if (ops->map_simple) { |
469 | *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), | 461 | *dma_handle = ops->map_simple(dev, virt_to_phys(memory), |
470 | size, | 462 | size, |
471 | PCI_DMA_BIDIRECTIONAL); | 463 | PCI_DMA_BIDIRECTIONAL); |
472 | if (*dma_handle != bad_dma_address) | 464 | if (*dma_handle != bad_dma_address) |
@@ -488,29 +480,27 @@ EXPORT_SYMBOL(dma_alloc_coherent); | |||
488 | void dma_free_coherent(struct device *dev, size_t size, | 480 | void dma_free_coherent(struct device *dev, size_t size, |
489 | void *vaddr, dma_addr_t bus) | 481 | void *vaddr, dma_addr_t bus) |
490 | { | 482 | { |
483 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
484 | |||
491 | int order = get_order(size); | 485 | int order = get_order(size); |
492 | WARN_ON(irqs_disabled()); /* for portability */ | 486 | WARN_ON(irqs_disabled()); /* for portability */ |
493 | if (dma_release_coherent(dev, order, vaddr)) | 487 | if (dma_release_coherent(dev, order, vaddr)) |
494 | return; | 488 | return; |
495 | if (dma_ops->unmap_single) | 489 | if (ops->unmap_single) |
496 | dma_ops->unmap_single(dev, bus, size, 0); | 490 | ops->unmap_single(dev, bus, size, 0); |
497 | free_pages((unsigned long)vaddr, order); | 491 | free_pages((unsigned long)vaddr, order); |
498 | } | 492 | } |
499 | EXPORT_SYMBOL(dma_free_coherent); | 493 | EXPORT_SYMBOL(dma_free_coherent); |
500 | 494 | ||
501 | static int __init pci_iommu_init(void) | 495 | static int __init pci_iommu_init(void) |
502 | { | 496 | { |
503 | #ifdef CONFIG_CALGARY_IOMMU | ||
504 | calgary_iommu_init(); | 497 | calgary_iommu_init(); |
505 | #endif | ||
506 | 498 | ||
507 | intel_iommu_init(); | 499 | intel_iommu_init(); |
508 | 500 | ||
509 | amd_iommu_init(); | 501 | amd_iommu_init(); |
510 | 502 | ||
511 | #ifdef CONFIG_GART_IOMMU | ||
512 | gart_iommu_init(); | 503 | gart_iommu_init(); |
513 | #endif | ||
514 | 504 | ||
515 | no_iommu_init(); | 505 | no_iommu_init(); |
516 | return 0; | 506 | return 0; |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c3fe78406d18..744126e64950 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <asm/mtrr.h> | 32 | #include <asm/mtrr.h> |
33 | #include <asm/pgtable.h> | 33 | #include <asm/pgtable.h> |
34 | #include <asm/proto.h> | 34 | #include <asm/proto.h> |
35 | #include <asm/iommu.h> | ||
35 | #include <asm/gart.h> | 36 | #include <asm/gart.h> |
36 | #include <asm/cacheflush.h> | 37 | #include <asm/cacheflush.h> |
37 | #include <asm/swiotlb.h> | 38 | #include <asm/swiotlb.h> |
@@ -197,9 +198,7 @@ static void iommu_full(struct device *dev, size_t size, int dir) | |||
197 | * out. Hopefully no network devices use single mappings that big. | 198 | * out. Hopefully no network devices use single mappings that big. |
198 | */ | 199 | */ |
199 | 200 | ||
200 | printk(KERN_ERR | 201 | dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size); |
201 | "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", | ||
202 | size, dev->bus_id); | ||
203 | 202 | ||
204 | if (size > PAGE_SIZE*EMERGENCY_PAGES) { | 203 | if (size > PAGE_SIZE*EMERGENCY_PAGES) { |
205 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) | 204 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) |
@@ -693,8 +692,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
693 | 692 | ||
694 | extern int agp_amd64_init(void); | 693 | extern int agp_amd64_init(void); |
695 | 694 | ||
696 | static const struct dma_mapping_ops gart_dma_ops = { | 695 | static struct dma_mapping_ops gart_dma_ops = { |
697 | .mapping_error = NULL, | ||
698 | .map_single = gart_map_single, | 696 | .map_single = gart_map_single, |
699 | .map_simple = gart_map_simple, | 697 | .map_simple = gart_map_simple, |
700 | .unmap_single = gart_unmap_single, | 698 | .unmap_single = gart_unmap_single, |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index aec43d56f49c..3f91f71cdc3e 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -7,7 +7,7 @@ | |||
7 | #include <linux/dma-mapping.h> | 7 | #include <linux/dma-mapping.h> |
8 | #include <linux/scatterlist.h> | 8 | #include <linux/scatterlist.h> |
9 | 9 | ||
10 | #include <asm/gart.h> | 10 | #include <asm/iommu.h> |
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/dma.h> | 12 | #include <asm/dma.h> |
13 | 13 | ||
@@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | |||
72 | return nents; | 72 | return nents; |
73 | } | 73 | } |
74 | 74 | ||
75 | /* Make sure we keep the same behaviour */ | 75 | struct dma_mapping_ops nommu_dma_ops = { |
76 | static int nommu_mapping_error(dma_addr_t dma_addr) | ||
77 | { | ||
78 | #ifdef CONFIG_X86_32 | ||
79 | return 0; | ||
80 | #else | ||
81 | return (dma_addr == bad_dma_address); | ||
82 | #endif | ||
83 | } | ||
84 | |||
85 | |||
86 | const struct dma_mapping_ops nommu_dma_ops = { | ||
87 | .map_single = nommu_map_single, | 76 | .map_single = nommu_map_single, |
88 | .map_sg = nommu_map_sg, | 77 | .map_sg = nommu_map_sg, |
89 | .mapping_error = nommu_mapping_error, | ||
90 | .is_phys = 1, | 78 | .is_phys = 1, |
91 | }; | 79 | }; |
92 | 80 | ||
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 82299cd1d04d..c4ce0332759e 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c | |||
@@ -5,7 +5,7 @@ | |||
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/dma-mapping.h> | 6 | #include <linux/dma-mapping.h> |
7 | 7 | ||
8 | #include <asm/gart.h> | 8 | #include <asm/iommu.h> |
9 | #include <asm/swiotlb.h> | 9 | #include <asm/swiotlb.h> |
10 | #include <asm/dma.h> | 10 | #include <asm/dma.h> |
11 | 11 | ||
@@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, | |||
18 | return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); | 18 | return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); |
19 | } | 19 | } |
20 | 20 | ||
21 | const struct dma_mapping_ops swiotlb_dma_ops = { | 21 | struct dma_mapping_ops swiotlb_dma_ops = { |
22 | .mapping_error = swiotlb_dma_mapping_error, | 22 | .mapping_error = swiotlb_dma_mapping_error, |
23 | .alloc_coherent = swiotlb_alloc_coherent, | 23 | .alloc_coherent = swiotlb_alloc_coherent, |
24 | .free_coherent = swiotlb_free_coherent, | 24 | .free_coherent = swiotlb_free_coherent, |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4d629c62f4f8..7fc4d5b0a6a0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -15,6 +15,7 @@ unsigned long idle_nomwait; | |||
15 | EXPORT_SYMBOL(idle_nomwait); | 15 | EXPORT_SYMBOL(idle_nomwait); |
16 | 16 | ||
17 | struct kmem_cache *task_xstate_cachep; | 17 | struct kmem_cache *task_xstate_cachep; |
18 | static int force_mwait __cpuinitdata; | ||
18 | 19 | ||
19 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 20 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
20 | { | 21 | { |
@@ -199,6 +200,7 @@ static void poll_idle(void) | |||
199 | * | 200 | * |
200 | * idle=mwait overrides this decision and forces the usage of mwait. | 201 | * idle=mwait overrides this decision and forces the usage of mwait. |
201 | */ | 202 | */ |
203 | static int __cpuinitdata force_mwait; | ||
202 | 204 | ||
203 | #define MWAIT_INFO 0x05 | 205 | #define MWAIT_INFO 0x05 |
204 | #define MWAIT_ECX_EXTENDED_INFO 0x01 | 206 | #define MWAIT_ECX_EXTENDED_INFO 0x01 |
@@ -326,6 +328,9 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
326 | 328 | ||
327 | static int __init idle_setup(char *str) | 329 | static int __init idle_setup(char *str) |
328 | { | 330 | { |
331 | if (!str) | ||
332 | return -EINVAL; | ||
333 | |||
329 | if (!strcmp(str, "poll")) { | 334 | if (!strcmp(str, "poll")) { |
330 | printk("using polling idle threads.\n"); | 335 | printk("using polling idle threads.\n"); |
331 | pm_idle = poll_idle; | 336 | pm_idle = poll_idle; |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0c3927accb00..53bc653ed5ca 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -128,7 +128,7 @@ void cpu_idle(void) | |||
128 | 128 | ||
129 | /* endless idle loop with no priority at all */ | 129 | /* endless idle loop with no priority at all */ |
130 | while (1) { | 130 | while (1) { |
131 | tick_nohz_stop_sched_tick(); | 131 | tick_nohz_stop_sched_tick(1); |
132 | while (!need_resched()) { | 132 | while (!need_resched()) { |
133 | 133 | ||
134 | check_pgt_cache(); | 134 | check_pgt_cache(); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a8e53626ac9a..3fb62a7d9a16 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -120,7 +120,7 @@ void cpu_idle(void) | |||
120 | current_thread_info()->status |= TS_POLLING; | 120 | current_thread_info()->status |= TS_POLLING; |
121 | /* endless idle loop with no priority at all */ | 121 | /* endless idle loop with no priority at all */ |
122 | while (1) { | 122 | while (1) { |
123 | tick_nohz_stop_sched_tick(); | 123 | tick_nohz_stop_sched_tick(1); |
124 | while (!need_resched()) { | 124 | while (!need_resched()) { |
125 | 125 | ||
126 | rmb(); | 126 | rmb(); |
@@ -537,8 +537,8 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
537 | struct task_struct * | 537 | struct task_struct * |
538 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 538 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
539 | { | 539 | { |
540 | struct thread_struct *prev = &prev_p->thread, | 540 | struct thread_struct *prev = &prev_p->thread; |
541 | *next = &next_p->thread; | 541 | struct thread_struct *next = &next_p->thread; |
542 | int cpu = smp_processor_id(); | 542 | int cpu = smp_processor_id(); |
543 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 543 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
544 | unsigned fsindex, gsindex; | 544 | unsigned fsindex, gsindex; |
@@ -586,35 +586,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
586 | 586 | ||
587 | /* | 587 | /* |
588 | * Switch FS and GS. | 588 | * Switch FS and GS. |
589 | * | ||
590 | * Segment register != 0 always requires a reload. Also | ||
591 | * reload when it has changed. When prev process used 64bit | ||
592 | * base always reload to avoid an information leak. | ||
589 | */ | 593 | */ |
590 | { | 594 | if (unlikely(fsindex | next->fsindex | prev->fs)) { |
591 | /* segment register != 0 always requires a reload. | 595 | loadsegment(fs, next->fsindex); |
592 | also reload when it has changed. | 596 | /* |
593 | when prev process used 64bit base always reload | 597 | * Check if the user used a selector != 0; if yes |
594 | to avoid an information leak. */ | 598 | * clear 64bit base, since overloaded base is always |
595 | if (unlikely(fsindex | next->fsindex | prev->fs)) { | 599 | * mapped to the Null selector |
596 | loadsegment(fs, next->fsindex); | 600 | */ |
597 | /* check if the user used a selector != 0 | 601 | if (fsindex) |
598 | * if yes clear 64bit base, since overloaded base | ||
599 | * is always mapped to the Null selector | ||
600 | */ | ||
601 | if (fsindex) | ||
602 | prev->fs = 0; | 602 | prev->fs = 0; |
603 | } | 603 | } |
604 | /* when next process has a 64bit base use it */ | 604 | /* when next process has a 64bit base use it */ |
605 | if (next->fs) | 605 | if (next->fs) |
606 | wrmsrl(MSR_FS_BASE, next->fs); | 606 | wrmsrl(MSR_FS_BASE, next->fs); |
607 | prev->fsindex = fsindex; | 607 | prev->fsindex = fsindex; |
608 | 608 | ||
609 | if (unlikely(gsindex | next->gsindex | prev->gs)) { | 609 | if (unlikely(gsindex | next->gsindex | prev->gs)) { |
610 | load_gs_index(next->gsindex); | 610 | load_gs_index(next->gsindex); |
611 | if (gsindex) | 611 | if (gsindex) |
612 | prev->gs = 0; | 612 | prev->gs = 0; |
613 | } | ||
614 | if (next->gs) | ||
615 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | ||
616 | prev->gsindex = gsindex; | ||
617 | } | 613 | } |
614 | if (next->gs) | ||
615 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | ||
616 | prev->gsindex = gsindex; | ||
618 | 617 | ||
619 | /* Must be after DS reload */ | 618 | /* Must be after DS reload */ |
620 | unlazy_fpu(prev_p); | 619 | unlazy_fpu(prev_p); |
@@ -627,7 +626,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
627 | write_pda(pcurrent, next_p); | 626 | write_pda(pcurrent, next_p); |
628 | 627 | ||
629 | write_pda(kernelstack, | 628 | write_pda(kernelstack, |
630 | (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); | 629 | (unsigned long)task_stack_page(next_p) + |
630 | THREAD_SIZE - PDA_STACKOFFSET); | ||
631 | #ifdef CONFIG_CC_STACKPROTECTOR | 631 | #ifdef CONFIG_CC_STACKPROTECTOR |
632 | write_pda(stack_canary, next_p->stack_canary); | 632 | write_pda(stack_canary, next_p->stack_canary); |
633 | /* | 633 | /* |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 77040b6070e1..e37dccce85db 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -1357,8 +1357,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) | |||
1357 | #endif | 1357 | #endif |
1358 | } | 1358 | } |
1359 | 1359 | ||
1360 | #ifdef CONFIG_X86_32 | ||
1361 | |||
1362 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | 1360 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) |
1363 | { | 1361 | { |
1364 | struct siginfo info; | 1362 | struct siginfo info; |
@@ -1377,89 +1375,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | |||
1377 | force_sig_info(SIGTRAP, &info, tsk); | 1375 | force_sig_info(SIGTRAP, &info, tsk); |
1378 | } | 1376 | } |
1379 | 1377 | ||
1380 | /* notification of system call entry/exit | ||
1381 | * - triggered by current->work.syscall_trace | ||
1382 | */ | ||
1383 | int do_syscall_trace(struct pt_regs *regs, int entryexit) | ||
1384 | { | ||
1385 | int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); | ||
1386 | /* | ||
1387 | * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall | ||
1388 | * interception | ||
1389 | */ | ||
1390 | int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); | ||
1391 | int ret = 0; | ||
1392 | |||
1393 | /* do the secure computing check first */ | ||
1394 | if (!entryexit) | ||
1395 | secure_computing(regs->orig_ax); | ||
1396 | |||
1397 | if (unlikely(current->audit_context)) { | ||
1398 | if (entryexit) | ||
1399 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), | ||
1400 | regs->ax); | ||
1401 | /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only | ||
1402 | * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is | ||
1403 | * not used, entry.S will call us only on syscall exit, not | ||
1404 | * entry; so when TIF_SYSCALL_AUDIT is used we must avoid | ||
1405 | * calling send_sigtrap() on syscall entry. | ||
1406 | * | ||
1407 | * Note that when PTRACE_SYSEMU_SINGLESTEP is used, | ||
1408 | * is_singlestep is false, despite his name, so we will still do | ||
1409 | * the correct thing. | ||
1410 | */ | ||
1411 | else if (is_singlestep) | ||
1412 | goto out; | ||
1413 | } | ||
1414 | |||
1415 | if (!(current->ptrace & PT_PTRACED)) | ||
1416 | goto out; | ||
1417 | |||
1418 | /* If a process stops on the 1st tracepoint with SYSCALL_TRACE | ||
1419 | * and then is resumed with SYSEMU_SINGLESTEP, it will come in | ||
1420 | * here. We have to check this and return */ | ||
1421 | if (is_sysemu && entryexit) | ||
1422 | return 0; | ||
1423 | |||
1424 | /* Fake a debug trap */ | ||
1425 | if (is_singlestep) | ||
1426 | send_sigtrap(current, regs, 0); | ||
1427 | |||
1428 | if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) | ||
1429 | goto out; | ||
1430 | |||
1431 | /* the 0x80 provides a way for the tracing parent to distinguish | ||
1432 | between a syscall stop and SIGTRAP delivery */ | ||
1433 | /* Note that the debugger could change the result of test_thread_flag!*/ | ||
1434 | ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); | ||
1435 | |||
1436 | /* | ||
1437 | * this isn't the same as continuing with a signal, but it will do | ||
1438 | * for normal use. strace only continues with a signal if the | ||
1439 | * stopping signal is not SIGTRAP. -brl | ||
1440 | */ | ||
1441 | if (current->exit_code) { | ||
1442 | send_sig(current->exit_code, current, 1); | ||
1443 | current->exit_code = 0; | ||
1444 | } | ||
1445 | ret = is_sysemu; | ||
1446 | out: | ||
1447 | if (unlikely(current->audit_context) && !entryexit) | ||
1448 | audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, | ||
1449 | regs->bx, regs->cx, regs->dx, regs->si); | ||
1450 | if (ret == 0) | ||
1451 | return 0; | ||
1452 | |||
1453 | regs->orig_ax = -1; /* force skip of syscall restarting */ | ||
1454 | if (unlikely(current->audit_context)) | ||
1455 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | ||
1456 | return 1; | ||
1457 | } | ||
1458 | |||
1459 | #else /* CONFIG_X86_64 */ | ||
1460 | |||
1461 | static void syscall_trace(struct pt_regs *regs) | 1378 | static void syscall_trace(struct pt_regs *regs) |
1462 | { | 1379 | { |
1380 | if (!(current->ptrace & PT_PTRACED)) | ||
1381 | return; | ||
1463 | 1382 | ||
1464 | #if 0 | 1383 | #if 0 |
1465 | printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", | 1384 | printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", |
@@ -1481,39 +1400,81 @@ static void syscall_trace(struct pt_regs *regs) | |||
1481 | } | 1400 | } |
1482 | } | 1401 | } |
1483 | 1402 | ||
1484 | asmlinkage void syscall_trace_enter(struct pt_regs *regs) | 1403 | #ifdef CONFIG_X86_32 |
1404 | # define IS_IA32 1 | ||
1405 | #elif defined CONFIG_IA32_EMULATION | ||
1406 | # define IS_IA32 test_thread_flag(TIF_IA32) | ||
1407 | #else | ||
1408 | # define IS_IA32 0 | ||
1409 | #endif | ||
1410 | |||
1411 | /* | ||
1412 | * We must return the syscall number to actually look up in the table. | ||
1413 | * This can be -1L to skip running any syscall at all. | ||
1414 | */ | ||
1415 | asmregparm long syscall_trace_enter(struct pt_regs *regs) | ||
1485 | { | 1416 | { |
1417 | long ret = 0; | ||
1418 | |||
1419 | /* | ||
1420 | * If we stepped into a sysenter/syscall insn, it trapped in | ||
1421 | * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. | ||
1422 | * If user-mode had set TF itself, then it's still clear from | ||
1423 | * do_debug() and we need to set it again to restore the user | ||
1424 | * state. If we entered on the slow path, TF was already set. | ||
1425 | */ | ||
1426 | if (test_thread_flag(TIF_SINGLESTEP)) | ||
1427 | regs->flags |= X86_EFLAGS_TF; | ||
1428 | |||
1486 | /* do the secure computing check first */ | 1429 | /* do the secure computing check first */ |
1487 | secure_computing(regs->orig_ax); | 1430 | secure_computing(regs->orig_ax); |
1488 | 1431 | ||
1489 | if (test_thread_flag(TIF_SYSCALL_TRACE) | 1432 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) |
1490 | && (current->ptrace & PT_PTRACED)) | 1433 | ret = -1L; |
1434 | |||
1435 | if (ret || test_thread_flag(TIF_SYSCALL_TRACE)) | ||
1491 | syscall_trace(regs); | 1436 | syscall_trace(regs); |
1492 | 1437 | ||
1493 | if (unlikely(current->audit_context)) { | 1438 | if (unlikely(current->audit_context)) { |
1494 | if (test_thread_flag(TIF_IA32)) { | 1439 | if (IS_IA32) |
1495 | audit_syscall_entry(AUDIT_ARCH_I386, | 1440 | audit_syscall_entry(AUDIT_ARCH_I386, |
1496 | regs->orig_ax, | 1441 | regs->orig_ax, |
1497 | regs->bx, regs->cx, | 1442 | regs->bx, regs->cx, |
1498 | regs->dx, regs->si); | 1443 | regs->dx, regs->si); |
1499 | } else { | 1444 | #ifdef CONFIG_X86_64 |
1445 | else | ||
1500 | audit_syscall_entry(AUDIT_ARCH_X86_64, | 1446 | audit_syscall_entry(AUDIT_ARCH_X86_64, |
1501 | regs->orig_ax, | 1447 | regs->orig_ax, |
1502 | regs->di, regs->si, | 1448 | regs->di, regs->si, |
1503 | regs->dx, regs->r10); | 1449 | regs->dx, regs->r10); |
1504 | } | 1450 | #endif |
1505 | } | 1451 | } |
1452 | |||
1453 | return ret ?: regs->orig_ax; | ||
1506 | } | 1454 | } |
1507 | 1455 | ||
1508 | asmlinkage void syscall_trace_leave(struct pt_regs *regs) | 1456 | asmregparm void syscall_trace_leave(struct pt_regs *regs) |
1509 | { | 1457 | { |
1510 | if (unlikely(current->audit_context)) | 1458 | if (unlikely(current->audit_context)) |
1511 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | 1459 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); |
1512 | 1460 | ||
1513 | if ((test_thread_flag(TIF_SYSCALL_TRACE) | 1461 | if (test_thread_flag(TIF_SYSCALL_TRACE)) |
1514 | || test_thread_flag(TIF_SINGLESTEP)) | ||
1515 | && (current->ptrace & PT_PTRACED)) | ||
1516 | syscall_trace(regs); | 1462 | syscall_trace(regs); |
1517 | } | ||
1518 | 1463 | ||
1519 | #endif /* CONFIG_X86_32 */ | 1464 | /* |
1465 | * If TIF_SYSCALL_EMU is set, we only get here because of | ||
1466 | * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). | ||
1467 | * We already reported this syscall instruction in | ||
1468 | * syscall_trace_enter(), so don't do any more now. | ||
1469 | */ | ||
1470 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) | ||
1471 | return; | ||
1472 | |||
1473 | /* | ||
1474 | * If we are single-stepping, synthesize a trap to follow the | ||
1475 | * system call instruction. | ||
1476 | */ | ||
1477 | if (test_thread_flag(TIF_SINGLESTEP) && | ||
1478 | (current->ptrace & PT_PTRACED)) | ||
1479 | send_sigtrap(current, regs, 0); | ||
1480 | } | ||
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f8a62160e151..06a9f643817e 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -177,6 +177,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
177 | DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), | 177 | DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), |
178 | }, | 178 | }, |
179 | }, | 179 | }, |
180 | { /* Handle problems with rebooting on Dell T5400's */ | ||
181 | .callback = set_bios_reboot, | ||
182 | .ident = "Dell Precision T5400", | ||
183 | .matches = { | ||
184 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), | ||
185 | DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"), | ||
186 | }, | ||
187 | }, | ||
180 | { /* Handle problems with rebooting on HP laptops */ | 188 | { /* Handle problems with rebooting on HP laptops */ |
181 | .callback = set_bios_reboot, | 189 | .callback = set_bios_reboot, |
182 | .ident = "HP Compaq Laptop", | 190 | .ident = "HP Compaq Laptop", |
@@ -403,24 +411,28 @@ void native_machine_shutdown(void) | |||
403 | { | 411 | { |
404 | /* Stop the cpus and apics */ | 412 | /* Stop the cpus and apics */ |
405 | #ifdef CONFIG_SMP | 413 | #ifdef CONFIG_SMP |
406 | int reboot_cpu_id; | ||
407 | 414 | ||
408 | /* The boot cpu is always logical cpu 0 */ | 415 | /* The boot cpu is always logical cpu 0 */ |
409 | reboot_cpu_id = 0; | 416 | int reboot_cpu_id = 0; |
417 | cpumask_of_cpu_ptr(newmask, reboot_cpu_id); | ||
410 | 418 | ||
411 | #ifdef CONFIG_X86_32 | 419 | #ifdef CONFIG_X86_32 |
412 | /* See if there has been given a command line override */ | 420 | /* See if there has been given a command line override */ |
413 | if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && | 421 | if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && |
414 | cpu_online(reboot_cpu)) | 422 | cpu_online(reboot_cpu)) { |
415 | reboot_cpu_id = reboot_cpu; | 423 | reboot_cpu_id = reboot_cpu; |
424 | cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); | ||
425 | } | ||
416 | #endif | 426 | #endif |
417 | 427 | ||
418 | /* Make certain the cpu I'm about to reboot on is online */ | 428 | /* Make certain the cpu I'm about to reboot on is online */ |
419 | if (!cpu_online(reboot_cpu_id)) | 429 | if (!cpu_online(reboot_cpu_id)) { |
420 | reboot_cpu_id = smp_processor_id(); | 430 | reboot_cpu_id = smp_processor_id(); |
431 | cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); | ||
432 | } | ||
421 | 433 | ||
422 | /* Make certain I only run on the appropriate processor */ | 434 | /* Make certain I only run on the appropriate processor */ |
423 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); | 435 | set_cpus_allowed_ptr(current, newmask); |
424 | 436 | ||
425 | /* O.K Now that I'm on the appropriate processor, | 437 | /* O.K Now that I'm on the appropriate processor, |
426 | * stop all of the others. | 438 | * stop all of the others. |
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index c30fe25d470d..703310a99023 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S | |||
@@ -20,11 +20,44 @@ | |||
20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | 20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) |
21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) | 21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) |
22 | 22 | ||
23 | /* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are | ||
24 | * used to save some data for jumping back | ||
25 | */ | ||
26 | #define DATA(offset) (PAGE_SIZE/2+(offset)) | ||
27 | |||
28 | /* Minimal CPU state */ | ||
29 | #define ESP DATA(0x0) | ||
30 | #define CR0 DATA(0x4) | ||
31 | #define CR3 DATA(0x8) | ||
32 | #define CR4 DATA(0xc) | ||
33 | |||
34 | /* other data */ | ||
35 | #define CP_VA_CONTROL_PAGE DATA(0x10) | ||
36 | #define CP_PA_PGD DATA(0x14) | ||
37 | #define CP_PA_SWAP_PAGE DATA(0x18) | ||
38 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) | ||
39 | |||
23 | .text | 40 | .text |
24 | .align PAGE_SIZE | 41 | .align PAGE_SIZE |
25 | .globl relocate_kernel | 42 | .globl relocate_kernel |
26 | relocate_kernel: | 43 | relocate_kernel: |
27 | movl 8(%esp), %ebp /* list of pages */ | 44 | /* Save the CPU context, used for jumping back */ |
45 | |||
46 | pushl %ebx | ||
47 | pushl %esi | ||
48 | pushl %edi | ||
49 | pushl %ebp | ||
50 | pushf | ||
51 | |||
52 | movl 20+8(%esp), %ebp /* list of pages */ | ||
53 | movl PTR(VA_CONTROL_PAGE)(%ebp), %edi | ||
54 | movl %esp, ESP(%edi) | ||
55 | movl %cr0, %eax | ||
56 | movl %eax, CR0(%edi) | ||
57 | movl %cr3, %eax | ||
58 | movl %eax, CR3(%edi) | ||
59 | movl %cr4, %eax | ||
60 | movl %eax, CR4(%edi) | ||
28 | 61 | ||
29 | #ifdef CONFIG_X86_PAE | 62 | #ifdef CONFIG_X86_PAE |
30 | /* map the control page at its virtual address */ | 63 | /* map the control page at its virtual address */ |
@@ -138,15 +171,25 @@ relocate_kernel: | |||
138 | 171 | ||
139 | relocate_new_kernel: | 172 | relocate_new_kernel: |
140 | /* read the arguments and say goodbye to the stack */ | 173 | /* read the arguments and say goodbye to the stack */ |
141 | movl 4(%esp), %ebx /* page_list */ | 174 | movl 20+4(%esp), %ebx /* page_list */ |
142 | movl 8(%esp), %ebp /* list of pages */ | 175 | movl 20+8(%esp), %ebp /* list of pages */ |
143 | movl 12(%esp), %edx /* start address */ | 176 | movl 20+12(%esp), %edx /* start address */ |
144 | movl 16(%esp), %ecx /* cpu_has_pae */ | 177 | movl 20+16(%esp), %ecx /* cpu_has_pae */ |
178 | movl 20+20(%esp), %esi /* preserve_context */ | ||
145 | 179 | ||
146 | /* zero out flags, and disable interrupts */ | 180 | /* zero out flags, and disable interrupts */ |
147 | pushl $0 | 181 | pushl $0 |
148 | popfl | 182 | popfl |
149 | 183 | ||
184 | /* save some information for jumping back */ | ||
185 | movl PTR(VA_CONTROL_PAGE)(%ebp), %edi | ||
186 | movl %edi, CP_VA_CONTROL_PAGE(%edi) | ||
187 | movl PTR(PA_PGD)(%ebp), %eax | ||
188 | movl %eax, CP_PA_PGD(%edi) | ||
189 | movl PTR(PA_SWAP_PAGE)(%ebp), %eax | ||
190 | movl %eax, CP_PA_SWAP_PAGE(%edi) | ||
191 | movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) | ||
192 | |||
150 | /* get physical address of control page now */ | 193 | /* get physical address of control page now */ |
151 | /* this is impossible after page table switch */ | 194 | /* this is impossible after page table switch */ |
152 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi | 195 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi |
@@ -197,8 +240,90 @@ identity_mapped: | |||
197 | xorl %eax, %eax | 240 | xorl %eax, %eax |
198 | movl %eax, %cr3 | 241 | movl %eax, %cr3 |
199 | 242 | ||
243 | movl CP_PA_SWAP_PAGE(%edi), %eax | ||
244 | pushl %eax | ||
245 | pushl %ebx | ||
246 | call swap_pages | ||
247 | addl $8, %esp | ||
248 | |||
249 | /* To be certain of avoiding problems with self-modifying code | ||
250 | * I need to execute a serializing instruction here. | ||
251 | * So I flush the TLB, it's handy, and not processor dependent. | ||
252 | */ | ||
253 | xorl %eax, %eax | ||
254 | movl %eax, %cr3 | ||
255 | |||
256 | /* set all of the registers to known values */ | ||
257 | /* leave %esp alone */ | ||
258 | |||
259 | testl %esi, %esi | ||
260 | jnz 1f | ||
261 | xorl %edi, %edi | ||
262 | xorl %eax, %eax | ||
263 | xorl %ebx, %ebx | ||
264 | xorl %ecx, %ecx | ||
265 | xorl %edx, %edx | ||
266 | xorl %esi, %esi | ||
267 | xorl %ebp, %ebp | ||
268 | ret | ||
269 | 1: | ||
270 | popl %edx | ||
271 | movl CP_PA_SWAP_PAGE(%edi), %esp | ||
272 | addl $PAGE_SIZE, %esp | ||
273 | 2: | ||
274 | call *%edx | ||
275 | |||
276 | /* get the re-entry point of the peer system */ | ||
277 | movl 0(%esp), %ebp | ||
278 | call 1f | ||
279 | 1: | ||
280 | popl %ebx | ||
281 | subl $(1b - relocate_kernel), %ebx | ||
282 | movl CP_VA_CONTROL_PAGE(%ebx), %edi | ||
283 | lea PAGE_SIZE(%ebx), %esp | ||
284 | movl CP_PA_SWAP_PAGE(%ebx), %eax | ||
285 | movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx | ||
286 | pushl %eax | ||
287 | pushl %edx | ||
288 | call swap_pages | ||
289 | addl $8, %esp | ||
290 | movl CP_PA_PGD(%ebx), %eax | ||
291 | movl %eax, %cr3 | ||
292 | movl %cr0, %eax | ||
293 | orl $(1<<31), %eax | ||
294 | movl %eax, %cr0 | ||
295 | lea PAGE_SIZE(%edi), %esp | ||
296 | movl %edi, %eax | ||
297 | addl $(virtual_mapped - relocate_kernel), %eax | ||
298 | pushl %eax | ||
299 | ret | ||
300 | |||
301 | virtual_mapped: | ||
302 | movl CR4(%edi), %eax | ||
303 | movl %eax, %cr4 | ||
304 | movl CR3(%edi), %eax | ||
305 | movl %eax, %cr3 | ||
306 | movl CR0(%edi), %eax | ||
307 | movl %eax, %cr0 | ||
308 | movl ESP(%edi), %esp | ||
309 | movl %ebp, %eax | ||
310 | |||
311 | popf | ||
312 | popl %ebp | ||
313 | popl %edi | ||
314 | popl %esi | ||
315 | popl %ebx | ||
316 | ret | ||
317 | |||
200 | /* Do the copies */ | 318 | /* Do the copies */ |
201 | movl %ebx, %ecx | 319 | swap_pages: |
320 | movl 8(%esp), %edx | ||
321 | movl 4(%esp), %ecx | ||
322 | pushl %ebp | ||
323 | pushl %ebx | ||
324 | pushl %edi | ||
325 | pushl %esi | ||
326 | movl %ecx, %ebx | ||
202 | jmp 1f | 327 | jmp 1f |
203 | 328 | ||
204 | 0: /* top, read another word from the indirection page */ | 329 | 0: /* top, read another word from the indirection page */ |
@@ -226,27 +351,28 @@ identity_mapped: | |||
226 | movl %ecx, %esi /* For every source page do a copy */ | 351 | movl %ecx, %esi /* For every source page do a copy */ |
227 | andl $0xfffff000, %esi | 352 | andl $0xfffff000, %esi |
228 | 353 | ||
354 | movl %edi, %eax | ||
355 | movl %esi, %ebp | ||
356 | |||
357 | movl %edx, %edi | ||
229 | movl $1024, %ecx | 358 | movl $1024, %ecx |
230 | rep ; movsl | 359 | rep ; movsl |
231 | jmp 0b | ||
232 | 360 | ||
233 | 3: | 361 | movl %ebp, %edi |
234 | 362 | movl %eax, %esi | |
235 | /* To be certain of avoiding problems with self-modifying code | 363 | movl $1024, %ecx |
236 | * I need to execute a serializing instruction here. | 364 | rep ; movsl |
237 | * So I flush the TLB, it's handy, and not processor dependent. | ||
238 | */ | ||
239 | xorl %eax, %eax | ||
240 | movl %eax, %cr3 | ||
241 | 365 | ||
242 | /* set all of the registers to known values */ | 366 | movl %eax, %edi |
243 | /* leave %esp alone */ | 367 | movl %edx, %esi |
368 | movl $1024, %ecx | ||
369 | rep ; movsl | ||
244 | 370 | ||
245 | xorl %eax, %eax | 371 | lea PAGE_SIZE(%ebp), %esi |
246 | xorl %ebx, %ebx | 372 | jmp 0b |
247 | xorl %ecx, %ecx | 373 | 3: |
248 | xorl %edx, %edx | 374 | popl %esi |
249 | xorl %esi, %esi | 375 | popl %edi |
250 | xorl %edi, %edi | 376 | popl %ebx |
251 | xorl %ebp, %ebp | 377 | popl %ebp |
252 | ret | 378 | ret |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 531b55b8e81a..b520dae02bf4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -57,12 +57,8 @@ | |||
57 | #include <linux/slab.h> | 57 | #include <linux/slab.h> |
58 | #include <linux/user.h> | 58 | #include <linux/user.h> |
59 | #include <linux/delay.h> | 59 | #include <linux/delay.h> |
60 | #include <linux/highmem.h> | ||
61 | 60 | ||
62 | #include <linux/kallsyms.h> | 61 | #include <linux/kallsyms.h> |
63 | #include <linux/edd.h> | ||
64 | #include <linux/iscsi_ibft.h> | ||
65 | #include <linux/kexec.h> | ||
66 | #include <linux/cpufreq.h> | 62 | #include <linux/cpufreq.h> |
67 | #include <linux/dma-mapping.h> | 63 | #include <linux/dma-mapping.h> |
68 | #include <linux/ctype.h> | 64 | #include <linux/ctype.h> |
@@ -96,7 +92,7 @@ | |||
96 | #include <asm/smp.h> | 92 | #include <asm/smp.h> |
97 | #include <asm/desc.h> | 93 | #include <asm/desc.h> |
98 | #include <asm/dma.h> | 94 | #include <asm/dma.h> |
99 | #include <asm/gart.h> | 95 | #include <asm/iommu.h> |
100 | #include <asm/mmu_context.h> | 96 | #include <asm/mmu_context.h> |
101 | #include <asm/proto.h> | 97 | #include <asm/proto.h> |
102 | 98 | ||
@@ -104,7 +100,6 @@ | |||
104 | #include <asm/paravirt.h> | 100 | #include <asm/paravirt.h> |
105 | 101 | ||
106 | #include <asm/percpu.h> | 102 | #include <asm/percpu.h> |
107 | #include <asm/sections.h> | ||
108 | #include <asm/topology.h> | 103 | #include <asm/topology.h> |
109 | #include <asm/apicdef.h> | 104 | #include <asm/apicdef.h> |
110 | #ifdef CONFIG_X86_64 | 105 | #ifdef CONFIG_X86_64 |
@@ -579,6 +574,10 @@ static int __init setup_elfcorehdr(char *arg) | |||
579 | early_param("elfcorehdr", setup_elfcorehdr); | 574 | early_param("elfcorehdr", setup_elfcorehdr); |
580 | #endif | 575 | #endif |
581 | 576 | ||
577 | static struct x86_quirks default_x86_quirks __initdata; | ||
578 | |||
579 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; | ||
580 | |||
582 | /* | 581 | /* |
583 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 582 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
584 | * passed the efi memmap, systab, etc., so we should use these data structures | 583 | * passed the efi memmap, systab, etc., so we should use these data structures |
@@ -598,11 +597,11 @@ void __init setup_arch(char **cmdline_p) | |||
598 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 597 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
599 | visws_early_detect(); | 598 | visws_early_detect(); |
600 | pre_setup_arch_hook(); | 599 | pre_setup_arch_hook(); |
601 | early_cpu_init(); | ||
602 | #else | 600 | #else |
603 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 601 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
604 | #endif | 602 | #endif |
605 | 603 | ||
604 | early_cpu_init(); | ||
606 | early_ioremap_init(); | 605 | early_ioremap_init(); |
607 | 606 | ||
608 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); | 607 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); |
@@ -666,9 +665,6 @@ void __init setup_arch(char **cmdline_p) | |||
666 | bss_resource.start = virt_to_phys(&__bss_start); | 665 | bss_resource.start = virt_to_phys(&__bss_start); |
667 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | 666 | bss_resource.end = virt_to_phys(&__bss_stop)-1; |
668 | 667 | ||
669 | #ifdef CONFIG_X86_64 | ||
670 | early_cpu_init(); | ||
671 | #endif | ||
672 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 668 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
673 | *cmdline_p = command_line; | 669 | *cmdline_p = command_line; |
674 | 670 | ||
@@ -681,7 +677,7 @@ void __init setup_arch(char **cmdline_p) | |||
681 | #ifdef CONFIG_X86_LOCAL_APIC | 677 | #ifdef CONFIG_X86_LOCAL_APIC |
682 | disable_apic = 1; | 678 | disable_apic = 1; |
683 | #endif | 679 | #endif |
684 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 680 | setup_clear_cpu_cap(X86_FEATURE_APIC); |
685 | } | 681 | } |
686 | 682 | ||
687 | #ifdef CONFIG_PCI | 683 | #ifdef CONFIG_PCI |
@@ -824,7 +820,10 @@ void __init setup_arch(char **cmdline_p) | |||
824 | vmi_init(); | 820 | vmi_init(); |
825 | #endif | 821 | #endif |
826 | 822 | ||
823 | paravirt_pagetable_setup_start(swapper_pg_dir); | ||
827 | paging_init(); | 824 | paging_init(); |
825 | paravirt_pagetable_setup_done(swapper_pg_dir); | ||
826 | paravirt_post_allocator_init(); | ||
828 | 827 | ||
829 | #ifdef CONFIG_X86_64 | 828 | #ifdef CONFIG_X86_64 |
830 | map_vsyscall(); | 829 | map_vsyscall(); |
@@ -854,14 +853,6 @@ void __init setup_arch(char **cmdline_p) | |||
854 | init_cpu_to_node(); | 853 | init_cpu_to_node(); |
855 | #endif | 854 | #endif |
856 | 855 | ||
857 | #ifdef CONFIG_X86_NUMAQ | ||
858 | /* | ||
859 | * need to check online nodes num, call it | ||
860 | * here before time_init/tsc_init | ||
861 | */ | ||
862 | numaq_tsc_disable(); | ||
863 | #endif | ||
864 | |||
865 | init_apic_mappings(); | 856 | init_apic_mappings(); |
866 | ioapic_init_mappings(); | 857 | ioapic_init_mappings(); |
867 | 858 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index cac68430d31f..f7745f94c006 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -227,8 +227,8 @@ static void __init setup_node_to_cpumask_map(void) | |||
227 | /* allocate the map */ | 227 | /* allocate the map */ |
228 | map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); | 228 | map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); |
229 | 229 | ||
230 | Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", | 230 | pr_debug(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", |
231 | map, nr_node_ids); | 231 | map, nr_node_ids); |
232 | 232 | ||
233 | /* node_to_cpumask() will now work */ | 233 | /* node_to_cpumask() will now work */ |
234 | node_to_cpumask_map = map; | 234 | node_to_cpumask_map = map; |
@@ -248,7 +248,7 @@ void __cpuinit numa_set_node(int cpu, int node) | |||
248 | per_cpu(x86_cpu_to_node_map, cpu) = node; | 248 | per_cpu(x86_cpu_to_node_map, cpu) = node; |
249 | 249 | ||
250 | else | 250 | else |
251 | Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); | 251 | pr_debug("Setting node for non-present cpu %d\n", cpu); |
252 | } | 252 | } |
253 | 253 | ||
254 | void __cpuinit numa_clear_node(int cpu) | 254 | void __cpuinit numa_clear_node(int cpu) |
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index d92373630963..6fb5bcdd8933 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c | |||
@@ -212,7 +212,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused) | |||
212 | 212 | ||
213 | badframe: | 213 | badframe: |
214 | if (show_unhandled_signals && printk_ratelimit()) { | 214 | if (show_unhandled_signals && printk_ratelimit()) { |
215 | printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:" | 215 | printk("%s%s[%d] bad frame in sigreturn frame:" |
216 | "%p ip:%lx sp:%lx oeax:%lx", | 216 | "%p ip:%lx sp:%lx oeax:%lx", |
217 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, | 217 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, |
218 | current->comm, task_pid_nr(current), frame, regs->ip, | 218 | current->comm, task_pid_nr(current), frame, regs->ip, |
@@ -657,18 +657,9 @@ static void do_signal(struct pt_regs *regs) | |||
657 | void | 657 | void |
658 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 658 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
659 | { | 659 | { |
660 | /* Pending single-step? */ | ||
661 | if (thread_info_flags & _TIF_SINGLESTEP) { | ||
662 | regs->flags |= X86_EFLAGS_TF; | ||
663 | clear_thread_flag(TIF_SINGLESTEP); | ||
664 | } | ||
665 | |||
666 | /* deal with pending signal delivery */ | 660 | /* deal with pending signal delivery */ |
667 | if (thread_info_flags & _TIF_SIGPENDING) | 661 | if (thread_info_flags & _TIF_SIGPENDING) |
668 | do_signal(regs); | 662 | do_signal(regs); |
669 | 663 | ||
670 | if (thread_info_flags & _TIF_HRTICK_RESCHED) | ||
671 | hrtick_resched(); | ||
672 | |||
673 | clear_thread_flag(TIF_IRET); | 664 | clear_thread_flag(TIF_IRET); |
674 | } | 665 | } |
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index e53b267662e7..b45ef8ddd651 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c | |||
@@ -53,6 +53,59 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | |||
53 | return do_sigaltstack(uss, uoss, regs->sp); | 53 | return do_sigaltstack(uss, uoss, regs->sp); |
54 | } | 54 | } |
55 | 55 | ||
56 | /* | ||
57 | * Signal frame handlers. | ||
58 | */ | ||
59 | |||
60 | static inline int save_i387(struct _fpstate __user *buf) | ||
61 | { | ||
62 | struct task_struct *tsk = current; | ||
63 | int err = 0; | ||
64 | |||
65 | BUILD_BUG_ON(sizeof(struct user_i387_struct) != | ||
66 | sizeof(tsk->thread.xstate->fxsave)); | ||
67 | |||
68 | if ((unsigned long)buf % 16) | ||
69 | printk("save_i387: bad fpstate %p\n", buf); | ||
70 | |||
71 | if (!used_math()) | ||
72 | return 0; | ||
73 | clear_used_math(); /* trigger finit */ | ||
74 | if (task_thread_info(tsk)->status & TS_USEDFPU) { | ||
75 | err = save_i387_checking((struct i387_fxsave_struct __user *) | ||
76 | buf); | ||
77 | if (err) | ||
78 | return err; | ||
79 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | ||
80 | stts(); | ||
81 | } else { | ||
82 | if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, | ||
83 | sizeof(struct i387_fxsave_struct))) | ||
84 | return -1; | ||
85 | } | ||
86 | return 1; | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * This restores directly out of user space. Exceptions are handled. | ||
91 | */ | ||
92 | static inline int restore_i387(struct _fpstate __user *buf) | ||
93 | { | ||
94 | struct task_struct *tsk = current; | ||
95 | int err; | ||
96 | |||
97 | if (!used_math()) { | ||
98 | err = init_fpu(tsk); | ||
99 | if (err) | ||
100 | return err; | ||
101 | } | ||
102 | |||
103 | if (!(task_thread_info(current)->status & TS_USEDFPU)) { | ||
104 | clts(); | ||
105 | task_thread_info(current)->status |= TS_USEDFPU; | ||
106 | } | ||
107 | return restore_fpu_checking((__force struct i387_fxsave_struct *)buf); | ||
108 | } | ||
56 | 109 | ||
57 | /* | 110 | /* |
58 | * Do a signal return; undo the signal stack. | 111 | * Do a signal return; undo the signal stack. |
@@ -487,12 +540,6 @@ static void do_signal(struct pt_regs *regs) | |||
487 | void do_notify_resume(struct pt_regs *regs, void *unused, | 540 | void do_notify_resume(struct pt_regs *regs, void *unused, |
488 | __u32 thread_info_flags) | 541 | __u32 thread_info_flags) |
489 | { | 542 | { |
490 | /* Pending single-step? */ | ||
491 | if (thread_info_flags & _TIF_SINGLESTEP) { | ||
492 | regs->flags |= X86_EFLAGS_TF; | ||
493 | clear_thread_flag(TIF_SINGLESTEP); | ||
494 | } | ||
495 | |||
496 | #ifdef CONFIG_X86_MCE | 543 | #ifdef CONFIG_X86_MCE |
497 | /* notify userspace of pending MCEs */ | 544 | /* notify userspace of pending MCEs */ |
498 | if (thread_info_flags & _TIF_MCE_NOTIFY) | 545 | if (thread_info_flags & _TIF_MCE_NOTIFY) |
@@ -502,9 +549,6 @@ void do_notify_resume(struct pt_regs *regs, void *unused, | |||
502 | /* deal with pending signal delivery */ | 549 | /* deal with pending signal delivery */ |
503 | if (thread_info_flags & _TIF_SIGPENDING) | 550 | if (thread_info_flags & _TIF_SIGPENDING) |
504 | do_signal(regs); | 551 | do_signal(regs); |
505 | |||
506 | if (thread_info_flags & _TIF_HRTICK_RESCHED) | ||
507 | hrtick_resched(); | ||
508 | } | 552 | } |
509 | 553 | ||
510 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | 554 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 687376ab07e8..332512767f4f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -216,7 +216,7 @@ static void __cpuinit smp_callin(void) | |||
216 | panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, | 216 | panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, |
217 | phys_id, cpuid); | 217 | phys_id, cpuid); |
218 | } | 218 | } |
219 | Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); | 219 | pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); |
220 | 220 | ||
221 | /* | 221 | /* |
222 | * STARTUP IPIs are fragile beasts as they might sometimes | 222 | * STARTUP IPIs are fragile beasts as they might sometimes |
@@ -251,7 +251,7 @@ static void __cpuinit smp_callin(void) | |||
251 | * boards) | 251 | * boards) |
252 | */ | 252 | */ |
253 | 253 | ||
254 | Dprintk("CALLIN, before setup_local_APIC().\n"); | 254 | pr_debug("CALLIN, before setup_local_APIC().\n"); |
255 | smp_callin_clear_local_apic(); | 255 | smp_callin_clear_local_apic(); |
256 | setup_local_APIC(); | 256 | setup_local_APIC(); |
257 | end_local_APIC_setup(); | 257 | end_local_APIC_setup(); |
@@ -266,7 +266,7 @@ static void __cpuinit smp_callin(void) | |||
266 | local_irq_enable(); | 266 | local_irq_enable(); |
267 | calibrate_delay(); | 267 | calibrate_delay(); |
268 | local_irq_disable(); | 268 | local_irq_disable(); |
269 | Dprintk("Stack at about %p\n", &cpuid); | 269 | pr_debug("Stack at about %p\n", &cpuid); |
270 | 270 | ||
271 | /* | 271 | /* |
272 | * Save our processor parameters | 272 | * Save our processor parameters |
@@ -438,7 +438,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
438 | cpu_set(cpu, cpu_sibling_setup_map); | 438 | cpu_set(cpu, cpu_sibling_setup_map); |
439 | 439 | ||
440 | if (smp_num_siblings > 1) { | 440 | if (smp_num_siblings > 1) { |
441 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 441 | for_each_cpu_mask_nr(i, cpu_sibling_setup_map) { |
442 | if (c->phys_proc_id == cpu_data(i).phys_proc_id && | 442 | if (c->phys_proc_id == cpu_data(i).phys_proc_id && |
443 | c->cpu_core_id == cpu_data(i).cpu_core_id) { | 443 | c->cpu_core_id == cpu_data(i).cpu_core_id) { |
444 | cpu_set(i, per_cpu(cpu_sibling_map, cpu)); | 444 | cpu_set(i, per_cpu(cpu_sibling_map, cpu)); |
@@ -461,7 +461,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
461 | return; | 461 | return; |
462 | } | 462 | } |
463 | 463 | ||
464 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 464 | for_each_cpu_mask_nr(i, cpu_sibling_setup_map) { |
465 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && | 465 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && |
466 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { | 466 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { |
467 | cpu_set(i, c->llc_shared_map); | 467 | cpu_set(i, c->llc_shared_map); |
@@ -513,7 +513,7 @@ static void impress_friends(void) | |||
513 | /* | 513 | /* |
514 | * Allow the user to impress friends. | 514 | * Allow the user to impress friends. |
515 | */ | 515 | */ |
516 | Dprintk("Before bogomips.\n"); | 516 | pr_debug("Before bogomips.\n"); |
517 | for_each_possible_cpu(cpu) | 517 | for_each_possible_cpu(cpu) |
518 | if (cpu_isset(cpu, cpu_callout_map)) | 518 | if (cpu_isset(cpu, cpu_callout_map)) |
519 | bogosum += cpu_data(cpu).loops_per_jiffy; | 519 | bogosum += cpu_data(cpu).loops_per_jiffy; |
@@ -523,7 +523,7 @@ static void impress_friends(void) | |||
523 | bogosum/(500000/HZ), | 523 | bogosum/(500000/HZ), |
524 | (bogosum/(5000/HZ))%100); | 524 | (bogosum/(5000/HZ))%100); |
525 | 525 | ||
526 | Dprintk("Before bogocount - setting activated=1.\n"); | 526 | pr_debug("Before bogocount - setting activated=1.\n"); |
527 | } | 527 | } |
528 | 528 | ||
529 | static inline void __inquire_remote_apic(int apicid) | 529 | static inline void __inquire_remote_apic(int apicid) |
@@ -546,8 +546,8 @@ static inline void __inquire_remote_apic(int apicid) | |||
546 | printk(KERN_CONT | 546 | printk(KERN_CONT |
547 | "a previous APIC delivery may have failed\n"); | 547 | "a previous APIC delivery may have failed\n"); |
548 | 548 | ||
549 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); | 549 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); |
550 | apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); | 550 | apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); |
551 | 551 | ||
552 | timeout = 0; | 552 | timeout = 0; |
553 | do { | 553 | do { |
@@ -579,29 +579,24 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
579 | int maxlvt; | 579 | int maxlvt; |
580 | 580 | ||
581 | /* Target chip */ | 581 | /* Target chip */ |
582 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); | 582 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); |
583 | 583 | ||
584 | /* Boot on the stack */ | 584 | /* Boot on the stack */ |
585 | /* Kick the second */ | 585 | /* Kick the second */ |
586 | apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); | 586 | apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); |
587 | 587 | ||
588 | Dprintk("Waiting for send to finish...\n"); | 588 | pr_debug("Waiting for send to finish...\n"); |
589 | send_status = safe_apic_wait_icr_idle(); | 589 | send_status = safe_apic_wait_icr_idle(); |
590 | 590 | ||
591 | /* | 591 | /* |
592 | * Give the other CPU some time to accept the IPI. | 592 | * Give the other CPU some time to accept the IPI. |
593 | */ | 593 | */ |
594 | udelay(200); | 594 | udelay(200); |
595 | /* | ||
596 | * Due to the Pentium erratum 3AP. | ||
597 | */ | ||
598 | maxlvt = lapic_get_maxlvt(); | 595 | maxlvt = lapic_get_maxlvt(); |
599 | if (maxlvt > 3) { | 596 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
600 | apic_read_around(APIC_SPIV); | ||
601 | apic_write(APIC_ESR, 0); | 597 | apic_write(APIC_ESR, 0); |
602 | } | ||
603 | accept_status = (apic_read(APIC_ESR) & 0xEF); | 598 | accept_status = (apic_read(APIC_ESR) & 0xEF); |
604 | Dprintk("NMI sent.\n"); | 599 | pr_debug("NMI sent.\n"); |
605 | 600 | ||
606 | if (send_status) | 601 | if (send_status) |
607 | printk(KERN_ERR "APIC never delivered???\n"); | 602 | printk(KERN_ERR "APIC never delivered???\n"); |
@@ -625,42 +620,44 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
625 | return send_status; | 620 | return send_status; |
626 | } | 621 | } |
627 | 622 | ||
623 | maxlvt = lapic_get_maxlvt(); | ||
624 | |||
628 | /* | 625 | /* |
629 | * Be paranoid about clearing APIC errors. | 626 | * Be paranoid about clearing APIC errors. |
630 | */ | 627 | */ |
631 | if (APIC_INTEGRATED(apic_version[phys_apicid])) { | 628 | if (APIC_INTEGRATED(apic_version[phys_apicid])) { |
632 | apic_read_around(APIC_SPIV); | 629 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
633 | apic_write(APIC_ESR, 0); | 630 | apic_write(APIC_ESR, 0); |
634 | apic_read(APIC_ESR); | 631 | apic_read(APIC_ESR); |
635 | } | 632 | } |
636 | 633 | ||
637 | Dprintk("Asserting INIT.\n"); | 634 | pr_debug("Asserting INIT.\n"); |
638 | 635 | ||
639 | /* | 636 | /* |
640 | * Turn INIT on target chip | 637 | * Turn INIT on target chip |
641 | */ | 638 | */ |
642 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | 639 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); |
643 | 640 | ||
644 | /* | 641 | /* |
645 | * Send IPI | 642 | * Send IPI |
646 | */ | 643 | */ |
647 | apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT | 644 | apic_write(APIC_ICR, |
648 | | APIC_DM_INIT); | 645 | APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); |
649 | 646 | ||
650 | Dprintk("Waiting for send to finish...\n"); | 647 | pr_debug("Waiting for send to finish...\n"); |
651 | send_status = safe_apic_wait_icr_idle(); | 648 | send_status = safe_apic_wait_icr_idle(); |
652 | 649 | ||
653 | mdelay(10); | 650 | mdelay(10); |
654 | 651 | ||
655 | Dprintk("Deasserting INIT.\n"); | 652 | pr_debug("Deasserting INIT.\n"); |
656 | 653 | ||
657 | /* Target chip */ | 654 | /* Target chip */ |
658 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | 655 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); |
659 | 656 | ||
660 | /* Send IPI */ | 657 | /* Send IPI */ |
661 | apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); | 658 | apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); |
662 | 659 | ||
663 | Dprintk("Waiting for send to finish...\n"); | 660 | pr_debug("Waiting for send to finish...\n"); |
664 | send_status = safe_apic_wait_icr_idle(); | 661 | send_status = safe_apic_wait_icr_idle(); |
665 | 662 | ||
666 | mb(); | 663 | mb(); |
@@ -687,55 +684,47 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
687 | /* | 684 | /* |
688 | * Run STARTUP IPI loop. | 685 | * Run STARTUP IPI loop. |
689 | */ | 686 | */ |
690 | Dprintk("#startup loops: %d.\n", num_starts); | 687 | pr_debug("#startup loops: %d.\n", num_starts); |
691 | |||
692 | maxlvt = lapic_get_maxlvt(); | ||
693 | 688 | ||
694 | for (j = 1; j <= num_starts; j++) { | 689 | for (j = 1; j <= num_starts; j++) { |
695 | Dprintk("Sending STARTUP #%d.\n", j); | 690 | pr_debug("Sending STARTUP #%d.\n", j); |
696 | apic_read_around(APIC_SPIV); | 691 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
697 | apic_write(APIC_ESR, 0); | 692 | apic_write(APIC_ESR, 0); |
698 | apic_read(APIC_ESR); | 693 | apic_read(APIC_ESR); |
699 | Dprintk("After apic_write.\n"); | 694 | pr_debug("After apic_write.\n"); |
700 | 695 | ||
701 | /* | 696 | /* |
702 | * STARTUP IPI | 697 | * STARTUP IPI |
703 | */ | 698 | */ |
704 | 699 | ||
705 | /* Target chip */ | 700 | /* Target chip */ |
706 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | 701 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); |
707 | 702 | ||
708 | /* Boot on the stack */ | 703 | /* Boot on the stack */ |
709 | /* Kick the second */ | 704 | /* Kick the second */ |
710 | apic_write_around(APIC_ICR, APIC_DM_STARTUP | 705 | apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); |
711 | | (start_eip >> 12)); | ||
712 | 706 | ||
713 | /* | 707 | /* |
714 | * Give the other CPU some time to accept the IPI. | 708 | * Give the other CPU some time to accept the IPI. |
715 | */ | 709 | */ |
716 | udelay(300); | 710 | udelay(300); |
717 | 711 | ||
718 | Dprintk("Startup point 1.\n"); | 712 | pr_debug("Startup point 1.\n"); |
719 | 713 | ||
720 | Dprintk("Waiting for send to finish...\n"); | 714 | pr_debug("Waiting for send to finish...\n"); |
721 | send_status = safe_apic_wait_icr_idle(); | 715 | send_status = safe_apic_wait_icr_idle(); |
722 | 716 | ||
723 | /* | 717 | /* |
724 | * Give the other CPU some time to accept the IPI. | 718 | * Give the other CPU some time to accept the IPI. |
725 | */ | 719 | */ |
726 | udelay(200); | 720 | udelay(200); |
727 | /* | 721 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
728 | * Due to the Pentium erratum 3AP. | ||
729 | */ | ||
730 | if (maxlvt > 3) { | ||
731 | apic_read_around(APIC_SPIV); | ||
732 | apic_write(APIC_ESR, 0); | 722 | apic_write(APIC_ESR, 0); |
733 | } | ||
734 | accept_status = (apic_read(APIC_ESR) & 0xEF); | 723 | accept_status = (apic_read(APIC_ESR) & 0xEF); |
735 | if (send_status || accept_status) | 724 | if (send_status || accept_status) |
736 | break; | 725 | break; |
737 | } | 726 | } |
738 | Dprintk("After Startup.\n"); | 727 | pr_debug("After Startup.\n"); |
739 | 728 | ||
740 | if (send_status) | 729 | if (send_status) |
741 | printk(KERN_ERR "APIC never delivered???\n"); | 730 | printk(KERN_ERR "APIC never delivered???\n"); |
@@ -768,7 +757,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work) | |||
768 | * | 757 | * |
769 | * Must be called after the _cpu_pda pointer table is initialized. | 758 | * Must be called after the _cpu_pda pointer table is initialized. |
770 | */ | 759 | */ |
771 | static int __cpuinit get_local_pda(int cpu) | 760 | int __cpuinit get_local_pda(int cpu) |
772 | { | 761 | { |
773 | struct x8664_pda *oldpda, *newpda; | 762 | struct x8664_pda *oldpda, *newpda; |
774 | unsigned long size = sizeof(struct x8664_pda); | 763 | unsigned long size = sizeof(struct x8664_pda); |
@@ -886,7 +875,7 @@ do_rest: | |||
886 | 875 | ||
887 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { | 876 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { |
888 | 877 | ||
889 | Dprintk("Setting warm reset code and vector.\n"); | 878 | pr_debug("Setting warm reset code and vector.\n"); |
890 | 879 | ||
891 | store_NMI_vector(&nmi_high, &nmi_low); | 880 | store_NMI_vector(&nmi_high, &nmi_low); |
892 | 881 | ||
@@ -907,9 +896,9 @@ do_rest: | |||
907 | /* | 896 | /* |
908 | * allow APs to start initializing. | 897 | * allow APs to start initializing. |
909 | */ | 898 | */ |
910 | Dprintk("Before Callout %d.\n", cpu); | 899 | pr_debug("Before Callout %d.\n", cpu); |
911 | cpu_set(cpu, cpu_callout_map); | 900 | cpu_set(cpu, cpu_callout_map); |
912 | Dprintk("After Callout %d.\n", cpu); | 901 | pr_debug("After Callout %d.\n", cpu); |
913 | 902 | ||
914 | /* | 903 | /* |
915 | * Wait 5s total for a response | 904 | * Wait 5s total for a response |
@@ -922,10 +911,10 @@ do_rest: | |||
922 | 911 | ||
923 | if (cpu_isset(cpu, cpu_callin_map)) { | 912 | if (cpu_isset(cpu, cpu_callin_map)) { |
924 | /* number CPUs logically, starting from 1 (BSP is 0) */ | 913 | /* number CPUs logically, starting from 1 (BSP is 0) */ |
925 | Dprintk("OK.\n"); | 914 | pr_debug("OK.\n"); |
926 | printk(KERN_INFO "CPU%d: ", cpu); | 915 | printk(KERN_INFO "CPU%d: ", cpu); |
927 | print_cpu_info(&cpu_data(cpu)); | 916 | print_cpu_info(&cpu_data(cpu)); |
928 | Dprintk("CPU has booted.\n"); | 917 | pr_debug("CPU has booted.\n"); |
929 | } else { | 918 | } else { |
930 | boot_error = 1; | 919 | boot_error = 1; |
931 | if (*((volatile unsigned char *)trampoline_base) | 920 | if (*((volatile unsigned char *)trampoline_base) |
@@ -970,7 +959,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
970 | 959 | ||
971 | WARN_ON(irqs_disabled()); | 960 | WARN_ON(irqs_disabled()); |
972 | 961 | ||
973 | Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); | 962 | pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); |
974 | 963 | ||
975 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || | 964 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || |
976 | !physid_isset(apicid, phys_cpu_present_map)) { | 965 | !physid_isset(apicid, phys_cpu_present_map)) { |
@@ -982,7 +971,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
982 | * Already booted CPU? | 971 | * Already booted CPU? |
983 | */ | 972 | */ |
984 | if (cpu_isset(cpu, cpu_callin_map)) { | 973 | if (cpu_isset(cpu, cpu_callin_map)) { |
985 | Dprintk("do_boot_cpu %d Already started\n", cpu); | 974 | pr_debug("do_boot_cpu %d Already started\n", cpu); |
986 | return -ENOSYS; | 975 | return -ENOSYS; |
987 | } | 976 | } |
988 | 977 | ||
@@ -1009,7 +998,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
1009 | err = do_boot_cpu(apicid, cpu); | 998 | err = do_boot_cpu(apicid, cpu); |
1010 | #endif | 999 | #endif |
1011 | if (err) { | 1000 | if (err) { |
1012 | Dprintk("do_boot_cpu failed %d\n", err); | 1001 | pr_debug("do_boot_cpu failed %d\n", err); |
1013 | return -EIO; | 1002 | return -EIO; |
1014 | } | 1003 | } |
1015 | 1004 | ||
@@ -1213,7 +1202,7 @@ void __init native_smp_prepare_boot_cpu(void) | |||
1213 | 1202 | ||
1214 | void __init native_smp_cpus_done(unsigned int max_cpus) | 1203 | void __init native_smp_cpus_done(unsigned int max_cpus) |
1215 | { | 1204 | { |
1216 | Dprintk("Boot done.\n"); | 1205 | pr_debug("Boot done.\n"); |
1217 | 1206 | ||
1218 | impress_friends(); | 1207 | impress_friends(); |
1219 | smp_checks(); | 1208 | smp_checks(); |
@@ -1230,7 +1219,7 @@ static void remove_siblinginfo(int cpu) | |||
1230 | int sibling; | 1219 | int sibling; |
1231 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 1220 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
1232 | 1221 | ||
1233 | for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { | 1222 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) { |
1234 | cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); | 1223 | cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); |
1235 | /*/ | 1224 | /*/ |
1236 | * last thread sibling in this cpu core going down | 1225 | * last thread sibling in this cpu core going down |
@@ -1239,7 +1228,7 @@ static void remove_siblinginfo(int cpu) | |||
1239 | cpu_data(sibling).booted_cores--; | 1228 | cpu_data(sibling).booted_cores--; |
1240 | } | 1229 | } |
1241 | 1230 | ||
1242 | for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) | 1231 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu)) |
1243 | cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); | 1232 | cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); |
1244 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); | 1233 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); |
1245 | cpus_clear(per_cpu(cpu_core_map, cpu)); | 1234 | cpus_clear(per_cpu(cpu_core_map, cpu)); |
@@ -1311,7 +1300,7 @@ static void __ref remove_cpu_from_maps(int cpu) | |||
1311 | cpu_clear(cpu, cpu_callout_map); | 1300 | cpu_clear(cpu, cpu_callout_map); |
1312 | cpu_clear(cpu, cpu_callin_map); | 1301 | cpu_clear(cpu, cpu_callin_map); |
1313 | /* was set by cpu_init() */ | 1302 | /* was set by cpu_init() */ |
1314 | clear_bit(cpu, (unsigned long *)&cpu_initialized); | 1303 | cpu_clear(cpu, cpu_initialized); |
1315 | numa_remove_cpu(cpu); | 1304 | numa_remove_cpu(cpu); |
1316 | } | 1305 | } |
1317 | 1306 | ||
@@ -1390,7 +1379,8 @@ static int __init parse_maxcpus(char *arg) | |||
1390 | { | 1379 | { |
1391 | extern unsigned int maxcpus; | 1380 | extern unsigned int maxcpus; |
1392 | 1381 | ||
1393 | maxcpus = simple_strtoul(arg, NULL, 0); | 1382 | if (arg) |
1383 | maxcpus = simple_strtoul(arg, NULL, 0); | ||
1394 | return 0; | 1384 | return 0; |
1395 | } | 1385 | } |
1396 | early_param("maxcpus", parse_maxcpus); | 1386 | early_param("maxcpus", parse_maxcpus); |
diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c deleted file mode 100644 index 8b137891791f..000000000000 --- a/arch/x86/kernel/smpcommon_32.c +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | |||
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 92c20fee6781..e8b9863ef8c4 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -105,6 +105,20 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) | |||
105 | static int enable_single_step(struct task_struct *child) | 105 | static int enable_single_step(struct task_struct *child) |
106 | { | 106 | { |
107 | struct pt_regs *regs = task_pt_regs(child); | 107 | struct pt_regs *regs = task_pt_regs(child); |
108 | unsigned long oflags; | ||
109 | |||
110 | /* | ||
111 | * If we stepped into a sysenter/syscall insn, it trapped in | ||
112 | * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. | ||
113 | * If user-mode had set TF itself, then it's still clear from | ||
114 | * do_debug() and we need to set it again to restore the user | ||
115 | * state so we don't wrongly set TIF_FORCED_TF below. | ||
116 | * If enable_single_step() was used last and that is what | ||
117 | * set TIF_SINGLESTEP, then both TF and TIF_FORCED_TF are | ||
118 | * already set and our bookkeeping is fine. | ||
119 | */ | ||
120 | if (unlikely(test_tsk_thread_flag(child, TIF_SINGLESTEP))) | ||
121 | regs->flags |= X86_EFLAGS_TF; | ||
108 | 122 | ||
109 | /* | 123 | /* |
110 | * Always set TIF_SINGLESTEP - this guarantees that | 124 | * Always set TIF_SINGLESTEP - this guarantees that |
@@ -113,11 +127,7 @@ static int enable_single_step(struct task_struct *child) | |||
113 | */ | 127 | */ |
114 | set_tsk_thread_flag(child, TIF_SINGLESTEP); | 128 | set_tsk_thread_flag(child, TIF_SINGLESTEP); |
115 | 129 | ||
116 | /* | 130 | oflags = regs->flags; |
117 | * If TF was already set, don't do anything else | ||
118 | */ | ||
119 | if (regs->flags & X86_EFLAGS_TF) | ||
120 | return 0; | ||
121 | 131 | ||
122 | /* Set TF on the kernel stack.. */ | 132 | /* Set TF on the kernel stack.. */ |
123 | regs->flags |= X86_EFLAGS_TF; | 133 | regs->flags |= X86_EFLAGS_TF; |
@@ -126,9 +136,22 @@ static int enable_single_step(struct task_struct *child) | |||
126 | * ..but if TF is changed by the instruction we will trace, | 136 | * ..but if TF is changed by the instruction we will trace, |
127 | * don't mark it as being "us" that set it, so that we | 137 | * don't mark it as being "us" that set it, so that we |
128 | * won't clear it by hand later. | 138 | * won't clear it by hand later. |
139 | * | ||
140 | * Note that if we don't actually execute the popf because | ||
141 | * of a signal arriving right now or suchlike, we will lose | ||
142 | * track of the fact that it really was "us" that set it. | ||
129 | */ | 143 | */ |
130 | if (is_setting_trap_flag(child, regs)) | 144 | if (is_setting_trap_flag(child, regs)) { |
145 | clear_tsk_thread_flag(child, TIF_FORCED_TF); | ||
131 | return 0; | 146 | return 0; |
147 | } | ||
148 | |||
149 | /* | ||
150 | * If TF was already set, check whether it was us who set it. | ||
151 | * If not, we should never attempt a block step. | ||
152 | */ | ||
153 | if (oflags & X86_EFLAGS_TF) | ||
154 | return test_tsk_thread_flag(child, TIF_FORCED_TF); | ||
132 | 155 | ||
133 | set_tsk_thread_flag(child, TIF_FORCED_TF); | 156 | set_tsk_thread_flag(child, TIF_FORCED_TF); |
134 | 157 | ||
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index adff5562f5fd..d44395ff34c3 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -326,3 +326,9 @@ ENTRY(sys_call_table) | |||
326 | .long sys_fallocate | 326 | .long sys_fallocate |
327 | .long sys_timerfd_settime /* 325 */ | 327 | .long sys_timerfd_settime /* 325 */ |
328 | .long sys_timerfd_gettime | 328 | .long sys_timerfd_gettime |
329 | .long sys_signalfd4 | ||
330 | .long sys_eventfd2 | ||
331 | .long sys_epoll_create1 | ||
332 | .long sys_dup3 /* 330 */ | ||
333 | .long sys_pipe2 | ||
334 | .long sys_inotify_init1 | ||
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 059ca6ee59b4..ffe3c664afc0 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c | |||
@@ -129,6 +129,7 @@ void __init hpet_time_init(void) | |||
129 | */ | 129 | */ |
130 | void __init time_init(void) | 130 | void __init time_init(void) |
131 | { | 131 | { |
132 | pre_time_init_hook(); | ||
132 | tsc_init(); | 133 | tsc_init(); |
133 | late_time_init = choose_time_init(); | 134 | late_time_init = choose_time_init(); |
134 | } | 135 | } |
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 8a768973c4f0..03df8e45e5a1 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include <asm/nmi.h> | 58 | #include <asm/nmi.h> |
59 | #include <asm/smp.h> | 59 | #include <asm/smp.h> |
60 | #include <asm/io.h> | 60 | #include <asm/io.h> |
61 | #include <asm/traps.h> | ||
61 | 62 | ||
62 | #include "mach_traps.h" | 63 | #include "mach_traps.h" |
63 | 64 | ||
@@ -77,26 +78,6 @@ char ignore_fpu_irq; | |||
77 | gate_desc idt_table[256] | 78 | gate_desc idt_table[256] |
78 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | 79 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; |
79 | 80 | ||
80 | asmlinkage void divide_error(void); | ||
81 | asmlinkage void debug(void); | ||
82 | asmlinkage void nmi(void); | ||
83 | asmlinkage void int3(void); | ||
84 | asmlinkage void overflow(void); | ||
85 | asmlinkage void bounds(void); | ||
86 | asmlinkage void invalid_op(void); | ||
87 | asmlinkage void device_not_available(void); | ||
88 | asmlinkage void coprocessor_segment_overrun(void); | ||
89 | asmlinkage void invalid_TSS(void); | ||
90 | asmlinkage void segment_not_present(void); | ||
91 | asmlinkage void stack_segment(void); | ||
92 | asmlinkage void general_protection(void); | ||
93 | asmlinkage void page_fault(void); | ||
94 | asmlinkage void coprocessor_error(void); | ||
95 | asmlinkage void simd_coprocessor_error(void); | ||
96 | asmlinkage void alignment_check(void); | ||
97 | asmlinkage void spurious_interrupt_bug(void); | ||
98 | asmlinkage void machine_check(void); | ||
99 | |||
100 | int panic_on_unrecovered_nmi; | 81 | int panic_on_unrecovered_nmi; |
101 | int kstack_depth_to_print = 24; | 82 | int kstack_depth_to_print = 24; |
102 | static unsigned int code_bytes = 64; | 83 | static unsigned int code_bytes = 64; |
@@ -256,7 +237,7 @@ static const struct stacktrace_ops print_trace_ops = { | |||
256 | 237 | ||
257 | static void | 238 | static void |
258 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 239 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
259 | unsigned long *stack, unsigned long bp, char *log_lvl) | 240 | unsigned long *stack, unsigned long bp, char *log_lvl) |
260 | { | 241 | { |
261 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | 242 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); |
262 | printk("%s =======================\n", log_lvl); | 243 | printk("%s =======================\n", log_lvl); |
@@ -383,6 +364,54 @@ int is_valid_bugaddr(unsigned long ip) | |||
383 | return ud2 == 0x0b0f; | 364 | return ud2 == 0x0b0f; |
384 | } | 365 | } |
385 | 366 | ||
367 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
368 | static int die_owner = -1; | ||
369 | static unsigned int die_nest_count; | ||
370 | |||
371 | unsigned __kprobes long oops_begin(void) | ||
372 | { | ||
373 | unsigned long flags; | ||
374 | |||
375 | oops_enter(); | ||
376 | |||
377 | if (die_owner != raw_smp_processor_id()) { | ||
378 | console_verbose(); | ||
379 | raw_local_irq_save(flags); | ||
380 | __raw_spin_lock(&die_lock); | ||
381 | die_owner = smp_processor_id(); | ||
382 | die_nest_count = 0; | ||
383 | bust_spinlocks(1); | ||
384 | } else { | ||
385 | raw_local_irq_save(flags); | ||
386 | } | ||
387 | die_nest_count++; | ||
388 | return flags; | ||
389 | } | ||
390 | |||
391 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
392 | { | ||
393 | bust_spinlocks(0); | ||
394 | die_owner = -1; | ||
395 | add_taint(TAINT_DIE); | ||
396 | __raw_spin_unlock(&die_lock); | ||
397 | raw_local_irq_restore(flags); | ||
398 | |||
399 | if (!regs) | ||
400 | return; | ||
401 | |||
402 | if (kexec_should_crash(current)) | ||
403 | crash_kexec(regs); | ||
404 | |||
405 | if (in_interrupt()) | ||
406 | panic("Fatal exception in interrupt"); | ||
407 | |||
408 | if (panic_on_oops) | ||
409 | panic("Fatal exception"); | ||
410 | |||
411 | oops_exit(); | ||
412 | do_exit(signr); | ||
413 | } | ||
414 | |||
386 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | 415 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) |
387 | { | 416 | { |
388 | unsigned short ss; | 417 | unsigned short ss; |
@@ -423,31 +452,9 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |||
423 | */ | 452 | */ |
424 | void die(const char *str, struct pt_regs *regs, long err) | 453 | void die(const char *str, struct pt_regs *regs, long err) |
425 | { | 454 | { |
426 | static struct { | 455 | unsigned long flags = oops_begin(); |
427 | raw_spinlock_t lock; | ||
428 | u32 lock_owner; | ||
429 | int lock_owner_depth; | ||
430 | } die = { | ||
431 | .lock = __RAW_SPIN_LOCK_UNLOCKED, | ||
432 | .lock_owner = -1, | ||
433 | .lock_owner_depth = 0 | ||
434 | }; | ||
435 | unsigned long flags; | ||
436 | |||
437 | oops_enter(); | ||
438 | |||
439 | if (die.lock_owner != raw_smp_processor_id()) { | ||
440 | console_verbose(); | ||
441 | raw_local_irq_save(flags); | ||
442 | __raw_spin_lock(&die.lock); | ||
443 | die.lock_owner = smp_processor_id(); | ||
444 | die.lock_owner_depth = 0; | ||
445 | bust_spinlocks(1); | ||
446 | } else { | ||
447 | raw_local_irq_save(flags); | ||
448 | } | ||
449 | 456 | ||
450 | if (++die.lock_owner_depth < 3) { | 457 | if (die_nest_count < 3) { |
451 | report_bug(regs->ip, regs); | 458 | report_bug(regs->ip, regs); |
452 | 459 | ||
453 | if (__die(str, regs, err)) | 460 | if (__die(str, regs, err)) |
@@ -456,26 +463,7 @@ void die(const char *str, struct pt_regs *regs, long err) | |||
456 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | 463 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); |
457 | } | 464 | } |
458 | 465 | ||
459 | bust_spinlocks(0); | 466 | oops_end(flags, regs, SIGSEGV); |
460 | die.lock_owner = -1; | ||
461 | add_taint(TAINT_DIE); | ||
462 | __raw_spin_unlock(&die.lock); | ||
463 | raw_local_irq_restore(flags); | ||
464 | |||
465 | if (!regs) | ||
466 | return; | ||
467 | |||
468 | if (kexec_should_crash(current)) | ||
469 | crash_kexec(regs); | ||
470 | |||
471 | if (in_interrupt()) | ||
472 | panic("Fatal exception in interrupt"); | ||
473 | |||
474 | if (panic_on_oops) | ||
475 | panic("Fatal exception"); | ||
476 | |||
477 | oops_exit(); | ||
478 | do_exit(SIGSEGV); | ||
479 | } | 467 | } |
480 | 468 | ||
481 | static inline void | 469 | static inline void |
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 2696a6837782..3f18d73f420c 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c | |||
@@ -51,30 +51,10 @@ | |||
51 | #include <asm/pgalloc.h> | 51 | #include <asm/pgalloc.h> |
52 | #include <asm/proto.h> | 52 | #include <asm/proto.h> |
53 | #include <asm/pda.h> | 53 | #include <asm/pda.h> |
54 | #include <asm/traps.h> | ||
54 | 55 | ||
55 | #include <mach_traps.h> | 56 | #include <mach_traps.h> |
56 | 57 | ||
57 | asmlinkage void divide_error(void); | ||
58 | asmlinkage void debug(void); | ||
59 | asmlinkage void nmi(void); | ||
60 | asmlinkage void int3(void); | ||
61 | asmlinkage void overflow(void); | ||
62 | asmlinkage void bounds(void); | ||
63 | asmlinkage void invalid_op(void); | ||
64 | asmlinkage void device_not_available(void); | ||
65 | asmlinkage void double_fault(void); | ||
66 | asmlinkage void coprocessor_segment_overrun(void); | ||
67 | asmlinkage void invalid_TSS(void); | ||
68 | asmlinkage void segment_not_present(void); | ||
69 | asmlinkage void stack_segment(void); | ||
70 | asmlinkage void general_protection(void); | ||
71 | asmlinkage void page_fault(void); | ||
72 | asmlinkage void coprocessor_error(void); | ||
73 | asmlinkage void simd_coprocessor_error(void); | ||
74 | asmlinkage void alignment_check(void); | ||
75 | asmlinkage void spurious_interrupt_bug(void); | ||
76 | asmlinkage void machine_check(void); | ||
77 | |||
78 | int panic_on_unrecovered_nmi; | 58 | int panic_on_unrecovered_nmi; |
79 | int kstack_depth_to_print = 12; | 59 | int kstack_depth_to_print = 12; |
80 | static unsigned int code_bytes = 64; | 60 | static unsigned int code_bytes = 64; |
@@ -355,17 +335,24 @@ static const struct stacktrace_ops print_trace_ops = { | |||
355 | .address = print_trace_address, | 335 | .address = print_trace_address, |
356 | }; | 336 | }; |
357 | 337 | ||
358 | void show_trace(struct task_struct *task, struct pt_regs *regs, | 338 | static void |
359 | unsigned long *stack, unsigned long bp) | 339 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
340 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
360 | { | 341 | { |
361 | printk("\nCall Trace:\n"); | 342 | printk("\nCall Trace:\n"); |
362 | dump_trace(task, regs, stack, bp, &print_trace_ops, NULL); | 343 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); |
363 | printk("\n"); | 344 | printk("\n"); |
364 | } | 345 | } |
365 | 346 | ||
347 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
348 | unsigned long *stack, unsigned long bp) | ||
349 | { | ||
350 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
351 | } | ||
352 | |||
366 | static void | 353 | static void |
367 | _show_stack(struct task_struct *task, struct pt_regs *regs, | 354 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
368 | unsigned long *sp, unsigned long bp) | 355 | unsigned long *sp, unsigned long bp, char *log_lvl) |
369 | { | 356 | { |
370 | unsigned long *stack; | 357 | unsigned long *stack; |
371 | int i; | 358 | int i; |
@@ -399,12 +386,12 @@ _show_stack(struct task_struct *task, struct pt_regs *regs, | |||
399 | printk(" %016lx", *stack++); | 386 | printk(" %016lx", *stack++); |
400 | touch_nmi_watchdog(); | 387 | touch_nmi_watchdog(); |
401 | } | 388 | } |
402 | show_trace(task, regs, sp, bp); | 389 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
403 | } | 390 | } |
404 | 391 | ||
405 | void show_stack(struct task_struct *task, unsigned long *sp) | 392 | void show_stack(struct task_struct *task, unsigned long *sp) |
406 | { | 393 | { |
407 | _show_stack(task, NULL, sp, 0); | 394 | show_stack_log_lvl(task, NULL, sp, 0, ""); |
408 | } | 395 | } |
409 | 396 | ||
410 | /* | 397 | /* |
@@ -454,7 +441,8 @@ void show_registers(struct pt_regs *regs) | |||
454 | u8 *ip; | 441 | u8 *ip; |
455 | 442 | ||
456 | printk("Stack: "); | 443 | printk("Stack: "); |
457 | _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); | 444 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, |
445 | regs->bp, ""); | ||
458 | printk("\n"); | 446 | printk("\n"); |
459 | 447 | ||
460 | printk(KERN_EMERG "Code: "); | 448 | printk(KERN_EMERG "Code: "); |
@@ -518,7 +506,7 @@ unsigned __kprobes long oops_begin(void) | |||
518 | } | 506 | } |
519 | 507 | ||
520 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | 508 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) |
521 | { | 509 | { |
522 | die_owner = -1; | 510 | die_owner = -1; |
523 | bust_spinlocks(0); | 511 | bust_spinlocks(0); |
524 | die_nest_count--; | 512 | die_nest_count--; |
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index e94bdb6add1d..41e01b145c48 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -73,7 +73,7 @@ int is_visws_box(void) | |||
73 | return visws_board_type >= 0; | 73 | return visws_board_type >= 0; |
74 | } | 74 | } |
75 | 75 | ||
76 | static int __init visws_time_init_quirk(void) | 76 | static int __init visws_time_init(void) |
77 | { | 77 | { |
78 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); | 78 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); |
79 | 79 | ||
@@ -93,7 +93,7 @@ static int __init visws_time_init_quirk(void) | |||
93 | return 0; | 93 | return 0; |
94 | } | 94 | } |
95 | 95 | ||
96 | static int __init visws_pre_intr_init_quirk(void) | 96 | static int __init visws_pre_intr_init(void) |
97 | { | 97 | { |
98 | init_VISWS_APIC_irqs(); | 98 | init_VISWS_APIC_irqs(); |
99 | 99 | ||
@@ -114,7 +114,7 @@ EXPORT_SYMBOL(sgivwfb_mem_size); | |||
114 | 114 | ||
115 | long long mem_size __initdata = 0; | 115 | long long mem_size __initdata = 0; |
116 | 116 | ||
117 | static char * __init visws_memory_setup_quirk(void) | 117 | static char * __init visws_memory_setup(void) |
118 | { | 118 | { |
119 | long long gfx_mem_size = 8 * MB; | 119 | long long gfx_mem_size = 8 * MB; |
120 | 120 | ||
@@ -176,7 +176,7 @@ static void visws_machine_power_off(void) | |||
176 | outl(PIIX_SPECIAL_STOP, 0xCFC); | 176 | outl(PIIX_SPECIAL_STOP, 0xCFC); |
177 | } | 177 | } |
178 | 178 | ||
179 | static int __init visws_get_smp_config_quirk(unsigned int early) | 179 | static int __init visws_get_smp_config(unsigned int early) |
180 | { | 180 | { |
181 | /* | 181 | /* |
182 | * Prevent MP-table parsing by the generic code: | 182 | * Prevent MP-table parsing by the generic code: |
@@ -192,7 +192,7 @@ extern unsigned int __cpuinitdata maxcpus; | |||
192 | * No problem for Linux. | 192 | * No problem for Linux. |
193 | */ | 193 | */ |
194 | 194 | ||
195 | static void __init MP_processor_info (struct mpc_config_processor *m) | 195 | static void __init MP_processor_info(struct mpc_config_processor *m) |
196 | { | 196 | { |
197 | int ver, logical_apicid; | 197 | int ver, logical_apicid; |
198 | physid_mask_t apic_cpus; | 198 | physid_mask_t apic_cpus; |
@@ -232,7 +232,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m) | |||
232 | apic_version[m->mpc_apicid] = ver; | 232 | apic_version[m->mpc_apicid] = ver; |
233 | } | 233 | } |
234 | 234 | ||
235 | int __init visws_find_smp_config_quirk(unsigned int reserve) | 235 | static int __init visws_find_smp_config(unsigned int reserve) |
236 | { | 236 | { |
237 | struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); | 237 | struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); |
238 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); | 238 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); |
@@ -258,7 +258,17 @@ int __init visws_find_smp_config_quirk(unsigned int reserve) | |||
258 | return 1; | 258 | return 1; |
259 | } | 259 | } |
260 | 260 | ||
261 | extern int visws_trap_init_quirk(void); | 261 | static int visws_trap_init(void); |
262 | |||
263 | static struct x86_quirks visws_x86_quirks __initdata = { | ||
264 | .arch_time_init = visws_time_init, | ||
265 | .arch_pre_intr_init = visws_pre_intr_init, | ||
266 | .arch_memory_setup = visws_memory_setup, | ||
267 | .arch_intr_init = NULL, | ||
268 | .arch_trap_init = visws_trap_init, | ||
269 | .mach_get_smp_config = visws_get_smp_config, | ||
270 | .mach_find_smp_config = visws_find_smp_config, | ||
271 | }; | ||
262 | 272 | ||
263 | void __init visws_early_detect(void) | 273 | void __init visws_early_detect(void) |
264 | { | 274 | { |
@@ -272,16 +282,10 @@ void __init visws_early_detect(void) | |||
272 | 282 | ||
273 | /* | 283 | /* |
274 | * Install special quirks for timer, interrupt and memory setup: | 284 | * Install special quirks for timer, interrupt and memory setup: |
275 | */ | ||
276 | arch_time_init_quirk = visws_time_init_quirk; | ||
277 | arch_pre_intr_init_quirk = visws_pre_intr_init_quirk; | ||
278 | arch_memory_setup_quirk = visws_memory_setup_quirk; | ||
279 | |||
280 | /* | ||
281 | * Fall back to generic behavior for traps: | 285 | * Fall back to generic behavior for traps: |
286 | * Override generic MP-table parsing: | ||
282 | */ | 287 | */ |
283 | arch_intr_init_quirk = NULL; | 288 | x86_quirks = &visws_x86_quirks; |
284 | arch_trap_init_quirk = visws_trap_init_quirk; | ||
285 | 289 | ||
286 | /* | 290 | /* |
287 | * Install reboot quirks: | 291 | * Install reboot quirks: |
@@ -294,12 +298,6 @@ void __init visws_early_detect(void) | |||
294 | */ | 298 | */ |
295 | no_broadcast = 0; | 299 | no_broadcast = 0; |
296 | 300 | ||
297 | /* | ||
298 | * Override generic MP-table parsing: | ||
299 | */ | ||
300 | mach_get_smp_config_quirk = visws_get_smp_config_quirk; | ||
301 | mach_find_smp_config_quirk = visws_find_smp_config_quirk; | ||
302 | |||
303 | #ifdef CONFIG_X86_IO_APIC | 301 | #ifdef CONFIG_X86_IO_APIC |
304 | /* | 302 | /* |
305 | * Turn off IO-APIC detection and initialization: | 303 | * Turn off IO-APIC detection and initialization: |
@@ -426,7 +424,7 @@ static __init void cobalt_init(void) | |||
426 | co_apic_read(CO_APIC_ID)); | 424 | co_apic_read(CO_APIC_ID)); |
427 | } | 425 | } |
428 | 426 | ||
429 | int __init visws_trap_init_quirk(void) | 427 | static int __init visws_trap_init(void) |
430 | { | 428 | { |
431 | lithium_init(); | 429 | lithium_init(); |
432 | cobalt_init(); | 430 | cobalt_init(); |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b15346092b7b..0a1b1a9d922d 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -906,7 +906,6 @@ static inline int __init activate_vmi(void) | |||
906 | #ifdef CONFIG_X86_LOCAL_APIC | 906 | #ifdef CONFIG_X86_LOCAL_APIC |
907 | para_fill(pv_apic_ops.apic_read, APICRead); | 907 | para_fill(pv_apic_ops.apic_read, APICRead); |
908 | para_fill(pv_apic_ops.apic_write, APICWrite); | 908 | para_fill(pv_apic_ops.apic_write, APICWrite); |
909 | para_fill(pv_apic_ops.apic_write_atomic, APICWrite); | ||
910 | #endif | 909 | #endif |
911 | 910 | ||
912 | /* | 911 | /* |