aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-07-22 03:06:21 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-22 03:06:21 -0400
commit76c3bb15d6786a0b8da0ad0090e0c9c3672fc08b (patch)
tree3824e008db9d554229a70c85fbbc13238276bd7a /arch/x86/kernel
parent7be42004065ce4df193aeef5befd26805267d0d9 (diff)
parent93ded9b8fd42abe2c3607097963d8de6ad9117eb (diff)
Merge branch 'linus' into x86/x2apic
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/acpi/boot.c6
-rw-r--r--arch/x86/kernel/acpi/sleep.c10
-rw-r--r--arch/x86/kernel/amd_iommu.c231
-rw-r--r--arch/x86/kernel/amd_iommu_init.c357
-rw-r--r--arch/x86/kernel/aperture_64.c1
-rw-r--r--arch/x86/kernel/apic_32.c5
-rw-r--r--arch/x86/kernel/apic_64.c2
-rw-r--r--arch/x86/kernel/asm-offsets_64.c11
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/amd_64.c2
-rw-r--r--arch/x86/kernel/cpu/common_64.c15
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k7.h1
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c7
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/e820.c33
-rw-r--r--arch/x86/kernel/early-quirks.c5
-rw-r--r--arch/x86/kernel/entry_32.S24
-rw-r--r--arch/x86/kernel/entry_64.S120
-rw-r--r--arch/x86/kernel/head64.c11
-rw-r--r--arch/x86/kernel/head_64.S1
-rw-r--r--arch/x86/kernel/io_delay.c3
-rw-r--r--arch/x86/kernel/irq_32.c7
-rw-r--r--arch/x86/kernel/kdebugfs.c8
-rw-r--r--arch/x86/kernel/kprobes.c1
-rw-r--r--arch/x86/kernel/kvmclock.c2
-rw-r--r--arch/x86/kernel/module_64.c10
-rw-r--r--arch/x86/kernel/mpparse.c208
-rw-r--r--arch/x86/kernel/nmi.c7
-rw-r--r--arch/x86/kernel/numaq_32.c197
-rw-r--r--arch/x86/kernel/paravirt.c28
-rw-r--r--arch/x86/kernel/pci-calgary_64.c2
-rw-r--r--arch/x86/kernel/pci-dma.c17
-rw-r--r--arch/x86/kernel/pci-gart_64.c1
-rw-r--r--arch/x86/kernel/pci-nommu.c2
-rw-r--r--arch/x86/kernel/pci-swiotlb_64.c2
-rw-r--r--arch/x86/kernel/process.c5
-rw-r--r--arch/x86/kernel/process_64.c56
-rw-r--r--arch/x86/kernel/ptrace.c151
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/setup.c22
-rw-r--r--arch/x86/kernel/setup_percpu.c6
-rw-r--r--arch/x86/kernel/signal_32.c8
-rw-r--r--arch/x86/kernel/signal_64.c6
-rw-r--r--arch/x86/kernel/smpboot.c59
-rw-r--r--arch/x86/kernel/smpcommon_32.c1
-rw-r--r--arch/x86/kernel/step.c35
-rw-r--r--arch/x86/kernel/time_32.c1
-rw-r--r--arch/x86/kernel/traps_32.c118
-rw-r--r--arch/x86/kernel/traps_64.c48
-rw-r--r--arch/x86/kernel/visws_quirks.c42
51 files changed, 1191 insertions, 721 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index ea000467553f..a07ec14f3312 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -7,9 +7,10 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu
7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) 7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
8 8
9ifdef CONFIG_FTRACE 9ifdef CONFIG_FTRACE
10# Do not profile debug utilities 10# Do not profile debug and lowlevel utilities
11CFLAGS_REMOVE_tsc.o = -pg 11CFLAGS_REMOVE_tsc.o = -pg
12CFLAGS_REMOVE_rtc.o = -pg 12CFLAGS_REMOVE_rtc.o = -pg
13CFLAGS_REMOVE_paravirt.o = -pg
13endif 14endif
14 15
15# 16#
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b41b27af33e6..12e260e8fb2a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1021,7 +1021,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1021 mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; 1021 mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
1022#endif 1022#endif
1023 set_bit(MP_ISA_BUS, mp_bus_not_pci); 1023 set_bit(MP_ISA_BUS, mp_bus_not_pci);
1024 Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); 1024 pr_debug("Bus #%d is ISA\n", MP_ISA_BUS);
1025 1025
1026#ifdef CONFIG_X86_ES7000 1026#ifdef CONFIG_X86_ES7000
1027 /* 1027 /*
@@ -1127,8 +1127,8 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
1127 return gsi; 1127 return gsi;
1128 } 1128 }
1129 if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { 1129 if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
1130 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", 1130 pr_debug(KERN_DEBUG "Pin %d-%d already programmed\n",
1131 mp_ioapic_routing[ioapic].apic_id, ioapic_pin); 1131 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
1132#ifdef CONFIG_X86_32 1132#ifdef CONFIG_X86_32
1133 return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); 1133 return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
1134#else 1134#else
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 868de3d5c39d..a3ddad18aaa3 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -9,6 +9,7 @@
9#include <linux/bootmem.h> 9#include <linux/bootmem.h>
10#include <linux/dmi.h> 10#include <linux/dmi.h>
11#include <linux/cpumask.h> 11#include <linux/cpumask.h>
12#include <asm/segment.h>
12 13
13#include "realmode/wakeup.h" 14#include "realmode/wakeup.h"
14#include "sleep.h" 15#include "sleep.h"
@@ -23,15 +24,6 @@ static unsigned long acpi_realmode;
23static char temp_stack[10240]; 24static char temp_stack[10240];
24#endif 25#endif
25 26
26/* XXX: this macro should move to asm-x86/segment.h and be shared with the
27 boot code... */
28#define GDT_ENTRY(flags, base, limit) \
29 (((u64)(base & 0xff000000) << 32) | \
30 ((u64)flags << 40) | \
31 ((u64)(limit & 0x00ff0000) << 32) | \
32 ((u64)(base & 0x00ffffff) << 16) | \
33 ((u64)(limit & 0x0000ffff)))
34
35/** 27/**
36 * acpi_save_state_mem - save kernel state 28 * acpi_save_state_mem - save kernel state
37 * 29 *
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f2766d84c7a0..c25210e6ac88 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -23,7 +23,7 @@
23#include <linux/scatterlist.h> 23#include <linux/scatterlist.h>
24#include <linux/iommu-helper.h> 24#include <linux/iommu-helper.h>
25#include <asm/proto.h> 25#include <asm/proto.h>
26#include <asm/gart.h> 26#include <asm/iommu.h>
27#include <asm/amd_iommu_types.h> 27#include <asm/amd_iommu_types.h>
28#include <asm/amd_iommu.h> 28#include <asm/amd_iommu.h>
29 29
@@ -32,21 +32,37 @@
32#define to_pages(addr, size) \ 32#define to_pages(addr, size) \
33 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) 33 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
34 34
35#define EXIT_LOOP_COUNT 10000000
36
35static DEFINE_RWLOCK(amd_iommu_devtable_lock); 37static DEFINE_RWLOCK(amd_iommu_devtable_lock);
36 38
37struct command { 39/*
40 * general struct to manage commands send to an IOMMU
41 */
42struct iommu_cmd {
38 u32 data[4]; 43 u32 data[4];
39}; 44};
40 45
41static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, 46static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
42 struct unity_map_entry *e); 47 struct unity_map_entry *e);
43 48
49/* returns !0 if the IOMMU is caching non-present entries in its TLB */
44static int iommu_has_npcache(struct amd_iommu *iommu) 50static int iommu_has_npcache(struct amd_iommu *iommu)
45{ 51{
46 return iommu->cap & IOMMU_CAP_NPCACHE; 52 return iommu->cap & IOMMU_CAP_NPCACHE;
47} 53}
48 54
49static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) 55/****************************************************************************
56 *
57 * IOMMU command queuing functions
58 *
59 ****************************************************************************/
60
61/*
62 * Writes the command to the IOMMUs command buffer and informs the
63 * hardware about the new command. Must be called with iommu->lock held.
64 */
65static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
50{ 66{
51 u32 tail, head; 67 u32 tail, head;
52 u8 *target; 68 u8 *target;
@@ -63,7 +79,11 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
63 return 0; 79 return 0;
64} 80}
65 81
66static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) 82/*
83 * General queuing function for commands. Takes iommu->lock and calls
84 * __iommu_queue_command().
85 */
86static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
67{ 87{
68 unsigned long flags; 88 unsigned long flags;
69 int ret; 89 int ret;
@@ -75,16 +95,24 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
75 return ret; 95 return ret;
76} 96}
77 97
98/*
99 * This function is called whenever we need to ensure that the IOMMU has
100 * completed execution of all commands we sent. It sends a
101 * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
102 * us about that by writing a value to a physical address we pass with
103 * the command.
104 */
78static int iommu_completion_wait(struct amd_iommu *iommu) 105static int iommu_completion_wait(struct amd_iommu *iommu)
79{ 106{
80 int ret; 107 int ret;
81 struct command cmd; 108 struct iommu_cmd cmd;
82 volatile u64 ready = 0; 109 volatile u64 ready = 0;
83 unsigned long ready_phys = virt_to_phys(&ready); 110 unsigned long ready_phys = virt_to_phys(&ready);
111 unsigned long i = 0;
84 112
85 memset(&cmd, 0, sizeof(cmd)); 113 memset(&cmd, 0, sizeof(cmd));
86 cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; 114 cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
87 cmd.data[1] = HIGH_U32(ready_phys); 115 cmd.data[1] = upper_32_bits(ready_phys);
88 cmd.data[2] = 1; /* value written to 'ready' */ 116 cmd.data[2] = 1; /* value written to 'ready' */
89 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); 117 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
90 118
@@ -95,15 +123,23 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
95 if (ret) 123 if (ret)
96 return ret; 124 return ret;
97 125
98 while (!ready) 126 while (!ready && (i < EXIT_LOOP_COUNT)) {
127 ++i;
99 cpu_relax(); 128 cpu_relax();
129 }
130
131 if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
132 printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
100 133
101 return 0; 134 return 0;
102} 135}
103 136
137/*
138 * Command send function for invalidating a device table entry
139 */
104static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) 140static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
105{ 141{
106 struct command cmd; 142 struct iommu_cmd cmd;
107 143
108 BUG_ON(iommu == NULL); 144 BUG_ON(iommu == NULL);
109 145
@@ -116,20 +152,23 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
116 return iommu_queue_command(iommu, &cmd); 152 return iommu_queue_command(iommu, &cmd);
117} 153}
118 154
155/*
156 * Generic command send function for invalidaing TLB entries
157 */
119static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, 158static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
120 u64 address, u16 domid, int pde, int s) 159 u64 address, u16 domid, int pde, int s)
121{ 160{
122 struct command cmd; 161 struct iommu_cmd cmd;
123 162
124 memset(&cmd, 0, sizeof(cmd)); 163 memset(&cmd, 0, sizeof(cmd));
125 address &= PAGE_MASK; 164 address &= PAGE_MASK;
126 CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); 165 CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
127 cmd.data[1] |= domid; 166 cmd.data[1] |= domid;
128 cmd.data[2] = LOW_U32(address); 167 cmd.data[2] = LOW_U32(address);
129 cmd.data[3] = HIGH_U32(address); 168 cmd.data[3] = upper_32_bits(address);
130 if (s) 169 if (s) /* size bit - we flush more than one 4kb page */
131 cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 170 cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
132 if (pde) 171 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
133 cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; 172 cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
134 173
135 iommu->need_sync = 1; 174 iommu->need_sync = 1;
@@ -137,6 +176,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
137 return iommu_queue_command(iommu, &cmd); 176 return iommu_queue_command(iommu, &cmd);
138} 177}
139 178
179/*
180 * TLB invalidation function which is called from the mapping functions.
181 * It invalidates a single PTE if the range to flush is within a single
182 * page. Otherwise it flushes the whole TLB of the IOMMU.
183 */
140static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, 184static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
141 u64 address, size_t size) 185 u64 address, size_t size)
142{ 186{
@@ -159,6 +203,20 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
159 return 0; 203 return 0;
160} 204}
161 205
206/****************************************************************************
207 *
208 * The functions below are used the create the page table mappings for
209 * unity mapped regions.
210 *
211 ****************************************************************************/
212
213/*
214 * Generic mapping functions. It maps a physical address into a DMA
215 * address space. It allocates the page table pages if necessary.
216 * In the future it can be extended to a generic mapping function
217 * supporting all features of AMD IOMMU page tables like level skipping
218 * and full 64 bit address spaces.
219 */
162static int iommu_map(struct protection_domain *dom, 220static int iommu_map(struct protection_domain *dom,
163 unsigned long bus_addr, 221 unsigned long bus_addr,
164 unsigned long phys_addr, 222 unsigned long phys_addr,
@@ -209,6 +267,10 @@ static int iommu_map(struct protection_domain *dom,
209 return 0; 267 return 0;
210} 268}
211 269
270/*
271 * This function checks if a specific unity mapping entry is needed for
272 * this specific IOMMU.
273 */
212static int iommu_for_unity_map(struct amd_iommu *iommu, 274static int iommu_for_unity_map(struct amd_iommu *iommu,
213 struct unity_map_entry *entry) 275 struct unity_map_entry *entry)
214{ 276{
@@ -223,6 +285,12 @@ static int iommu_for_unity_map(struct amd_iommu *iommu,
223 return 0; 285 return 0;
224} 286}
225 287
288/*
289 * Init the unity mappings for a specific IOMMU in the system
290 *
291 * Basically iterates over all unity mapping entries and applies them to
292 * the default domain DMA of that IOMMU if necessary.
293 */
226static int iommu_init_unity_mappings(struct amd_iommu *iommu) 294static int iommu_init_unity_mappings(struct amd_iommu *iommu)
227{ 295{
228 struct unity_map_entry *entry; 296 struct unity_map_entry *entry;
@@ -239,6 +307,10 @@ static int iommu_init_unity_mappings(struct amd_iommu *iommu)
239 return 0; 307 return 0;
240} 308}
241 309
310/*
311 * This function actually applies the mapping to the page table of the
312 * dma_ops domain.
313 */
242static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, 314static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
243 struct unity_map_entry *e) 315 struct unity_map_entry *e)
244{ 316{
@@ -261,6 +333,9 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
261 return 0; 333 return 0;
262} 334}
263 335
336/*
337 * Inits the unity mappings required for a specific device
338 */
264static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, 339static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
265 u16 devid) 340 u16 devid)
266{ 341{
@@ -278,12 +353,26 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
278 return 0; 353 return 0;
279} 354}
280 355
356/****************************************************************************
357 *
358 * The next functions belong to the address allocator for the dma_ops
359 * interface functions. They work like the allocators in the other IOMMU
360 * drivers. Its basically a bitmap which marks the allocated pages in
361 * the aperture. Maybe it could be enhanced in the future to a more
362 * efficient allocator.
363 *
364 ****************************************************************************/
281static unsigned long dma_mask_to_pages(unsigned long mask) 365static unsigned long dma_mask_to_pages(unsigned long mask)
282{ 366{
283 return (mask >> PAGE_SHIFT) + 367 return (mask >> PAGE_SHIFT) +
284 (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); 368 (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
285} 369}
286 370
371/*
372 * The address allocator core function.
373 *
374 * called with domain->lock held
375 */
287static unsigned long dma_ops_alloc_addresses(struct device *dev, 376static unsigned long dma_ops_alloc_addresses(struct device *dev,
288 struct dma_ops_domain *dom, 377 struct dma_ops_domain *dom,
289 unsigned int pages) 378 unsigned int pages)
@@ -317,6 +406,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
317 return address; 406 return address;
318} 407}
319 408
409/*
410 * The address free function.
411 *
412 * called with domain->lock held
413 */
320static void dma_ops_free_addresses(struct dma_ops_domain *dom, 414static void dma_ops_free_addresses(struct dma_ops_domain *dom,
321 unsigned long address, 415 unsigned long address,
322 unsigned int pages) 416 unsigned int pages)
@@ -325,6 +419,16 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
325 iommu_area_free(dom->bitmap, address, pages); 419 iommu_area_free(dom->bitmap, address, pages);
326} 420}
327 421
422/****************************************************************************
423 *
424 * The next functions belong to the domain allocation. A domain is
425 * allocated for every IOMMU as the default domain. If device isolation
426 * is enabled, every device get its own domain. The most important thing
427 * about domains is the page table mapping the DMA address space they
428 * contain.
429 *
430 ****************************************************************************/
431
328static u16 domain_id_alloc(void) 432static u16 domain_id_alloc(void)
329{ 433{
330 unsigned long flags; 434 unsigned long flags;
@@ -342,6 +446,10 @@ static u16 domain_id_alloc(void)
342 return id; 446 return id;
343} 447}
344 448
449/*
450 * Used to reserve address ranges in the aperture (e.g. for exclusion
451 * ranges.
452 */
345static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, 453static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
346 unsigned long start_page, 454 unsigned long start_page,
347 unsigned int pages) 455 unsigned int pages)
@@ -382,6 +490,10 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
382 free_page((unsigned long)p1); 490 free_page((unsigned long)p1);
383} 491}
384 492
493/*
494 * Free a domain, only used if something went wrong in the
495 * allocation path and we need to free an already allocated page table
496 */
385static void dma_ops_domain_free(struct dma_ops_domain *dom) 497static void dma_ops_domain_free(struct dma_ops_domain *dom)
386{ 498{
387 if (!dom) 499 if (!dom)
@@ -396,6 +508,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
396 kfree(dom); 508 kfree(dom);
397} 509}
398 510
511/*
512 * Allocates a new protection domain usable for the dma_ops functions.
513 * It also intializes the page table and the address allocator data
514 * structures required for the dma_ops interface
515 */
399static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, 516static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
400 unsigned order) 517 unsigned order)
401{ 518{
@@ -436,6 +553,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
436 dma_dom->bitmap[0] = 1; 553 dma_dom->bitmap[0] = 1;
437 dma_dom->next_bit = 0; 554 dma_dom->next_bit = 0;
438 555
556 /* Intialize the exclusion range if necessary */
439 if (iommu->exclusion_start && 557 if (iommu->exclusion_start &&
440 iommu->exclusion_start < dma_dom->aperture_size) { 558 iommu->exclusion_start < dma_dom->aperture_size) {
441 unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; 559 unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
@@ -444,6 +562,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
444 dma_ops_reserve_addresses(dma_dom, startpage, pages); 562 dma_ops_reserve_addresses(dma_dom, startpage, pages);
445 } 563 }
446 564
565 /*
566 * At the last step, build the page tables so we don't need to
567 * allocate page table pages in the dma_ops mapping/unmapping
568 * path.
569 */
447 num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); 570 num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
448 dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), 571 dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
449 GFP_KERNEL); 572 GFP_KERNEL);
@@ -472,6 +595,10 @@ free_dma_dom:
472 return NULL; 595 return NULL;
473} 596}
474 597
598/*
599 * Find out the protection domain structure for a given PCI device. This
600 * will give us the pointer to the page table root for example.
601 */
475static struct protection_domain *domain_for_device(u16 devid) 602static struct protection_domain *domain_for_device(u16 devid)
476{ 603{
477 struct protection_domain *dom; 604 struct protection_domain *dom;
@@ -484,6 +611,10 @@ static struct protection_domain *domain_for_device(u16 devid)
484 return dom; 611 return dom;
485} 612}
486 613
614/*
615 * If a device is not yet associated with a domain, this function does
616 * assigns it visible for the hardware
617 */
487static void set_device_domain(struct amd_iommu *iommu, 618static void set_device_domain(struct amd_iommu *iommu,
488 struct protection_domain *domain, 619 struct protection_domain *domain,
489 u16 devid) 620 u16 devid)
@@ -508,6 +639,19 @@ static void set_device_domain(struct amd_iommu *iommu,
508 iommu->need_sync = 1; 639 iommu->need_sync = 1;
509} 640}
510 641
642/*****************************************************************************
643 *
644 * The next functions belong to the dma_ops mapping/unmapping code.
645 *
646 *****************************************************************************/
647
648/*
649 * In the dma_ops path we only have the struct device. This function
650 * finds the corresponding IOMMU, the protection domain and the
651 * requestor id for a given device.
652 * If the device is not yet associated with a domain this is also done
653 * in this function.
654 */
511static int get_device_resources(struct device *dev, 655static int get_device_resources(struct device *dev,
512 struct amd_iommu **iommu, 656 struct amd_iommu **iommu,
513 struct protection_domain **domain, 657 struct protection_domain **domain,
@@ -520,8 +664,9 @@ static int get_device_resources(struct device *dev,
520 BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); 664 BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);
521 665
522 pcidev = to_pci_dev(dev); 666 pcidev = to_pci_dev(dev);
523 _bdf = (pcidev->bus->number << 8) | pcidev->devfn; 667 _bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
524 668
669 /* device not translated by any IOMMU in the system? */
525 if (_bdf >= amd_iommu_last_bdf) { 670 if (_bdf >= amd_iommu_last_bdf) {
526 *iommu = NULL; 671 *iommu = NULL;
527 *domain = NULL; 672 *domain = NULL;
@@ -547,6 +692,10 @@ static int get_device_resources(struct device *dev,
547 return 1; 692 return 1;
548} 693}
549 694
695/*
696 * This is the generic map function. It maps one 4kb page at paddr to
697 * the given address in the DMA address space for the domain.
698 */
550static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, 699static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
551 struct dma_ops_domain *dom, 700 struct dma_ops_domain *dom,
552 unsigned long address, 701 unsigned long address,
@@ -578,6 +727,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
578 return (dma_addr_t)address; 727 return (dma_addr_t)address;
579} 728}
580 729
730/*
731 * The generic unmapping function for on page in the DMA address space.
732 */
581static void dma_ops_domain_unmap(struct amd_iommu *iommu, 733static void dma_ops_domain_unmap(struct amd_iommu *iommu,
582 struct dma_ops_domain *dom, 734 struct dma_ops_domain *dom,
583 unsigned long address) 735 unsigned long address)
@@ -597,6 +749,12 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
597 *pte = 0ULL; 749 *pte = 0ULL;
598} 750}
599 751
752/*
753 * This function contains common code for mapping of a physically
754 * contiguous memory region into DMA address space. It is uses by all
755 * mapping functions provided by this IOMMU driver.
756 * Must be called with the domain lock held.
757 */
600static dma_addr_t __map_single(struct device *dev, 758static dma_addr_t __map_single(struct device *dev,
601 struct amd_iommu *iommu, 759 struct amd_iommu *iommu,
602 struct dma_ops_domain *dma_dom, 760 struct dma_ops_domain *dma_dom,
@@ -628,6 +786,10 @@ out:
628 return address; 786 return address;
629} 787}
630 788
789/*
790 * Does the reverse of the __map_single function. Must be called with
791 * the domain lock held too
792 */
631static void __unmap_single(struct amd_iommu *iommu, 793static void __unmap_single(struct amd_iommu *iommu,
632 struct dma_ops_domain *dma_dom, 794 struct dma_ops_domain *dma_dom,
633 dma_addr_t dma_addr, 795 dma_addr_t dma_addr,
@@ -652,6 +814,9 @@ static void __unmap_single(struct amd_iommu *iommu,
652 dma_ops_free_addresses(dma_dom, dma_addr, pages); 814 dma_ops_free_addresses(dma_dom, dma_addr, pages);
653} 815}
654 816
817/*
818 * The exported map_single function for dma_ops.
819 */
655static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, 820static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
656 size_t size, int dir) 821 size_t size, int dir)
657{ 822{
@@ -664,6 +829,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
664 get_device_resources(dev, &iommu, &domain, &devid); 829 get_device_resources(dev, &iommu, &domain, &devid);
665 830
666 if (iommu == NULL || domain == NULL) 831 if (iommu == NULL || domain == NULL)
832 /* device not handled by any AMD IOMMU */
667 return (dma_addr_t)paddr; 833 return (dma_addr_t)paddr;
668 834
669 spin_lock_irqsave(&domain->lock, flags); 835 spin_lock_irqsave(&domain->lock, flags);
@@ -683,6 +849,9 @@ out:
683 return addr; 849 return addr;
684} 850}
685 851
852/*
853 * The exported unmap_single function for dma_ops.
854 */
686static void unmap_single(struct device *dev, dma_addr_t dma_addr, 855static void unmap_single(struct device *dev, dma_addr_t dma_addr,
687 size_t size, int dir) 856 size_t size, int dir)
688{ 857{
@@ -692,6 +861,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
692 u16 devid; 861 u16 devid;
693 862
694 if (!get_device_resources(dev, &iommu, &domain, &devid)) 863 if (!get_device_resources(dev, &iommu, &domain, &devid))
864 /* device not handled by any AMD IOMMU */
695 return; 865 return;
696 866
697 spin_lock_irqsave(&domain->lock, flags); 867 spin_lock_irqsave(&domain->lock, flags);
@@ -706,6 +876,10 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
706 spin_unlock_irqrestore(&domain->lock, flags); 876 spin_unlock_irqrestore(&domain->lock, flags);
707} 877}
708 878
879/*
880 * This is a special map_sg function which is used if we should map a
881 * device which is not handled by an AMD IOMMU in the system.
882 */
709static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, 883static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
710 int nelems, int dir) 884 int nelems, int dir)
711{ 885{
@@ -720,6 +894,10 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
720 return nelems; 894 return nelems;
721} 895}
722 896
897/*
898 * The exported map_sg function for dma_ops (handles scatter-gather
899 * lists).
900 */
723static int map_sg(struct device *dev, struct scatterlist *sglist, 901static int map_sg(struct device *dev, struct scatterlist *sglist,
724 int nelems, int dir) 902 int nelems, int dir)
725{ 903{
@@ -775,6 +953,10 @@ unmap:
775 goto out; 953 goto out;
776} 954}
777 955
956/*
957 * The exported map_sg function for dma_ops (handles scatter-gather
958 * lists).
959 */
778static void unmap_sg(struct device *dev, struct scatterlist *sglist, 960static void unmap_sg(struct device *dev, struct scatterlist *sglist,
779 int nelems, int dir) 961 int nelems, int dir)
780{ 962{
@@ -804,6 +986,9 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
804 spin_unlock_irqrestore(&domain->lock, flags); 986 spin_unlock_irqrestore(&domain->lock, flags);
805} 987}
806 988
989/*
990 * The exported alloc_coherent function for dma_ops.
991 */
807static void *alloc_coherent(struct device *dev, size_t size, 992static void *alloc_coherent(struct device *dev, size_t size,
808 dma_addr_t *dma_addr, gfp_t flag) 993 dma_addr_t *dma_addr, gfp_t flag)
809{ 994{
@@ -851,6 +1036,11 @@ out:
851 return virt_addr; 1036 return virt_addr;
852} 1037}
853 1038
1039/*
1040 * The exported free_coherent function for dma_ops.
1041 * FIXME: fix the generic x86 DMA layer so that it actually calls that
1042 * function.
1043 */
854static void free_coherent(struct device *dev, size_t size, 1044static void free_coherent(struct device *dev, size_t size,
855 void *virt_addr, dma_addr_t dma_addr) 1045 void *virt_addr, dma_addr_t dma_addr)
856{ 1046{
@@ -879,6 +1069,8 @@ free_mem:
879} 1069}
880 1070
881/* 1071/*
1072 * The function for pre-allocating protection domains.
1073 *
882 * If the driver core informs the DMA layer if a driver grabs a device 1074 * If the driver core informs the DMA layer if a driver grabs a device
883 * we don't need to preallocate the protection domains anymore. 1075 * we don't need to preallocate the protection domains anymore.
884 * For now we have to. 1076 * For now we have to.
@@ -921,12 +1113,20 @@ static struct dma_mapping_ops amd_iommu_dma_ops = {
921 .unmap_sg = unmap_sg, 1113 .unmap_sg = unmap_sg,
922}; 1114};
923 1115
1116/*
1117 * The function which clues the AMD IOMMU driver into dma_ops.
1118 */
924int __init amd_iommu_init_dma_ops(void) 1119int __init amd_iommu_init_dma_ops(void)
925{ 1120{
926 struct amd_iommu *iommu; 1121 struct amd_iommu *iommu;
927 int order = amd_iommu_aperture_order; 1122 int order = amd_iommu_aperture_order;
928 int ret; 1123 int ret;
929 1124
1125 /*
1126 * first allocate a default protection domain for every IOMMU we
1127 * found in the system. Devices not assigned to any other
1128 * protection domain will be assigned to the default one.
1129 */
930 list_for_each_entry(iommu, &amd_iommu_list, list) { 1130 list_for_each_entry(iommu, &amd_iommu_list, list) {
931 iommu->default_dom = dma_ops_domain_alloc(iommu, order); 1131 iommu->default_dom = dma_ops_domain_alloc(iommu, order);
932 if (iommu->default_dom == NULL) 1132 if (iommu->default_dom == NULL)
@@ -936,6 +1136,10 @@ int __init amd_iommu_init_dma_ops(void)
936 goto free_domains; 1136 goto free_domains;
937 } 1137 }
938 1138
1139 /*
1140 * If device isolation is enabled, pre-allocate the protection
1141 * domains for each device.
1142 */
939 if (amd_iommu_isolate) 1143 if (amd_iommu_isolate)
940 prealloc_protection_domains(); 1144 prealloc_protection_domains();
941 1145
@@ -947,6 +1151,7 @@ int __init amd_iommu_init_dma_ops(void)
947 gart_iommu_aperture = 0; 1151 gart_iommu_aperture = 0;
948#endif 1152#endif
949 1153
1154 /* Make the driver finally visible to the drivers */
950 dma_ops = &amd_iommu_dma_ops; 1155 dma_ops = &amd_iommu_dma_ops;
951 1156
952 return 0; 1157 return 0;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 2a13e430437d..c9d8ff2eb130 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -25,20 +25,13 @@
25#include <asm/pci-direct.h> 25#include <asm/pci-direct.h>
26#include <asm/amd_iommu_types.h> 26#include <asm/amd_iommu_types.h>
27#include <asm/amd_iommu.h> 27#include <asm/amd_iommu.h>
28#include <asm/gart.h> 28#include <asm/iommu.h>
29 29
30/* 30/*
31 * definitions for the ACPI scanning code 31 * definitions for the ACPI scanning code
32 */ 32 */
33#define UPDATE_LAST_BDF(x) do {\
34 if ((x) > amd_iommu_last_bdf) \
35 amd_iommu_last_bdf = (x); \
36 } while (0);
37
38#define DEVID(bus, devfn) (((bus) << 8) | (devfn))
39#define PCI_BUS(x) (((x) >> 8) & 0xff) 33#define PCI_BUS(x) (((x) >> 8) & 0xff)
40#define IVRS_HEADER_LENGTH 48 34#define IVRS_HEADER_LENGTH 48
41#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x))))
42 35
43#define ACPI_IVHD_TYPE 0x10 36#define ACPI_IVHD_TYPE 0x10
44#define ACPI_IVMD_TYPE_ALL 0x20 37#define ACPI_IVMD_TYPE_ALL 0x20
@@ -71,6 +64,17 @@
71#define ACPI_DEVFLAG_LINT1 0x80 64#define ACPI_DEVFLAG_LINT1 0x80
72#define ACPI_DEVFLAG_ATSDIS 0x10000000 65#define ACPI_DEVFLAG_ATSDIS 0x10000000
73 66
67/*
68 * ACPI table definitions
69 *
70 * These data structures are laid over the table to parse the important values
71 * out of it.
72 */
73
74/*
75 * structure describing one IOMMU in the ACPI table. Typically followed by one
76 * or more ivhd_entrys.
77 */
74struct ivhd_header { 78struct ivhd_header {
75 u8 type; 79 u8 type;
76 u8 flags; 80 u8 flags;
@@ -83,6 +87,10 @@ struct ivhd_header {
83 u32 reserved; 87 u32 reserved;
84} __attribute__((packed)); 88} __attribute__((packed));
85 89
90/*
91 * A device entry describing which devices a specific IOMMU translates and
92 * which requestor ids they use.
93 */
86struct ivhd_entry { 94struct ivhd_entry {
87 u8 type; 95 u8 type;
88 u16 devid; 96 u16 devid;
@@ -90,6 +98,10 @@ struct ivhd_entry {
90 u32 ext; 98 u32 ext;
91} __attribute__((packed)); 99} __attribute__((packed));
92 100
101/*
102 * An AMD IOMMU memory definition structure. It defines things like exclusion
103 * ranges for devices and regions that should be unity mapped.
104 */
93struct ivmd_header { 105struct ivmd_header {
94 u8 type; 106 u8 type;
95 u8 flags; 107 u8 flags;
@@ -103,22 +115,80 @@ struct ivmd_header {
103 115
104static int __initdata amd_iommu_detected; 116static int __initdata amd_iommu_detected;
105 117
106u16 amd_iommu_last_bdf; 118u16 amd_iommu_last_bdf; /* largest PCI device id we have
107struct list_head amd_iommu_unity_map; 119 to handle */
108unsigned amd_iommu_aperture_order = 26; 120LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
109int amd_iommu_isolate; 121 we find in ACPI */
122unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
123int amd_iommu_isolate; /* if 1, device isolation is enabled */
124
125LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
126 system */
110 127
111struct list_head amd_iommu_list; 128/*
129 * Pointer to the device table which is shared by all AMD IOMMUs
130 * it is indexed by the PCI device id or the HT unit id and contains
131 * information about the domain the device belongs to as well as the
132 * page table root pointer.
133 */
112struct dev_table_entry *amd_iommu_dev_table; 134struct dev_table_entry *amd_iommu_dev_table;
135
136/*
137 * The alias table is a driver specific data structure which contains the
138 * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
139 * More than one device can share the same requestor id.
140 */
113u16 *amd_iommu_alias_table; 141u16 *amd_iommu_alias_table;
142
143/*
144 * The rlookup table is used to find the IOMMU which is responsible
145 * for a specific device. It is also indexed by the PCI device id.
146 */
114struct amd_iommu **amd_iommu_rlookup_table; 147struct amd_iommu **amd_iommu_rlookup_table;
148
149/*
150 * The pd table (protection domain table) is used to find the protection domain
151 * data structure a device belongs to. Indexed with the PCI device id too.
152 */
115struct protection_domain **amd_iommu_pd_table; 153struct protection_domain **amd_iommu_pd_table;
154
155/*
156 * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap
157 * to know which ones are already in use.
158 */
116unsigned long *amd_iommu_pd_alloc_bitmap; 159unsigned long *amd_iommu_pd_alloc_bitmap;
117 160
118static u32 dev_table_size; 161static u32 dev_table_size; /* size of the device table */
119static u32 alias_table_size; 162static u32 alias_table_size; /* size of the alias table */
120static u32 rlookup_table_size; 163static u32 rlookup_table_size; /* size if the rlookup table */
121 164
165static inline void update_last_devid(u16 devid)
166{
167 if (devid > amd_iommu_last_bdf)
168 amd_iommu_last_bdf = devid;
169}
170
171static inline unsigned long tbl_size(int entry_size)
172{
173 unsigned shift = PAGE_SHIFT +
174 get_order(amd_iommu_last_bdf * entry_size);
175
176 return 1UL << shift;
177}
178
179/****************************************************************************
180 *
181 * AMD IOMMU MMIO register space handling functions
182 *
183 * These functions are used to program the IOMMU device registers in
184 * MMIO space required for that driver.
185 *
186 ****************************************************************************/
187
188/*
189 * This function set the exclusion range in the IOMMU. DMA accesses to the
190 * exclusion range are passed through untranslated
191 */
122static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) 192static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
123{ 193{
124 u64 start = iommu->exclusion_start & PAGE_MASK; 194 u64 start = iommu->exclusion_start & PAGE_MASK;
@@ -137,6 +207,7 @@ static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
137 &entry, sizeof(entry)); 207 &entry, sizeof(entry));
138} 208}
139 209
210/* Programs the physical address of the device table into the IOMMU hardware */
140static void __init iommu_set_device_table(struct amd_iommu *iommu) 211static void __init iommu_set_device_table(struct amd_iommu *iommu)
141{ 212{
142 u32 entry; 213 u32 entry;
@@ -149,6 +220,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu)
149 &entry, sizeof(entry)); 220 &entry, sizeof(entry));
150} 221}
151 222
223/* Generic functions to enable/disable certain features of the IOMMU. */
152static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) 224static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
153{ 225{
154 u32 ctrl; 226 u32 ctrl;
@@ -167,6 +239,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
167 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 239 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
168} 240}
169 241
242/* Function to enable the hardware */
170void __init iommu_enable(struct amd_iommu *iommu) 243void __init iommu_enable(struct amd_iommu *iommu)
171{ 244{
172 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); 245 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at ");
@@ -176,6 +249,10 @@ void __init iommu_enable(struct amd_iommu *iommu)
176 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 249 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
177} 250}
178 251
252/*
253 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
254 * the system has one.
255 */
179static u8 * __init iommu_map_mmio_space(u64 address) 256static u8 * __init iommu_map_mmio_space(u64 address)
180{ 257{
181 u8 *ret; 258 u8 *ret;
@@ -199,16 +276,33 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
199 release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); 276 release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH);
200} 277}
201 278
279/****************************************************************************
280 *
281 * The functions below belong to the first pass of AMD IOMMU ACPI table
282 * parsing. In this pass we try to find out the highest device id this
283 * code has to handle. Upon this information the size of the shared data
284 * structures is determined later.
285 *
286 ****************************************************************************/
287
288/*
289 * This function reads the last device id the IOMMU has to handle from the PCI
290 * capability header for this IOMMU
291 */
202static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) 292static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
203{ 293{
204 u32 cap; 294 u32 cap;
205 295
206 cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); 296 cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
207 UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); 297 update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
208 298
209 return 0; 299 return 0;
210} 300}
211 301
302/*
303 * After reading the highest device id from the IOMMU PCI capability header
304 * this function looks if there is a higher device id defined in the ACPI table
305 */
212static int __init find_last_devid_from_ivhd(struct ivhd_header *h) 306static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
213{ 307{
214 u8 *p = (void *)h, *end = (void *)h; 308 u8 *p = (void *)h, *end = (void *)h;
@@ -229,7 +323,8 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
229 case IVHD_DEV_RANGE_END: 323 case IVHD_DEV_RANGE_END:
230 case IVHD_DEV_ALIAS: 324 case IVHD_DEV_ALIAS:
231 case IVHD_DEV_EXT_SELECT: 325 case IVHD_DEV_EXT_SELECT:
232 UPDATE_LAST_BDF(dev->devid); 326 /* all the above subfield types refer to device ids */
327 update_last_devid(dev->devid);
233 break; 328 break;
234 default: 329 default:
235 break; 330 break;
@@ -242,6 +337,11 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
242 return 0; 337 return 0;
243} 338}
244 339
340/*
341 * Iterate over all IVHD entries in the ACPI table and find the highest device
342 * id which we need to handle. This is the first of three functions which parse
343 * the ACPI table. So we check the checksum here.
344 */
245static int __init find_last_devid_acpi(struct acpi_table_header *table) 345static int __init find_last_devid_acpi(struct acpi_table_header *table)
246{ 346{
247 int i; 347 int i;
@@ -277,19 +377,31 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
277 return 0; 377 return 0;
278} 378}
279 379
380/****************************************************************************
381 *
382 * The following functions belong the the code path which parses the ACPI table
383 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
384 * data structures, initialize the device/alias/rlookup table and also
385 * basically initialize the hardware.
386 *
387 ****************************************************************************/
388
389/*
390 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
391 * write commands to that buffer later and the IOMMU will execute them
392 * asynchronously
393 */
280static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) 394static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
281{ 395{
282 u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL, 396 u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
283 get_order(CMD_BUFFER_SIZE)); 397 get_order(CMD_BUFFER_SIZE));
284 u64 entry = 0; 398 u64 entry;
285 399
286 if (cmd_buf == NULL) 400 if (cmd_buf == NULL)
287 return NULL; 401 return NULL;
288 402
289 iommu->cmd_buf_size = CMD_BUFFER_SIZE; 403 iommu->cmd_buf_size = CMD_BUFFER_SIZE;
290 404
291 memset(cmd_buf, 0, CMD_BUFFER_SIZE);
292
293 entry = (u64)virt_to_phys(cmd_buf); 405 entry = (u64)virt_to_phys(cmd_buf);
294 entry |= MMIO_CMD_SIZE_512; 406 entry |= MMIO_CMD_SIZE_512;
295 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, 407 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
@@ -302,11 +414,10 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
302 414
303static void __init free_command_buffer(struct amd_iommu *iommu) 415static void __init free_command_buffer(struct amd_iommu *iommu)
304{ 416{
305 if (iommu->cmd_buf) 417 free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
306 free_pages((unsigned long)iommu->cmd_buf,
307 get_order(CMD_BUFFER_SIZE));
308} 418}
309 419
420/* sets a specific bit in the device table entry. */
310static void set_dev_entry_bit(u16 devid, u8 bit) 421static void set_dev_entry_bit(u16 devid, u8 bit)
311{ 422{
312 int i = (bit >> 5) & 0x07; 423 int i = (bit >> 5) & 0x07;
@@ -315,7 +426,18 @@ static void set_dev_entry_bit(u16 devid, u8 bit)
315 amd_iommu_dev_table[devid].data[i] |= (1 << _bit); 426 amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
316} 427}
317 428
318static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) 429/* Writes the specific IOMMU for a device into the rlookup table */
430static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
431{
432 amd_iommu_rlookup_table[devid] = iommu;
433}
434
435/*
436 * This function takes the device specific flags read from the ACPI
437 * table and sets up the device table entry with that information
438 */
439static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
440 u16 devid, u32 flags, u32 ext_flags)
319{ 441{
320 if (flags & ACPI_DEVFLAG_INITPASS) 442 if (flags & ACPI_DEVFLAG_INITPASS)
321 set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); 443 set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
@@ -331,13 +453,14 @@ static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
331 set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); 453 set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
332 if (flags & ACPI_DEVFLAG_LINT1) 454 if (flags & ACPI_DEVFLAG_LINT1)
333 set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); 455 set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
334}
335 456
336static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) 457 set_iommu_for_device(iommu, devid);
337{
338 amd_iommu_rlookup_table[devid] = iommu;
339} 458}
340 459
460/*
461 * Reads the device exclusion range from ACPI and initialize IOMMU with
462 * it
463 */
341static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) 464static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
342{ 465{
343 struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; 466 struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
@@ -346,12 +469,22 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
346 return; 469 return;
347 470
348 if (iommu) { 471 if (iommu) {
472 /*
473 * We only can configure exclusion ranges per IOMMU, not
474 * per device. But we can enable the exclusion range per
475 * device. This is done here
476 */
349 set_dev_entry_bit(m->devid, DEV_ENTRY_EX); 477 set_dev_entry_bit(m->devid, DEV_ENTRY_EX);
350 iommu->exclusion_start = m->range_start; 478 iommu->exclusion_start = m->range_start;
351 iommu->exclusion_length = m->range_length; 479 iommu->exclusion_length = m->range_length;
352 } 480 }
353} 481}
354 482
483/*
484 * This function reads some important data from the IOMMU PCI space and
485 * initializes the driver data structure with it. It reads the hardware
486 * capabilities and the first/last device entries
487 */
355static void __init init_iommu_from_pci(struct amd_iommu *iommu) 488static void __init init_iommu_from_pci(struct amd_iommu *iommu)
356{ 489{
357 int bus = PCI_BUS(iommu->devid); 490 int bus = PCI_BUS(iommu->devid);
@@ -363,10 +496,16 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
363 iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); 496 iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET);
364 497
365 range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); 498 range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
366 iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range)); 499 iommu->first_device = calc_devid(MMIO_GET_BUS(range),
367 iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range)); 500 MMIO_GET_FD(range));
501 iommu->last_device = calc_devid(MMIO_GET_BUS(range),
502 MMIO_GET_LD(range));
368} 503}
369 504
505/*
506 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
507 * initializes the hardware and our data structures with it.
508 */
370static void __init init_iommu_from_acpi(struct amd_iommu *iommu, 509static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
371 struct ivhd_header *h) 510 struct ivhd_header *h)
372{ 511{
@@ -374,7 +513,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
374 u8 *end = p, flags = 0; 513 u8 *end = p, flags = 0;
375 u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; 514 u16 dev_i, devid = 0, devid_start = 0, devid_to = 0;
376 u32 ext_flags = 0; 515 u32 ext_flags = 0;
377 bool alias = 0; 516 bool alias = false;
378 struct ivhd_entry *e; 517 struct ivhd_entry *e;
379 518
380 /* 519 /*
@@ -414,22 +553,23 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
414 case IVHD_DEV_ALL: 553 case IVHD_DEV_ALL:
415 for (dev_i = iommu->first_device; 554 for (dev_i = iommu->first_device;
416 dev_i <= iommu->last_device; ++dev_i) 555 dev_i <= iommu->last_device; ++dev_i)
417 set_dev_entry_from_acpi(dev_i, e->flags, 0); 556 set_dev_entry_from_acpi(iommu, dev_i,
557 e->flags, 0);
418 break; 558 break;
419 case IVHD_DEV_SELECT: 559 case IVHD_DEV_SELECT:
420 devid = e->devid; 560 devid = e->devid;
421 set_dev_entry_from_acpi(devid, e->flags, 0); 561 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
422 break; 562 break;
423 case IVHD_DEV_SELECT_RANGE_START: 563 case IVHD_DEV_SELECT_RANGE_START:
424 devid_start = e->devid; 564 devid_start = e->devid;
425 flags = e->flags; 565 flags = e->flags;
426 ext_flags = 0; 566 ext_flags = 0;
427 alias = 0; 567 alias = false;
428 break; 568 break;
429 case IVHD_DEV_ALIAS: 569 case IVHD_DEV_ALIAS:
430 devid = e->devid; 570 devid = e->devid;
431 devid_to = e->ext >> 8; 571 devid_to = e->ext >> 8;
432 set_dev_entry_from_acpi(devid, e->flags, 0); 572 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
433 amd_iommu_alias_table[devid] = devid_to; 573 amd_iommu_alias_table[devid] = devid_to;
434 break; 574 break;
435 case IVHD_DEV_ALIAS_RANGE: 575 case IVHD_DEV_ALIAS_RANGE:
@@ -437,24 +577,25 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
437 flags = e->flags; 577 flags = e->flags;
438 devid_to = e->ext >> 8; 578 devid_to = e->ext >> 8;
439 ext_flags = 0; 579 ext_flags = 0;
440 alias = 1; 580 alias = true;
441 break; 581 break;
442 case IVHD_DEV_EXT_SELECT: 582 case IVHD_DEV_EXT_SELECT:
443 devid = e->devid; 583 devid = e->devid;
444 set_dev_entry_from_acpi(devid, e->flags, e->ext); 584 set_dev_entry_from_acpi(iommu, devid, e->flags,
585 e->ext);
445 break; 586 break;
446 case IVHD_DEV_EXT_SELECT_RANGE: 587 case IVHD_DEV_EXT_SELECT_RANGE:
447 devid_start = e->devid; 588 devid_start = e->devid;
448 flags = e->flags; 589 flags = e->flags;
449 ext_flags = e->ext; 590 ext_flags = e->ext;
450 alias = 0; 591 alias = false;
451 break; 592 break;
452 case IVHD_DEV_RANGE_END: 593 case IVHD_DEV_RANGE_END:
453 devid = e->devid; 594 devid = e->devid;
454 for (dev_i = devid_start; dev_i <= devid; ++dev_i) { 595 for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
455 if (alias) 596 if (alias)
456 amd_iommu_alias_table[dev_i] = devid_to; 597 amd_iommu_alias_table[dev_i] = devid_to;
457 set_dev_entry_from_acpi( 598 set_dev_entry_from_acpi(iommu,
458 amd_iommu_alias_table[dev_i], 599 amd_iommu_alias_table[dev_i],
459 flags, ext_flags); 600 flags, ext_flags);
460 } 601 }
@@ -467,6 +608,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
467 } 608 }
468} 609}
469 610
611/* Initializes the device->iommu mapping for the driver */
470static int __init init_iommu_devices(struct amd_iommu *iommu) 612static int __init init_iommu_devices(struct amd_iommu *iommu)
471{ 613{
472 u16 i; 614 u16 i;
@@ -494,6 +636,11 @@ static void __init free_iommu_all(void)
494 } 636 }
495} 637}
496 638
639/*
640 * This function clues the initialization function for one IOMMU
641 * together and also allocates the command buffer and programs the
642 * hardware. It does NOT enable the IOMMU. This is done afterwards.
643 */
497static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) 644static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
498{ 645{
499 spin_lock_init(&iommu->lock); 646 spin_lock_init(&iommu->lock);
@@ -521,6 +668,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
521 return 0; 668 return 0;
522} 669}
523 670
671/*
672 * Iterates over all IOMMU entries in the ACPI table, allocates the
673 * IOMMU structure and initializes it with init_iommu_one()
674 */
524static int __init init_iommu_all(struct acpi_table_header *table) 675static int __init init_iommu_all(struct acpi_table_header *table)
525{ 676{
526 u8 *p = (u8 *)table, *end = (u8 *)table; 677 u8 *p = (u8 *)table, *end = (u8 *)table;
@@ -528,8 +679,6 @@ static int __init init_iommu_all(struct acpi_table_header *table)
528 struct amd_iommu *iommu; 679 struct amd_iommu *iommu;
529 int ret; 680 int ret;
530 681
531 INIT_LIST_HEAD(&amd_iommu_list);
532
533 end += table->length; 682 end += table->length;
534 p += IVRS_HEADER_LENGTH; 683 p += IVRS_HEADER_LENGTH;
535 684
@@ -555,6 +704,14 @@ static int __init init_iommu_all(struct acpi_table_header *table)
555 return 0; 704 return 0;
556} 705}
557 706
707/****************************************************************************
708 *
709 * The next functions belong to the third pass of parsing the ACPI
710 * table. In this last pass the memory mapping requirements are
711 * gathered (like exclusion and unity mapping reanges).
712 *
713 ****************************************************************************/
714
558static void __init free_unity_maps(void) 715static void __init free_unity_maps(void)
559{ 716{
560 struct unity_map_entry *entry, *next; 717 struct unity_map_entry *entry, *next;
@@ -565,6 +722,7 @@ static void __init free_unity_maps(void)
565 } 722 }
566} 723}
567 724
725/* called when we find an exclusion range definition in ACPI */
568static int __init init_exclusion_range(struct ivmd_header *m) 726static int __init init_exclusion_range(struct ivmd_header *m)
569{ 727{
570 int i; 728 int i;
@@ -588,6 +746,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
588 return 0; 746 return 0;
589} 747}
590 748
749/* called for unity map ACPI definition */
591static int __init init_unity_map_range(struct ivmd_header *m) 750static int __init init_unity_map_range(struct ivmd_header *m)
592{ 751{
593 struct unity_map_entry *e = 0; 752 struct unity_map_entry *e = 0;
@@ -619,13 +778,12 @@ static int __init init_unity_map_range(struct ivmd_header *m)
619 return 0; 778 return 0;
620} 779}
621 780
781/* iterates over all memory definitions we find in the ACPI table */
622static int __init init_memory_definitions(struct acpi_table_header *table) 782static int __init init_memory_definitions(struct acpi_table_header *table)
623{ 783{
624 u8 *p = (u8 *)table, *end = (u8 *)table; 784 u8 *p = (u8 *)table, *end = (u8 *)table;
625 struct ivmd_header *m; 785 struct ivmd_header *m;
626 786
627 INIT_LIST_HEAD(&amd_iommu_unity_map);
628
629 end += table->length; 787 end += table->length;
630 p += IVRS_HEADER_LENGTH; 788 p += IVRS_HEADER_LENGTH;
631 789
@@ -642,6 +800,10 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
642 return 0; 800 return 0;
643} 801}
644 802
803/*
804 * This function finally enables all IOMMUs found in the system after
805 * they have been initialized
806 */
645static void __init enable_iommus(void) 807static void __init enable_iommus(void)
646{ 808{
647 struct amd_iommu *iommu; 809 struct amd_iommu *iommu;
@@ -678,6 +840,34 @@ static struct sys_device device_amd_iommu = {
678 .cls = &amd_iommu_sysdev_class, 840 .cls = &amd_iommu_sysdev_class,
679}; 841};
680 842
843/*
844 * This is the core init function for AMD IOMMU hardware in the system.
845 * This function is called from the generic x86 DMA layer initialization
846 * code.
847 *
848 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
849 * three times:
850 *
851 * 1 pass) Find the highest PCI device id the driver has to handle.
852 * Upon this information the size of the data structures is
853 * determined that needs to be allocated.
854 *
855 * 2 pass) Initialize the data structures just allocated with the
856 * information in the ACPI table about available AMD IOMMUs
857 * in the system. It also maps the PCI devices in the
858 * system to specific IOMMUs
859 *
860 * 3 pass) After the basic data structures are allocated and
861 * initialized we update them with information about memory
862 * remapping requirements parsed out of the ACPI table in
863 * this last pass.
864 *
865 * After that the hardware is initialized and ready to go. In the last
866 * step we do some Linux specific things like registering the driver in
867 * the dma_ops interface and initializing the suspend/resume support
868 * functions. Finally it prints some information about AMD IOMMUs and
869 * the driver state and enables the hardware.
870 */
681int __init amd_iommu_init(void) 871int __init amd_iommu_init(void)
682{ 872{
683 int i, ret = 0; 873 int i, ret = 0;
@@ -699,14 +889,14 @@ int __init amd_iommu_init(void)
699 if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) 889 if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
700 return -ENODEV; 890 return -ENODEV;
701 891
702 dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE); 892 dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE);
703 alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE); 893 alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
704 rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE); 894 rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
705 895
706 ret = -ENOMEM; 896 ret = -ENOMEM;
707 897
708 /* Device table - directly used by all IOMMUs */ 898 /* Device table - directly used by all IOMMUs */
709 amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL, 899 amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
710 get_order(dev_table_size)); 900 get_order(dev_table_size));
711 if (amd_iommu_dev_table == NULL) 901 if (amd_iommu_dev_table == NULL)
712 goto out; 902 goto out;
@@ -730,27 +920,23 @@ int __init amd_iommu_init(void)
730 * Protection Domain table - maps devices to protection domains 920 * Protection Domain table - maps devices to protection domains
731 * This table has the same size as the rlookup_table 921 * This table has the same size as the rlookup_table
732 */ 922 */
733 amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL, 923 amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
734 get_order(rlookup_table_size)); 924 get_order(rlookup_table_size));
735 if (amd_iommu_pd_table == NULL) 925 if (amd_iommu_pd_table == NULL)
736 goto free; 926 goto free;
737 927
738 amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL, 928 amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
929 GFP_KERNEL | __GFP_ZERO,
739 get_order(MAX_DOMAIN_ID/8)); 930 get_order(MAX_DOMAIN_ID/8));
740 if (amd_iommu_pd_alloc_bitmap == NULL) 931 if (amd_iommu_pd_alloc_bitmap == NULL)
741 goto free; 932 goto free;
742 933
743 /* 934 /*
744 * memory is allocated now; initialize the device table with all zeroes 935 * let all alias entries point to itself
745 * and let all alias entries point to itself
746 */ 936 */
747 memset(amd_iommu_dev_table, 0, dev_table_size);
748 for (i = 0; i < amd_iommu_last_bdf; ++i) 937 for (i = 0; i < amd_iommu_last_bdf; ++i)
749 amd_iommu_alias_table[i] = i; 938 amd_iommu_alias_table[i] = i;
750 939
751 memset(amd_iommu_pd_table, 0, rlookup_table_size);
752 memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8);
753
754 /* 940 /*
755 * never allocate domain 0 because its used as the non-allocated and 941 * never allocate domain 0 because its used as the non-allocated and
756 * error value placeholder 942 * error value placeholder
@@ -795,24 +981,19 @@ out:
795 return ret; 981 return ret;
796 982
797free: 983free:
798 if (amd_iommu_pd_alloc_bitmap) 984 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
799 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
800 985
801 if (amd_iommu_pd_table) 986 free_pages((unsigned long)amd_iommu_pd_table,
802 free_pages((unsigned long)amd_iommu_pd_table, 987 get_order(rlookup_table_size));
803 get_order(rlookup_table_size));
804 988
805 if (amd_iommu_rlookup_table) 989 free_pages((unsigned long)amd_iommu_rlookup_table,
806 free_pages((unsigned long)amd_iommu_rlookup_table, 990 get_order(rlookup_table_size));
807 get_order(rlookup_table_size));
808 991
809 if (amd_iommu_alias_table) 992 free_pages((unsigned long)amd_iommu_alias_table,
810 free_pages((unsigned long)amd_iommu_alias_table, 993 get_order(alias_table_size));
811 get_order(alias_table_size));
812 994
813 if (amd_iommu_dev_table) 995 free_pages((unsigned long)amd_iommu_dev_table,
814 free_pages((unsigned long)amd_iommu_dev_table, 996 get_order(dev_table_size));
815 get_order(dev_table_size));
816 997
817 free_iommu_all(); 998 free_iommu_all();
818 999
@@ -821,6 +1002,13 @@ free:
821 goto out; 1002 goto out;
822} 1003}
823 1004
1005/****************************************************************************
1006 *
1007 * Early detect code. This code runs at IOMMU detection time in the DMA
1008 * layer. It just looks if there is an IVRS ACPI table to detect AMD
1009 * IOMMUs
1010 *
1011 ****************************************************************************/
824static int __init early_amd_iommu_detect(struct acpi_table_header *table) 1012static int __init early_amd_iommu_detect(struct acpi_table_header *table)
825{ 1013{
826 return 0; 1014 return 0;
@@ -828,7 +1016,7 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
828 1016
829void __init amd_iommu_detect(void) 1017void __init amd_iommu_detect(void)
830{ 1018{
831 if (swiotlb || no_iommu || iommu_detected) 1019 if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture))
832 return; 1020 return;
833 1021
834 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { 1022 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
@@ -841,6 +1029,13 @@ void __init amd_iommu_detect(void)
841 } 1029 }
842} 1030}
843 1031
1032/****************************************************************************
1033 *
1034 * Parsing functions for the AMD IOMMU specific kernel command line
1035 * options.
1036 *
1037 ****************************************************************************/
1038
844static int __init parse_amd_iommu_options(char *str) 1039static int __init parse_amd_iommu_options(char *str)
845{ 1040{
846 for (; *str; ++str) { 1041 for (; *str; ++str) {
@@ -853,20 +1048,10 @@ static int __init parse_amd_iommu_options(char *str)
853 1048
854static int __init parse_amd_iommu_size_options(char *str) 1049static int __init parse_amd_iommu_size_options(char *str)
855{ 1050{
856 for (; *str; ++str) { 1051 unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
857 if (strcmp(str, "32M") == 0) 1052
858 amd_iommu_aperture_order = 25; 1053 if ((order > 24) && (order < 31))
859 if (strcmp(str, "64M") == 0) 1054 amd_iommu_aperture_order = order;
860 amd_iommu_aperture_order = 26;
861 if (strcmp(str, "128M") == 0)
862 amd_iommu_aperture_order = 27;
863 if (strcmp(str, "256M") == 0)
864 amd_iommu_aperture_order = 28;
865 if (strcmp(str, "512M") == 0)
866 amd_iommu_aperture_order = 29;
867 if (strcmp(str, "1G") == 0)
868 amd_iommu_aperture_order = 30;
869 }
870 1055
871 return 1; 1056 return 1;
872} 1057}
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 9f907806c1a5..44e21826db11 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -21,6 +21,7 @@
21#include <linux/suspend.h> 21#include <linux/suspend.h>
22#include <asm/e820.h> 22#include <asm/e820.h>
23#include <asm/io.h> 23#include <asm/io.h>
24#include <asm/iommu.h>
24#include <asm/gart.h> 25#include <asm/gart.h>
25#include <asm/pci-direct.h> 26#include <asm/pci-direct.h>
26#include <asm/dma.h> 27#include <asm/dma.h>
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 8728f54a93d8..f93c18f5b79d 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1251,9 +1251,6 @@ int apic_version[MAX_APICS];
1251 1251
1252int __init APIC_init_uniprocessor(void) 1252int __init APIC_init_uniprocessor(void)
1253{ 1253{
1254 if (disable_apic)
1255 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1256
1257 if (!smp_found_config && !cpu_has_apic) 1254 if (!smp_found_config && !cpu_has_apic)
1258 return -1; 1255 return -1;
1259 1256
@@ -1737,7 +1734,7 @@ early_param("lapic", parse_lapic);
1737static int __init parse_nolapic(char *arg) 1734static int __init parse_nolapic(char *arg)
1738{ 1735{
1739 disable_apic = 1; 1736 disable_apic = 1;
1740 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 1737 setup_clear_cpu_cap(X86_FEATURE_APIC);
1741 return 0; 1738 return 0;
1742} 1739}
1743early_param("nolapic", parse_nolapic); 1740early_param("nolapic", parse_nolapic);
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index a850bc63fb1c..cd63c0bc6180 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1556,7 +1556,7 @@ early_param("apic", apic_set_verbosity);
1556static __init int setup_disableapic(char *str) 1556static __init int setup_disableapic(char *str)
1557{ 1557{
1558 disable_apic = 1; 1558 disable_apic = 1;
1559 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 1559 setup_clear_cpu_cap(X86_FEATURE_APIC);
1560 return 0; 1560 return 0;
1561} 1561}
1562early_param("disableapic", setup_disableapic); 1562early_param("disableapic", setup_disableapic);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index bacf5deeec2d..aa89387006fe 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -18,6 +18,8 @@
18#include <asm/ia32.h> 18#include <asm/ia32.h>
19#include <asm/bootparam.h> 19#include <asm/bootparam.h>
20 20
21#include <xen/interface/xen.h>
22
21#define __NO_STUBS 1 23#define __NO_STUBS 1
22#undef __SYSCALL 24#undef __SYSCALL
23#undef _ASM_X86_64_UNISTD_H_ 25#undef _ASM_X86_64_UNISTD_H_
@@ -131,5 +133,14 @@ int main(void)
131 OFFSET(BP_loadflags, boot_params, hdr.loadflags); 133 OFFSET(BP_loadflags, boot_params, hdr.loadflags);
132 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); 134 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
133 OFFSET(BP_version, boot_params, hdr.version); 135 OFFSET(BP_version, boot_params, hdr.version);
136
137 BLANK();
138 DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
139#ifdef CONFIG_XEN
140 BLANK();
141 OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
142 OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
143#undef ENTRY
144#endif
134 return 0; 145 return 0;
135} 146}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 81a07ca65d44..cae9cabc3031 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -24,8 +24,6 @@
24extern void vide(void); 24extern void vide(void);
25__asm__(".align 4\nvide: ret"); 25__asm__(".align 4\nvide: ret");
26 26
27int force_mwait __cpuinitdata;
28
29static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) 27static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
30{ 28{
31 if (cpuid_eax(0x80000000) >= 0x80000007) { 29 if (cpuid_eax(0x80000000) >= 0x80000007) {
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index 7c36fb8a28d4..d1692b2a41ff 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -115,6 +115,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
115 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ 115 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
116 if (c->x86_power & (1<<8)) 116 if (c->x86_power & (1<<8))
117 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 117 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
118
119 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
118} 120}
119 121
120static void __cpuinit init_amd(struct cpuinfo_x86 *c) 122static void __cpuinit init_amd(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index c6bee77ca9e6..6f9b8924bdc0 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -7,15 +7,13 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/kgdb.h> 8#include <linux/kgdb.h>
9#include <linux/topology.h> 9#include <linux/topology.h>
10#include <linux/string.h>
11#include <linux/delay.h> 10#include <linux/delay.h>
12#include <linux/smp.h> 11#include <linux/smp.h>
13#include <linux/module.h>
14#include <linux/percpu.h> 12#include <linux/percpu.h>
15#include <asm/processor.h>
16#include <asm/i387.h> 13#include <asm/i387.h>
17#include <asm/msr.h> 14#include <asm/msr.h>
18#include <asm/io.h> 15#include <asm/io.h>
16#include <asm/linkage.h>
19#include <asm/mmu_context.h> 17#include <asm/mmu_context.h>
20#include <asm/mtrr.h> 18#include <asm/mtrr.h>
21#include <asm/mce.h> 19#include <asm/mce.h>
@@ -305,7 +303,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
305 c->x86_capability[2] = cpuid_edx(0x80860001); 303 c->x86_capability[2] = cpuid_edx(0x80860001);
306 } 304 }
307 305
308 c->extended_cpuid_level = cpuid_eax(0x80000000);
309 if (c->extended_cpuid_level >= 0x80000007) 306 if (c->extended_cpuid_level >= 0x80000007)
310 c->x86_power = cpuid_edx(0x80000007); 307 c->x86_power = cpuid_edx(0x80000007);
311 308
@@ -316,18 +313,11 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
316 c->x86_phys_bits = eax & 0xff; 313 c->x86_phys_bits = eax & 0xff;
317 } 314 }
318 315
319 /* Assume all 64-bit CPUs support 32-bit syscall */
320 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
321
322 if (c->x86_vendor != X86_VENDOR_UNKNOWN && 316 if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
323 cpu_devs[c->x86_vendor]->c_early_init) 317 cpu_devs[c->x86_vendor]->c_early_init)
324 cpu_devs[c->x86_vendor]->c_early_init(c); 318 cpu_devs[c->x86_vendor]->c_early_init(c);
325 319
326 validate_pat_support(c); 320 validate_pat_support(c);
327
328 /* early_param could clear that, but recall get it set again */
329 if (disable_apic)
330 clear_cpu_cap(c, X86_FEATURE_APIC);
331} 321}
332 322
333/* 323/*
@@ -517,8 +507,7 @@ void pda_init(int cpu)
517} 507}
518 508
519char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + 509char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
520 DEBUG_STKSZ] 510 DEBUG_STKSZ] __page_aligned_bss;
521__attribute__((section(".bss.page_aligned")));
522 511
523extern asmlinkage void ignore_sysret(void); 512extern asmlinkage void ignore_sysret(void);
524 513
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
index f8a63b3664e3..35fb4eaf6e1c 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
@@ -1,5 +1,4 @@
1/* 1/*
2 * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $
3 * (C) 2003 Dave Jones. 2 * (C) 2003 Dave Jones.
4 * 3 *
5 * Licensed under the terms of the GNU GPL License version 2. 4 * Licensed under the terms of the GNU GPL License version 2.
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 2c8afafa18e8..ff517f0b8cc4 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -780,15 +780,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
780 } 780 }
781 kobject_put(per_cpu(cache_kobject, cpu)); 781 kobject_put(per_cpu(cache_kobject, cpu));
782 cpuid4_cache_sysfs_exit(cpu); 782 cpuid4_cache_sysfs_exit(cpu);
783 break; 783 return retval;
784 } 784 }
785 kobject_uevent(&(this_object->kobj), KOBJ_ADD); 785 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
786 } 786 }
787 if (!retval) 787 cpu_set(cpu, cache_dev_map);
788 cpu_set(cpu, cache_dev_map);
789 788
790 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); 789 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
791 return retval; 790 return 0;
792} 791}
793 792
794static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) 793static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 6d4bdc02388a..de7439f82b92 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -250,7 +250,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr,
250 250
251 do_div(count, nmi_hz); 251 do_div(count, nmi_hz);
252 if(descr) 252 if(descr)
253 Dprintk("setting %s to -0x%08Lx\n", descr, count); 253 pr_debug("setting %s to -0x%08Lx\n", descr, count);
254 wrmsrl(perfctr_msr, 0 - count); 254 wrmsrl(perfctr_msr, 0 - count);
255} 255}
256 256
@@ -261,7 +261,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr,
261 261
262 do_div(count, nmi_hz); 262 do_div(count, nmi_hz);
263 if(descr) 263 if(descr)
264 Dprintk("setting %s to -0x%08Lx\n", descr, count); 264 pr_debug("setting %s to -0x%08Lx\n", descr, count);
265 wrmsr(perfctr_msr, (u32)(-count), 0); 265 wrmsr(perfctr_msr, (u32)(-count), 0);
266} 266}
267 267
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 28c29180b380..9af89078f7bb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -877,7 +877,8 @@ void __init early_res_to_bootmem(u64 start, u64 end)
877 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) 877 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
878 count++; 878 count++;
879 879
880 printk(KERN_INFO "(%d early reservations) ==> bootmem\n", count); 880 printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
881 count, start, end);
881 for (i = 0; i < count; i++) { 882 for (i = 0; i < count; i++) {
882 struct early_res *r = &early_res[i]; 883 struct early_res *r = &early_res[i];
883 printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, 884 printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i,
@@ -1298,11 +1299,6 @@ void __init e820_reserve_resources(void)
1298 } 1299 }
1299} 1300}
1300 1301
1301/*
1302 * Non-standard memory setup can be specified via this quirk:
1303 */
1304char * (*arch_memory_setup_quirk)(void);
1305
1306char *__init default_machine_specific_memory_setup(void) 1302char *__init default_machine_specific_memory_setup(void)
1307{ 1303{
1308 char *who = "BIOS-e820"; 1304 char *who = "BIOS-e820";
@@ -1343,8 +1339,8 @@ char *__init default_machine_specific_memory_setup(void)
1343 1339
1344char *__init __attribute__((weak)) machine_specific_memory_setup(void) 1340char *__init __attribute__((weak)) machine_specific_memory_setup(void)
1345{ 1341{
1346 if (arch_memory_setup_quirk) { 1342 if (x86_quirks->arch_memory_setup) {
1347 char *who = arch_memory_setup_quirk(); 1343 char *who = x86_quirks->arch_memory_setup();
1348 1344
1349 if (who) 1345 if (who)
1350 return who; 1346 return who;
@@ -1367,24 +1363,3 @@ void __init setup_memory_map(void)
1367 printk(KERN_INFO "BIOS-provided physical RAM map:\n"); 1363 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1368 e820_print_map(who); 1364 e820_print_map(who);
1369} 1365}
1370
1371#ifdef CONFIG_X86_64
1372int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
1373{
1374 int i;
1375
1376 if (slot < 0 || slot >= e820.nr_map)
1377 return -1;
1378 for (i = slot; i < e820.nr_map; i++) {
1379 if (e820.map[i].type != E820_RAM)
1380 continue;
1381 break;
1382 }
1383 if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
1384 return -1;
1385 *addr = e820.map[i].addr;
1386 *size = min_t(u64, e820.map[i].size + e820.map[i].addr,
1387 max_pfn << PAGE_SHIFT) - *addr;
1388 return i + 1;
1389}
1390#endif
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index a0e11c0cc872..4353cf5e6fac 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -16,10 +16,7 @@
16#include <asm/dma.h> 16#include <asm/dma.h>
17#include <asm/io_apic.h> 17#include <asm/io_apic.h>
18#include <asm/apic.h> 18#include <asm/apic.h>
19 19#include <asm/iommu.h>
20#ifdef CONFIG_GART_IOMMU
21#include <asm/gart.h>
22#endif
23 20
24static void __init fix_hypertransport_config(int num, int slot, int func) 21static void __init fix_hypertransport_config(int num, int slot, int func)
25{ 22{
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 6bc07f0f1202..cdfd94cc6b14 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -332,7 +332,7 @@ sysenter_past_esp:
332 GET_THREAD_INFO(%ebp) 332 GET_THREAD_INFO(%ebp)
333 333
334 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ 334 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
335 testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) 335 testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
336 jnz syscall_trace_entry 336 jnz syscall_trace_entry
337 cmpl $(nr_syscalls), %eax 337 cmpl $(nr_syscalls), %eax
338 jae syscall_badsys 338 jae syscall_badsys
@@ -370,7 +370,7 @@ ENTRY(system_call)
370 GET_THREAD_INFO(%ebp) 370 GET_THREAD_INFO(%ebp)
371 # system call tracing in operation / emulation 371 # system call tracing in operation / emulation
372 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ 372 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
373 testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) 373 testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
374 jnz syscall_trace_entry 374 jnz syscall_trace_entry
375 cmpl $(nr_syscalls), %eax 375 cmpl $(nr_syscalls), %eax
376 jae syscall_badsys 376 jae syscall_badsys
@@ -383,10 +383,6 @@ syscall_exit:
383 # setting need_resched or sigpending 383 # setting need_resched or sigpending
384 # between sampling and the iret 384 # between sampling and the iret
385 TRACE_IRQS_OFF 385 TRACE_IRQS_OFF
386 testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
387 jz no_singlestep
388 orl $_TIF_SINGLESTEP,TI_flags(%ebp)
389no_singlestep:
390 movl TI_flags(%ebp), %ecx 386 movl TI_flags(%ebp), %ecx
391 testw $_TIF_ALLWORK_MASK, %cx # current->work 387 testw $_TIF_ALLWORK_MASK, %cx # current->work
392 jne syscall_exit_work 388 jne syscall_exit_work
@@ -514,12 +510,8 @@ END(work_pending)
514syscall_trace_entry: 510syscall_trace_entry:
515 movl $-ENOSYS,PT_EAX(%esp) 511 movl $-ENOSYS,PT_EAX(%esp)
516 movl %esp, %eax 512 movl %esp, %eax
517 xorl %edx,%edx 513 call syscall_trace_enter
518 call do_syscall_trace 514 /* What it returned is what we'll actually use. */
519 cmpl $0, %eax
520 jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
521 # so must skip actual syscall
522 movl PT_ORIG_EAX(%esp), %eax
523 cmpl $(nr_syscalls), %eax 515 cmpl $(nr_syscalls), %eax
524 jnae syscall_call 516 jnae syscall_call
525 jmp syscall_exit 517 jmp syscall_exit
@@ -528,14 +520,13 @@ END(syscall_trace_entry)
528 # perform syscall exit tracing 520 # perform syscall exit tracing
529 ALIGN 521 ALIGN
530syscall_exit_work: 522syscall_exit_work:
531 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl 523 testb $_TIF_WORK_SYSCALL_EXIT, %cl
532 jz work_pending 524 jz work_pending
533 TRACE_IRQS_ON 525 TRACE_IRQS_ON
534 ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call 526 ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
535 # schedule() instead 527 # schedule() instead
536 movl %esp, %eax 528 movl %esp, %eax
537 movl $1, %edx 529 call syscall_trace_leave
538 call do_syscall_trace
539 jmp resume_userspace 530 jmp resume_userspace
540END(syscall_exit_work) 531END(syscall_exit_work)
541 CFI_ENDPROC 532 CFI_ENDPROC
@@ -1024,6 +1015,7 @@ ENDPROC(kernel_thread_helper)
1024ENTRY(xen_sysenter_target) 1015ENTRY(xen_sysenter_target)
1025 RING0_INT_FRAME 1016 RING0_INT_FRAME
1026 addl $5*4, %esp /* remove xen-provided frame */ 1017 addl $5*4, %esp /* remove xen-provided frame */
1018 CFI_ADJUST_CFA_OFFSET -5*4
1027 jmp sysenter_past_esp 1019 jmp sysenter_past_esp
1028 CFI_ENDPROC 1020 CFI_ENDPROC
1029 1021
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ae63e584c340..8410e26f4183 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -349,8 +349,7 @@ ENTRY(system_call_after_swapgs)
349 movq %rcx,RIP-ARGOFFSET(%rsp) 349 movq %rcx,RIP-ARGOFFSET(%rsp)
350 CFI_REL_OFFSET rip,RIP-ARGOFFSET 350 CFI_REL_OFFSET rip,RIP-ARGOFFSET
351 GET_THREAD_INFO(%rcx) 351 GET_THREAD_INFO(%rcx)
352 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ 352 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
353 TI_flags(%rcx)
354 jnz tracesys 353 jnz tracesys
355 cmpq $__NR_syscall_max,%rax 354 cmpq $__NR_syscall_max,%rax
356 ja badsys 355 ja badsys
@@ -430,7 +429,12 @@ tracesys:
430 FIXUP_TOP_OF_STACK %rdi 429 FIXUP_TOP_OF_STACK %rdi
431 movq %rsp,%rdi 430 movq %rsp,%rdi
432 call syscall_trace_enter 431 call syscall_trace_enter
433 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ 432 /*
433 * Reload arg registers from stack in case ptrace changed them.
434 * We don't reload %rax because syscall_trace_enter() returned
435 * the value it wants us to use in the table lookup.
436 */
437 LOAD_ARGS ARGOFFSET, 1
434 RESTORE_REST 438 RESTORE_REST
435 cmpq $__NR_syscall_max,%rax 439 cmpq $__NR_syscall_max,%rax
436 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ 440 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
@@ -483,7 +487,7 @@ int_very_careful:
483 ENABLE_INTERRUPTS(CLBR_NONE) 487 ENABLE_INTERRUPTS(CLBR_NONE)
484 SAVE_REST 488 SAVE_REST
485 /* Check for syscall exit trace */ 489 /* Check for syscall exit trace */
486 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx 490 testl $_TIF_WORK_SYSCALL_EXIT,%edx
487 jz int_signal 491 jz int_signal
488 pushq %rdi 492 pushq %rdi
489 CFI_ADJUST_CFA_OFFSET 8 493 CFI_ADJUST_CFA_OFFSET 8
@@ -491,7 +495,7 @@ int_very_careful:
491 call syscall_trace_leave 495 call syscall_trace_leave
492 popq %rdi 496 popq %rdi
493 CFI_ADJUST_CFA_OFFSET -8 497 CFI_ADJUST_CFA_OFFSET -8
494 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi 498 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
495 jmp int_restore_rest 499 jmp int_restore_rest
496 500
497int_signal: 501int_signal:
@@ -1189,6 +1193,7 @@ END(device_not_available)
1189 /* runs on exception stack */ 1193 /* runs on exception stack */
1190KPROBE_ENTRY(debug) 1194KPROBE_ENTRY(debug)
1191 INTR_FRAME 1195 INTR_FRAME
1196 PARAVIRT_ADJUST_EXCEPTION_FRAME
1192 pushq $0 1197 pushq $0
1193 CFI_ADJUST_CFA_OFFSET 8 1198 CFI_ADJUST_CFA_OFFSET 8
1194 paranoidentry do_debug, DEBUG_STACK 1199 paranoidentry do_debug, DEBUG_STACK
@@ -1198,6 +1203,7 @@ KPROBE_END(debug)
1198 /* runs on exception stack */ 1203 /* runs on exception stack */
1199KPROBE_ENTRY(nmi) 1204KPROBE_ENTRY(nmi)
1200 INTR_FRAME 1205 INTR_FRAME
1206 PARAVIRT_ADJUST_EXCEPTION_FRAME
1201 pushq $-1 1207 pushq $-1
1202 CFI_ADJUST_CFA_OFFSET 8 1208 CFI_ADJUST_CFA_OFFSET 8
1203 paranoidentry do_nmi, 0, 0 1209 paranoidentry do_nmi, 0, 0
@@ -1211,6 +1217,7 @@ KPROBE_END(nmi)
1211 1217
1212KPROBE_ENTRY(int3) 1218KPROBE_ENTRY(int3)
1213 INTR_FRAME 1219 INTR_FRAME
1220 PARAVIRT_ADJUST_EXCEPTION_FRAME
1214 pushq $0 1221 pushq $0
1215 CFI_ADJUST_CFA_OFFSET 8 1222 CFI_ADJUST_CFA_OFFSET 8
1216 paranoidentry do_int3, DEBUG_STACK 1223 paranoidentry do_int3, DEBUG_STACK
@@ -1237,6 +1244,7 @@ END(coprocessor_segment_overrun)
1237 /* runs on exception stack */ 1244 /* runs on exception stack */
1238ENTRY(double_fault) 1245ENTRY(double_fault)
1239 XCPT_FRAME 1246 XCPT_FRAME
1247 PARAVIRT_ADJUST_EXCEPTION_FRAME
1240 paranoidentry do_double_fault 1248 paranoidentry do_double_fault
1241 jmp paranoid_exit1 1249 jmp paranoid_exit1
1242 CFI_ENDPROC 1250 CFI_ENDPROC
@@ -1253,6 +1261,7 @@ END(segment_not_present)
1253 /* runs on exception stack */ 1261 /* runs on exception stack */
1254ENTRY(stack_segment) 1262ENTRY(stack_segment)
1255 XCPT_FRAME 1263 XCPT_FRAME
1264 PARAVIRT_ADJUST_EXCEPTION_FRAME
1256 paranoidentry do_stack_segment 1265 paranoidentry do_stack_segment
1257 jmp paranoid_exit1 1266 jmp paranoid_exit1
1258 CFI_ENDPROC 1267 CFI_ENDPROC
@@ -1278,6 +1287,7 @@ END(spurious_interrupt_bug)
1278 /* runs on exception stack */ 1287 /* runs on exception stack */
1279ENTRY(machine_check) 1288ENTRY(machine_check)
1280 INTR_FRAME 1289 INTR_FRAME
1290 PARAVIRT_ADJUST_EXCEPTION_FRAME
1281 pushq $0 1291 pushq $0
1282 CFI_ADJUST_CFA_OFFSET 8 1292 CFI_ADJUST_CFA_OFFSET 8
1283 paranoidentry do_machine_check 1293 paranoidentry do_machine_check
@@ -1312,3 +1322,103 @@ KPROBE_ENTRY(ignore_sysret)
1312 sysret 1322 sysret
1313 CFI_ENDPROC 1323 CFI_ENDPROC
1314ENDPROC(ignore_sysret) 1324ENDPROC(ignore_sysret)
1325
1326#ifdef CONFIG_XEN
1327ENTRY(xen_hypervisor_callback)
1328 zeroentry xen_do_hypervisor_callback
1329END(xen_hypervisor_callback)
1330
1331/*
1332# A note on the "critical region" in our callback handler.
1333# We want to avoid stacking callback handlers due to events occurring
1334# during handling of the last event. To do this, we keep events disabled
1335# until we've done all processing. HOWEVER, we must enable events before
1336# popping the stack frame (can't be done atomically) and so it would still
1337# be possible to get enough handler activations to overflow the stack.
1338# Although unlikely, bugs of that kind are hard to track down, so we'd
1339# like to avoid the possibility.
1340# So, on entry to the handler we detect whether we interrupted an
1341# existing activation in its critical region -- if so, we pop the current
1342# activation and restart the handler using the previous one.
1343*/
1344ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1345 CFI_STARTPROC
1346/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
1347 see the correct pointer to the pt_regs */
1348 movq %rdi, %rsp # we don't return, adjust the stack frame
1349 CFI_ENDPROC
1350 CFI_DEFAULT_STACK
135111: incl %gs:pda_irqcount
1352 movq %rsp,%rbp
1353 CFI_DEF_CFA_REGISTER rbp
1354 cmovzq %gs:pda_irqstackptr,%rsp
1355 pushq %rbp # backlink for old unwinder
1356 call xen_evtchn_do_upcall
1357 popq %rsp
1358 CFI_DEF_CFA_REGISTER rsp
1359 decl %gs:pda_irqcount
1360 jmp error_exit
1361 CFI_ENDPROC
1362END(do_hypervisor_callback)
1363
1364/*
1365# Hypervisor uses this for application faults while it executes.
1366# We get here for two reasons:
1367# 1. Fault while reloading DS, ES, FS or GS
1368# 2. Fault while executing IRET
1369# Category 1 we do not need to fix up as Xen has already reloaded all segment
1370# registers that could be reloaded and zeroed the others.
1371# Category 2 we fix up by killing the current process. We cannot use the
1372# normal Linux return path in this case because if we use the IRET hypercall
1373# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1374# We distinguish between categories by comparing each saved segment register
1375# with its current contents: any discrepancy means we in category 1.
1376*/
1377ENTRY(xen_failsafe_callback)
1378 framesz = (RIP-0x30) /* workaround buggy gas */
1379 _frame framesz
1380 CFI_REL_OFFSET rcx, 0
1381 CFI_REL_OFFSET r11, 8
1382 movw %ds,%cx
1383 cmpw %cx,0x10(%rsp)
1384 CFI_REMEMBER_STATE
1385 jne 1f
1386 movw %es,%cx
1387 cmpw %cx,0x18(%rsp)
1388 jne 1f
1389 movw %fs,%cx
1390 cmpw %cx,0x20(%rsp)
1391 jne 1f
1392 movw %gs,%cx
1393 cmpw %cx,0x28(%rsp)
1394 jne 1f
1395 /* All segments match their saved values => Category 2 (Bad IRET). */
1396 movq (%rsp),%rcx
1397 CFI_RESTORE rcx
1398 movq 8(%rsp),%r11
1399 CFI_RESTORE r11
1400 addq $0x30,%rsp
1401 CFI_ADJUST_CFA_OFFSET -0x30
1402 pushq $0
1403 CFI_ADJUST_CFA_OFFSET 8
1404 pushq %r11
1405 CFI_ADJUST_CFA_OFFSET 8
1406 pushq %rcx
1407 CFI_ADJUST_CFA_OFFSET 8
1408 jmp general_protection
1409 CFI_RESTORE_STATE
14101: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
1411 movq (%rsp),%rcx
1412 CFI_RESTORE rcx
1413 movq 8(%rsp),%r11
1414 CFI_RESTORE r11
1415 addq $0x30,%rsp
1416 CFI_ADJUST_CFA_OFFSET -0x30
1417 pushq $0
1418 CFI_ADJUST_CFA_OFFSET 8
1419 SAVE_ALL
1420 jmp error_exit
1421 CFI_ENDPROC
1422END(xen_failsafe_callback)
1423
1424#endif /* CONFIG_XEN */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index c97819829146..1b318e903bf6 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -39,6 +39,13 @@ static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
39static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; 39static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
40#endif 40#endif
41 41
42void __init x86_64_init_pda(void)
43{
44 _cpu_pda = __cpu_pda;
45 cpu_pda(0) = &_boot_cpu_pda;
46 pda_init(0);
47}
48
42static void __init zap_identity_mappings(void) 49static void __init zap_identity_mappings(void)
43{ 50{
44 pgd_t *pgd = pgd_offset_k(0UL); 51 pgd_t *pgd = pgd_offset_k(0UL);
@@ -102,9 +109,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
102 109
103 early_printk("Kernel alive\n"); 110 early_printk("Kernel alive\n");
104 111
105 _cpu_pda = __cpu_pda; 112 x86_64_init_pda();
106 cpu_pda(0) = &_boot_cpu_pda;
107 pda_init(0);
108 113
109 early_printk("Kernel really alive\n"); 114 early_printk("Kernel really alive\n");
110 115
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b07ac7b217cb..db3280afe886 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -407,6 +407,7 @@ ENTRY(phys_base)
407 /* This must match the first entry in level2_kernel_pgt */ 407 /* This must match the first entry in level2_kernel_pgt */
408 .quad 0x0000000000000000 408 .quad 0x0000000000000000
409 409
410#include "../../x86/xen/xen-head.S"
410 411
411 .section .bss, "aw", @nobits 412 .section .bss, "aw", @nobits
412 .align L1_CACHE_BYTES 413 .align L1_CACHE_BYTES
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 5921e5f0a640..1c3a66a67f83 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -103,6 +103,9 @@ void __init io_delay_init(void)
103 103
104static int __init io_delay_param(char *s) 104static int __init io_delay_param(char *s)
105{ 105{
106 if (!s)
107 return -EINVAL;
108
106 if (!strcmp(s, "0x80")) 109 if (!strcmp(s, "0x80"))
107 io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; 110 io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
108 else if (!strcmp(s, "0xed")) 111 else if (!strcmp(s, "0xed"))
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 47a6f6f12478..1cf8c1fcc088 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -83,11 +83,8 @@ union irq_ctx {
83static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; 83static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
84static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; 84static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
85 85
86static char softirq_stack[NR_CPUS * THREAD_SIZE] 86static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
87 __attribute__((__section__(".bss.page_aligned"))); 87static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
88
89static char hardirq_stack[NR_CPUS * THREAD_SIZE]
90 __attribute__((__section__(".bss.page_aligned")));
91 88
92static void call_on_stack(void *func, void *stack) 89static void call_on_stack(void *func, void *stack)
93{ 90{
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index c03205991718..f2d43bc75514 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -12,9 +12,13 @@
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/io.h> 13#include <linux/io.h>
14#include <linux/mm.h> 14#include <linux/mm.h>
15#include <linux/module.h>
15 16
16#include <asm/setup.h> 17#include <asm/setup.h>
17 18
19struct dentry *arch_debugfs_dir;
20EXPORT_SYMBOL(arch_debugfs_dir);
21
18#ifdef CONFIG_DEBUG_BOOT_PARAMS 22#ifdef CONFIG_DEBUG_BOOT_PARAMS
19struct setup_data_node { 23struct setup_data_node {
20 u64 paddr; 24 u64 paddr;
@@ -209,6 +213,10 @@ static int __init arch_kdebugfs_init(void)
209{ 213{
210 int error = 0; 214 int error = 0;
211 215
216 arch_debugfs_dir = debugfs_create_dir("x86", NULL);
217 if (!arch_debugfs_dir)
218 return -ENOMEM;
219
212#ifdef CONFIG_DEBUG_BOOT_PARAMS 220#ifdef CONFIG_DEBUG_BOOT_PARAMS
213 error = boot_params_kdebugfs_init(); 221 error = boot_params_kdebugfs_init();
214#endif 222#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index b8c6743a13da..43c019f85f0d 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -860,7 +860,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
860 860
861 resume_execution(cur, regs, kcb); 861 resume_execution(cur, regs, kcb);
862 regs->flags |= kcb->kprobe_saved_flags; 862 regs->flags |= kcb->kprobe_saved_flags;
863 trace_hardirqs_fixup_flags(regs->flags);
864 863
865 if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { 864 if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
866 kcb->kprobe_status = KPROBE_HIT_SSDONE; 865 kcb->kprobe_status = KPROBE_HIT_SSDONE;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 87edf1ceb1df..d02def06ca91 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -113,7 +113,7 @@ static void kvm_setup_secondary_clock(void)
113#endif 113#endif
114 114
115#ifdef CONFIG_SMP 115#ifdef CONFIG_SMP
116void __init kvm_smp_prepare_boot_cpu(void) 116static void __init kvm_smp_prepare_boot_cpu(void)
117{ 117{
118 WARN_ON(kvm_register_clock("primary cpu clock")); 118 WARN_ON(kvm_register_clock("primary cpu clock"));
119 native_smp_prepare_boot_cpu(); 119 native_smp_prepare_boot_cpu();
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index a888e67f5874..0e867676b5a5 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -150,7 +150,8 @@ int module_finalize(const Elf_Ehdr *hdr,
150 const Elf_Shdr *sechdrs, 150 const Elf_Shdr *sechdrs,
151 struct module *me) 151 struct module *me)
152{ 152{
153 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; 153 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
154 *para = NULL;
154 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 155 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
155 156
156 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 157 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -160,6 +161,8 @@ int module_finalize(const Elf_Ehdr *hdr,
160 alt = s; 161 alt = s;
161 if (!strcmp(".smp_locks", secstrings + s->sh_name)) 162 if (!strcmp(".smp_locks", secstrings + s->sh_name))
162 locks= s; 163 locks= s;
164 if (!strcmp(".parainstructions", secstrings + s->sh_name))
165 para = s;
163 } 166 }
164 167
165 if (alt) { 168 if (alt) {
@@ -175,6 +178,11 @@ int module_finalize(const Elf_Ehdr *hdr,
175 tseg, tseg + text->sh_size); 178 tseg, tseg + text->sh_size);
176 } 179 }
177 180
181 if (para) {
182 void *pseg = (void *)para->sh_addr;
183 apply_paravirt(pseg, pseg + para->sh_size);
184 }
185
178 return module_bug_finalize(hdr, sechdrs, me); 186 return module_bug_finalize(hdr, sechdrs, me);
179} 187}
180 188
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 70e1f3e287fb..e362c6ab4d35 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -27,6 +27,7 @@
27#include <asm/bios_ebda.h> 27#include <asm/bios_ebda.h>
28#include <asm/e820.h> 28#include <asm/e820.h>
29#include <asm/trampoline.h> 29#include <asm/trampoline.h>
30#include <asm/setup.h>
30 31
31#include <mach_apic.h> 32#include <mach_apic.h>
32#ifdef CONFIG_X86_32 33#ifdef CONFIG_X86_32
@@ -48,76 +49,6 @@ static int __init mpf_checksum(unsigned char *mp, int len)
48 return sum & 0xFF; 49 return sum & 0xFF;
49} 50}
50 51
51#ifdef CONFIG_X86_NUMAQ
52int found_numaq;
53/*
54 * Have to match translation table entries to main table entries by counter
55 * hence the mpc_record variable .... can't see a less disgusting way of
56 * doing this ....
57 */
58struct mpc_config_translation {
59 unsigned char mpc_type;
60 unsigned char trans_len;
61 unsigned char trans_type;
62 unsigned char trans_quad;
63 unsigned char trans_global;
64 unsigned char trans_local;
65 unsigned short trans_reserved;
66};
67
68
69static int mpc_record;
70static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
71 __cpuinitdata;
72
73static inline int generate_logical_apicid(int quad, int phys_apicid)
74{
75 return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
76}
77
78
79static inline int mpc_apic_id(struct mpc_config_processor *m,
80 struct mpc_config_translation *translation_record)
81{
82 int quad = translation_record->trans_quad;
83 int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
84
85 printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
86 m->mpc_apicid,
87 (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
88 (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
89 m->mpc_apicver, quad, logical_apicid);
90 return logical_apicid;
91}
92
93int mp_bus_id_to_node[MAX_MP_BUSSES];
94
95int mp_bus_id_to_local[MAX_MP_BUSSES];
96
97static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
98 struct mpc_config_translation *translation)
99{
100 int quad = translation->trans_quad;
101 int local = translation->trans_local;
102
103 mp_bus_id_to_node[m->mpc_busid] = quad;
104 mp_bus_id_to_local[m->mpc_busid] = local;
105 printk(KERN_INFO "Bus #%d is %s (node %d)\n",
106 m->mpc_busid, name, quad);
107}
108
109int quad_local_to_mp_bus_id [NR_CPUS/4][4];
110static void mpc_oem_pci_bus(struct mpc_config_bus *m,
111 struct mpc_config_translation *translation)
112{
113 int quad = translation->trans_quad;
114 int local = translation->trans_local;
115
116 quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
117}
118
119#endif
120
121static void __cpuinit MP_processor_info(struct mpc_config_processor *m) 52static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
122{ 53{
123 int apicid; 54 int apicid;
@@ -127,14 +58,12 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
127 disabled_cpus++; 58 disabled_cpus++;
128 return; 59 return;
129 } 60 }
130#ifdef CONFIG_X86_NUMAQ 61
131 if (found_numaq) 62 if (x86_quirks->mpc_apic_id)
132 apicid = mpc_apic_id(m, translation_table[mpc_record]); 63 apicid = x86_quirks->mpc_apic_id(m);
133 else 64 else
134 apicid = m->mpc_apicid; 65 apicid = m->mpc_apicid;
135#else 66
136 apicid = m->mpc_apicid;
137#endif
138 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { 67 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
139 bootup_cpu = " (Bootup-CPU)"; 68 bootup_cpu = " (Bootup-CPU)";
140 boot_cpu_physical_apicid = m->mpc_apicid; 69 boot_cpu_physical_apicid = m->mpc_apicid;
@@ -151,12 +80,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
151 memcpy(str, m->mpc_bustype, 6); 80 memcpy(str, m->mpc_bustype, 6);
152 str[6] = 0; 81 str[6] = 0;
153 82
154#ifdef CONFIG_X86_NUMAQ 83 if (x86_quirks->mpc_oem_bus_info)
155 if (found_numaq) 84 x86_quirks->mpc_oem_bus_info(m, str);
156 mpc_oem_bus_info(m, str, translation_table[mpc_record]); 85 else
157#else 86 printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
158 printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
159#endif
160 87
161#if MAX_MP_BUSSES < 256 88#if MAX_MP_BUSSES < 256
162 if (m->mpc_busid >= MAX_MP_BUSSES) { 89 if (m->mpc_busid >= MAX_MP_BUSSES) {
@@ -173,10 +100,9 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
173 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; 100 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
174#endif 101#endif
175 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { 102 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
176#ifdef CONFIG_X86_NUMAQ 103 if (x86_quirks->mpc_oem_pci_bus)
177 if (found_numaq) 104 x86_quirks->mpc_oem_pci_bus(m);
178 mpc_oem_pci_bus(m, translation_table[mpc_record]); 105
179#endif
180 clear_bit(m->mpc_busid, mp_bus_not_pci); 106 clear_bit(m->mpc_busid, mp_bus_not_pci);
181#if defined(CONFIG_EISA) || defined (CONFIG_MCA) 107#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
182 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; 108 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
@@ -316,83 +242,6 @@ static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
316 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); 242 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
317} 243}
318 244
319#ifdef CONFIG_X86_NUMAQ
320static void __init MP_translation_info(struct mpc_config_translation *m)
321{
322 printk(KERN_INFO
323 "Translation: record %d, type %d, quad %d, global %d, local %d\n",
324 mpc_record, m->trans_type, m->trans_quad, m->trans_global,
325 m->trans_local);
326
327 if (mpc_record >= MAX_MPC_ENTRY)
328 printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
329 else
330 translation_table[mpc_record] = m; /* stash this for later */
331 if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
332 node_set_online(m->trans_quad);
333}
334
335/*
336 * Read/parse the MPC oem tables
337 */
338
339static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
340 unsigned short oemsize)
341{
342 int count = sizeof(*oemtable); /* the header size */
343 unsigned char *oemptr = ((unsigned char *)oemtable) + count;
344
345 mpc_record = 0;
346 printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
347 oemtable);
348 if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
349 printk(KERN_WARNING
350 "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
351 oemtable->oem_signature[0], oemtable->oem_signature[1],
352 oemtable->oem_signature[2], oemtable->oem_signature[3]);
353 return;
354 }
355 if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
356 printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
357 return;
358 }
359 while (count < oemtable->oem_length) {
360 switch (*oemptr) {
361 case MP_TRANSLATION:
362 {
363 struct mpc_config_translation *m =
364 (struct mpc_config_translation *)oemptr;
365 MP_translation_info(m);
366 oemptr += sizeof(*m);
367 count += sizeof(*m);
368 ++mpc_record;
369 break;
370 }
371 default:
372 {
373 printk(KERN_WARNING
374 "Unrecognised OEM table entry type! - %d\n",
375 (int)*oemptr);
376 return;
377 }
378 }
379 }
380}
381
382void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
383 char *productid)
384{
385 if (strncmp(oem, "IBM NUMA", 8))
386 printk("Warning! Not a NUMA-Q system!\n");
387 else
388 found_numaq = 1;
389
390 if (mpc->mpc_oemptr)
391 smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
392 mpc->mpc_oemsize);
393}
394#endif /* CONFIG_X86_NUMAQ */
395
396/* 245/*
397 * Read/parse the MPC 246 * Read/parse the MPC
398 */ 247 */
@@ -457,7 +306,6 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
457 } else 306 } else
458 mps_oem_check(mpc, oem, str); 307 mps_oem_check(mpc, oem, str);
459#endif 308#endif
460
461 /* save the local APIC address, it might be non-default */ 309 /* save the local APIC address, it might be non-default */
462 if (!acpi_lapic) 310 if (!acpi_lapic)
463 mp_lapic_addr = mpc->mpc_lapic; 311 mp_lapic_addr = mpc->mpc_lapic;
@@ -465,12 +313,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
465 if (early) 313 if (early)
466 return 1; 314 return 1;
467 315
316 if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) {
317 struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr;
318 x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize);
319 }
320
468 /* 321 /*
469 * Now process the configuration blocks. 322 * Now process the configuration blocks.
470 */ 323 */
471#ifdef CONFIG_X86_NUMAQ 324 if (x86_quirks->mpc_record)
472 mpc_record = 0; 325 *x86_quirks->mpc_record = 0;
473#endif 326
474 while (count < mpc->mpc_length) { 327 while (count < mpc->mpc_length) {
475 switch (*mpt) { 328 switch (*mpt) {
476 case MP_PROCESSOR: 329 case MP_PROCESSOR:
@@ -536,9 +389,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
536 count = mpc->mpc_length; 389 count = mpc->mpc_length;
537 break; 390 break;
538 } 391 }
539#ifdef CONFIG_X86_NUMAQ 392 if (x86_quirks->mpc_record)
540 ++mpc_record; 393 (*x86_quirks->mpc_record)++;
541#endif
542 } 394 }
543 395
544#ifdef CONFIG_X86_GENERICARCH 396#ifdef CONFIG_X86_GENERICARCH
@@ -728,20 +580,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
728static struct intel_mp_floating *mpf_found; 580static struct intel_mp_floating *mpf_found;
729 581
730/* 582/*
731 * Machine specific quirk for finding the SMP config before other setup
732 * activities destroy the table:
733 */
734int (*mach_get_smp_config_quirk)(unsigned int early);
735
736/*
737 * Scan the memory blocks for an SMP configuration block. 583 * Scan the memory blocks for an SMP configuration block.
738 */ 584 */
739static void __init __get_smp_config(unsigned int early) 585static void __init __get_smp_config(unsigned int early)
740{ 586{
741 struct intel_mp_floating *mpf = mpf_found; 587 struct intel_mp_floating *mpf = mpf_found;
742 588
743 if (mach_get_smp_config_quirk) { 589 if (x86_quirks->mach_get_smp_config) {
744 if (mach_get_smp_config_quirk(early)) 590 if (x86_quirks->mach_get_smp_config(early))
745 return; 591 return;
746 } 592 }
747 if (acpi_lapic && early) 593 if (acpi_lapic && early)
@@ -901,14 +747,12 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
901 return 0; 747 return 0;
902} 748}
903 749
904int (*mach_find_smp_config_quirk)(unsigned int reserve);
905
906static void __init __find_smp_config(unsigned int reserve) 750static void __init __find_smp_config(unsigned int reserve)
907{ 751{
908 unsigned int address; 752 unsigned int address;
909 753
910 if (mach_find_smp_config_quirk) { 754 if (x86_quirks->mach_find_smp_config) {
911 if (mach_find_smp_config_quirk(reserve)) 755 if (x86_quirks->mach_find_smp_config(reserve))
912 return; 756 return;
913 } 757 }
914 /* 758 /*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 384b49fed598..ac6d51222e7d 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -448,6 +448,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
448 448
449#ifdef CONFIG_SYSCTL 449#ifdef CONFIG_SYSCTL
450 450
451static int __init setup_unknown_nmi_panic(char *str)
452{
453 unknown_nmi_panic = 1;
454 return 1;
455}
456__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
457
451static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) 458static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
452{ 459{
453 unsigned char reason = get_nmi_reason(); 460 unsigned char reason = get_nmi_reason();
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index a23e8233b9ac..b8c45610b20a 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -33,6 +33,7 @@
33#include <asm/processor.h> 33#include <asm/processor.h>
34#include <asm/mpspec.h> 34#include <asm/mpspec.h>
35#include <asm/e820.h> 35#include <asm/e820.h>
36#include <asm/setup.h>
36 37
37#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) 38#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
38 39
@@ -71,6 +72,188 @@ static void __init smp_dump_qct(void)
71 } 72 }
72} 73}
73 74
75
76void __init numaq_tsc_disable(void)
77{
78 if (!found_numaq)
79 return;
80
81 if (num_online_nodes() > 1) {
82 printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
83 setup_clear_cpu_cap(X86_FEATURE_TSC);
84 }
85}
86
87static int __init numaq_pre_time_init(void)
88{
89 numaq_tsc_disable();
90 return 0;
91}
92
93int found_numaq;
94/*
95 * Have to match translation table entries to main table entries by counter
96 * hence the mpc_record variable .... can't see a less disgusting way of
97 * doing this ....
98 */
99struct mpc_config_translation {
100 unsigned char mpc_type;
101 unsigned char trans_len;
102 unsigned char trans_type;
103 unsigned char trans_quad;
104 unsigned char trans_global;
105 unsigned char trans_local;
106 unsigned short trans_reserved;
107};
108
109/* x86_quirks member */
110static int mpc_record;
111static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
112 __cpuinitdata;
113
114static inline int generate_logical_apicid(int quad, int phys_apicid)
115{
116 return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
117}
118
119/* x86_quirks member */
120static int mpc_apic_id(struct mpc_config_processor *m)
121{
122 int quad = translation_table[mpc_record]->trans_quad;
123 int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
124
125 printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
126 m->mpc_apicid,
127 (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
128 (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
129 m->mpc_apicver, quad, logical_apicid);
130 return logical_apicid;
131}
132
133int mp_bus_id_to_node[MAX_MP_BUSSES];
134
135int mp_bus_id_to_local[MAX_MP_BUSSES];
136
137/* x86_quirks member */
138static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name)
139{
140 int quad = translation_table[mpc_record]->trans_quad;
141 int local = translation_table[mpc_record]->trans_local;
142
143 mp_bus_id_to_node[m->mpc_busid] = quad;
144 mp_bus_id_to_local[m->mpc_busid] = local;
145 printk(KERN_INFO "Bus #%d is %s (node %d)\n",
146 m->mpc_busid, name, quad);
147}
148
149int quad_local_to_mp_bus_id [NR_CPUS/4][4];
150
151/* x86_quirks member */
152static void mpc_oem_pci_bus(struct mpc_config_bus *m)
153{
154 int quad = translation_table[mpc_record]->trans_quad;
155 int local = translation_table[mpc_record]->trans_local;
156
157 quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
158}
159
160static void __init MP_translation_info(struct mpc_config_translation *m)
161{
162 printk(KERN_INFO
163 "Translation: record %d, type %d, quad %d, global %d, local %d\n",
164 mpc_record, m->trans_type, m->trans_quad, m->trans_global,
165 m->trans_local);
166
167 if (mpc_record >= MAX_MPC_ENTRY)
168 printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
169 else
170 translation_table[mpc_record] = m; /* stash this for later */
171 if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
172 node_set_online(m->trans_quad);
173}
174
175static int __init mpf_checksum(unsigned char *mp, int len)
176{
177 int sum = 0;
178
179 while (len--)
180 sum += *mp++;
181
182 return sum & 0xFF;
183}
184
185/*
186 * Read/parse the MPC oem tables
187 */
188
189static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
190 unsigned short oemsize)
191{
192 int count = sizeof(*oemtable); /* the header size */
193 unsigned char *oemptr = ((unsigned char *)oemtable) + count;
194
195 mpc_record = 0;
196 printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
197 oemtable);
198 if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
199 printk(KERN_WARNING
200 "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
201 oemtable->oem_signature[0], oemtable->oem_signature[1],
202 oemtable->oem_signature[2], oemtable->oem_signature[3]);
203 return;
204 }
205 if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
206 printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
207 return;
208 }
209 while (count < oemtable->oem_length) {
210 switch (*oemptr) {
211 case MP_TRANSLATION:
212 {
213 struct mpc_config_translation *m =
214 (struct mpc_config_translation *)oemptr;
215 MP_translation_info(m);
216 oemptr += sizeof(*m);
217 count += sizeof(*m);
218 ++mpc_record;
219 break;
220 }
221 default:
222 {
223 printk(KERN_WARNING
224 "Unrecognised OEM table entry type! - %d\n",
225 (int)*oemptr);
226 return;
227 }
228 }
229 }
230}
231
232static struct x86_quirks numaq_x86_quirks __initdata = {
233 .arch_pre_time_init = numaq_pre_time_init,
234 .arch_time_init = NULL,
235 .arch_pre_intr_init = NULL,
236 .arch_memory_setup = NULL,
237 .arch_intr_init = NULL,
238 .arch_trap_init = NULL,
239 .mach_get_smp_config = NULL,
240 .mach_find_smp_config = NULL,
241 .mpc_record = &mpc_record,
242 .mpc_apic_id = mpc_apic_id,
243 .mpc_oem_bus_info = mpc_oem_bus_info,
244 .mpc_oem_pci_bus = mpc_oem_pci_bus,
245 .smp_read_mpc_oem = smp_read_mpc_oem,
246};
247
248void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
249 char *productid)
250{
251 if (strncmp(oem, "IBM NUMA", 8))
252 printk("Warning! Not a NUMA-Q system!\n");
253 else
254 found_numaq = 1;
255}
256
74static __init void early_check_numaq(void) 257static __init void early_check_numaq(void)
75{ 258{
76 /* 259 /*
@@ -82,6 +265,9 @@ static __init void early_check_numaq(void)
82 */ 265 */
83 if (smp_found_config) 266 if (smp_found_config)
84 early_get_smp_config(); 267 early_get_smp_config();
268
269 if (found_numaq)
270 x86_quirks = &numaq_x86_quirks;
85} 271}
86 272
87int __init get_memcfg_numaq(void) 273int __init get_memcfg_numaq(void)
@@ -92,14 +278,3 @@ int __init get_memcfg_numaq(void)
92 smp_dump_qct(); 278 smp_dump_qct();
93 return 1; 279 return 1;
94} 280}
95
96void __init numaq_tsc_disable(void)
97{
98 if (!found_numaq)
99 return;
100
101 if (num_online_nodes() > 1) {
102 printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
103 setup_clear_cpu_cap(X86_FEATURE_TSC);
104 }
105}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e0f139106c7e..00c53a049756 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -29,6 +29,7 @@
29#include <asm/desc.h> 29#include <asm/desc.h>
30#include <asm/setup.h> 30#include <asm/setup.h>
31#include <asm/arch_hooks.h> 31#include <asm/arch_hooks.h>
32#include <asm/pgtable.h>
32#include <asm/time.h> 33#include <asm/time.h>
33#include <asm/pgalloc.h> 34#include <asm/pgalloc.h>
34#include <asm/irq.h> 35#include <asm/irq.h>
@@ -123,6 +124,7 @@ static void *get_call_destination(u8 type)
123 .pv_irq_ops = pv_irq_ops, 124 .pv_irq_ops = pv_irq_ops,
124 .pv_apic_ops = pv_apic_ops, 125 .pv_apic_ops = pv_apic_ops,
125 .pv_mmu_ops = pv_mmu_ops, 126 .pv_mmu_ops = pv_mmu_ops,
127 .pv_lock_ops = pv_lock_ops,
126 }; 128 };
127 return *((void **)&tmpl + type); 129 return *((void **)&tmpl + type);
128} 130}
@@ -266,6 +268,17 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
266 return __get_cpu_var(paravirt_lazy_mode); 268 return __get_cpu_var(paravirt_lazy_mode);
267} 269}
268 270
271void __init paravirt_use_bytelocks(void)
272{
273#ifdef CONFIG_SMP
274 pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
275 pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
276 pv_lock_ops.spin_lock = __byte_spin_lock;
277 pv_lock_ops.spin_trylock = __byte_spin_trylock;
278 pv_lock_ops.spin_unlock = __byte_spin_unlock;
279#endif
280}
281
269struct pv_info pv_info = { 282struct pv_info pv_info = {
270 .name = "bare hardware", 283 .name = "bare hardware",
271 .paravirt_enabled = 0, 284 .paravirt_enabled = 0,
@@ -370,6 +383,9 @@ struct pv_mmu_ops pv_mmu_ops = {
370#ifndef CONFIG_X86_64 383#ifndef CONFIG_X86_64
371 .pagetable_setup_start = native_pagetable_setup_start, 384 .pagetable_setup_start = native_pagetable_setup_start,
372 .pagetable_setup_done = native_pagetable_setup_done, 385 .pagetable_setup_done = native_pagetable_setup_done,
386#else
387 .pagetable_setup_start = paravirt_nop,
388 .pagetable_setup_done = paravirt_nop,
373#endif 389#endif
374 390
375 .read_cr2 = native_read_cr2, 391 .read_cr2 = native_read_cr2,
@@ -443,6 +459,18 @@ struct pv_mmu_ops pv_mmu_ops = {
443 .set_fixmap = native_set_fixmap, 459 .set_fixmap = native_set_fixmap,
444}; 460};
445 461
462struct pv_lock_ops pv_lock_ops = {
463#ifdef CONFIG_SMP
464 .spin_is_locked = __ticket_spin_is_locked,
465 .spin_is_contended = __ticket_spin_is_contended,
466
467 .spin_lock = __ticket_spin_lock,
468 .spin_trylock = __ticket_spin_trylock,
469 .spin_unlock = __ticket_spin_unlock,
470#endif
471};
472EXPORT_SYMBOL_GPL(pv_lock_ops);
473
446EXPORT_SYMBOL_GPL(pv_time_ops); 474EXPORT_SYMBOL_GPL(pv_time_ops);
447EXPORT_SYMBOL (pv_cpu_ops); 475EXPORT_SYMBOL (pv_cpu_ops);
448EXPORT_SYMBOL (pv_mmu_ops); 476EXPORT_SYMBOL (pv_mmu_ops);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 6959b5c45df4..151f2d171f7c 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -36,7 +36,7 @@
36#include <linux/delay.h> 36#include <linux/delay.h>
37#include <linux/scatterlist.h> 37#include <linux/scatterlist.h>
38#include <linux/iommu-helper.h> 38#include <linux/iommu-helper.h>
39#include <asm/gart.h> 39#include <asm/iommu.h>
40#include <asm/calgary.h> 40#include <asm/calgary.h>
41#include <asm/tce.h> 41#include <asm/tce.h>
42#include <asm/pci-direct.h> 42#include <asm/pci-direct.h>
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 8467ec2320f1..a4213c00dffc 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -5,12 +5,11 @@
5 5
6#include <asm/proto.h> 6#include <asm/proto.h>
7#include <asm/dma.h> 7#include <asm/dma.h>
8#include <asm/gart.h> 8#include <asm/iommu.h>
9#include <asm/calgary.h> 9#include <asm/calgary.h>
10#include <asm/amd_iommu.h> 10#include <asm/amd_iommu.h>
11 11
12int forbid_dac __read_mostly; 12static int forbid_dac __read_mostly;
13EXPORT_SYMBOL(forbid_dac);
14 13
15const struct dma_mapping_ops *dma_ops; 14const struct dma_mapping_ops *dma_ops;
16EXPORT_SYMBOL(dma_ops); 15EXPORT_SYMBOL(dma_ops);
@@ -114,21 +113,15 @@ void __init pci_iommu_alloc(void)
114 * The order of these functions is important for 113 * The order of these functions is important for
115 * fall-back/fail-over reasons 114 * fall-back/fail-over reasons
116 */ 115 */
117#ifdef CONFIG_GART_IOMMU
118 gart_iommu_hole_init(); 116 gart_iommu_hole_init();
119#endif
120 117
121#ifdef CONFIG_CALGARY_IOMMU
122 detect_calgary(); 118 detect_calgary();
123#endif
124 119
125 detect_intel_iommu(); 120 detect_intel_iommu();
126 121
127 amd_iommu_detect(); 122 amd_iommu_detect();
128 123
129#ifdef CONFIG_SWIOTLB
130 pci_swiotlb_init(); 124 pci_swiotlb_init();
131#endif
132} 125}
133#endif 126#endif
134 127
@@ -184,9 +177,7 @@ static __init int iommu_setup(char *p)
184 swiotlb = 1; 177 swiotlb = 1;
185#endif 178#endif
186 179
187#ifdef CONFIG_GART_IOMMU
188 gart_parse_options(p); 180 gart_parse_options(p);
189#endif
190 181
191#ifdef CONFIG_CALGARY_IOMMU 182#ifdef CONFIG_CALGARY_IOMMU
192 if (!strncmp(p, "calgary", 7)) 183 if (!strncmp(p, "calgary", 7))
@@ -500,17 +491,13 @@ EXPORT_SYMBOL(dma_free_coherent);
500 491
501static int __init pci_iommu_init(void) 492static int __init pci_iommu_init(void)
502{ 493{
503#ifdef CONFIG_CALGARY_IOMMU
504 calgary_iommu_init(); 494 calgary_iommu_init();
505#endif
506 495
507 intel_iommu_init(); 496 intel_iommu_init();
508 497
509 amd_iommu_init(); 498 amd_iommu_init();
510 499
511#ifdef CONFIG_GART_IOMMU
512 gart_iommu_init(); 500 gart_iommu_init();
513#endif
514 501
515 no_iommu_init(); 502 no_iommu_init();
516 return 0; 503 return 0;
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index c3fe78406d18..be60961f8695 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -32,6 +32,7 @@
32#include <asm/mtrr.h> 32#include <asm/mtrr.h>
33#include <asm/pgtable.h> 33#include <asm/pgtable.h>
34#include <asm/proto.h> 34#include <asm/proto.h>
35#include <asm/iommu.h>
35#include <asm/gart.h> 36#include <asm/gart.h>
36#include <asm/cacheflush.h> 37#include <asm/cacheflush.h>
37#include <asm/swiotlb.h> 38#include <asm/swiotlb.h>
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index aec43d56f49c..792b9179eff3 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -7,7 +7,7 @@
7#include <linux/dma-mapping.h> 7#include <linux/dma-mapping.h>
8#include <linux/scatterlist.h> 8#include <linux/scatterlist.h>
9 9
10#include <asm/gart.h> 10#include <asm/iommu.h>
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/dma.h> 12#include <asm/dma.h>
13 13
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 82299cd1d04d..20df839b9c20 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -5,7 +5,7 @@
5#include <linux/module.h> 5#include <linux/module.h>
6#include <linux/dma-mapping.h> 6#include <linux/dma-mapping.h>
7 7
8#include <asm/gart.h> 8#include <asm/iommu.h>
9#include <asm/swiotlb.h> 9#include <asm/swiotlb.h>
10#include <asm/dma.h> 10#include <asm/dma.h>
11 11
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4d629c62f4f8..7fc4d5b0a6a0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -15,6 +15,7 @@ unsigned long idle_nomwait;
15EXPORT_SYMBOL(idle_nomwait); 15EXPORT_SYMBOL(idle_nomwait);
16 16
17struct kmem_cache *task_xstate_cachep; 17struct kmem_cache *task_xstate_cachep;
18static int force_mwait __cpuinitdata;
18 19
19int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 20int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
20{ 21{
@@ -199,6 +200,7 @@ static void poll_idle(void)
199 * 200 *
200 * idle=mwait overrides this decision and forces the usage of mwait. 201 * idle=mwait overrides this decision and forces the usage of mwait.
201 */ 202 */
203static int __cpuinitdata force_mwait;
202 204
203#define MWAIT_INFO 0x05 205#define MWAIT_INFO 0x05
204#define MWAIT_ECX_EXTENDED_INFO 0x01 206#define MWAIT_ECX_EXTENDED_INFO 0x01
@@ -326,6 +328,9 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
326 328
327static int __init idle_setup(char *str) 329static int __init idle_setup(char *str)
328{ 330{
331 if (!str)
332 return -EINVAL;
333
329 if (!strcmp(str, "poll")) { 334 if (!strcmp(str, "poll")) {
330 printk("using polling idle threads.\n"); 335 printk("using polling idle threads.\n");
331 pm_idle = poll_idle; 336 pm_idle = poll_idle;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index a8e53626ac9a..e8a8e1b99817 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -537,8 +537,8 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
537struct task_struct * 537struct task_struct *
538__switch_to(struct task_struct *prev_p, struct task_struct *next_p) 538__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
539{ 539{
540 struct thread_struct *prev = &prev_p->thread, 540 struct thread_struct *prev = &prev_p->thread;
541 *next = &next_p->thread; 541 struct thread_struct *next = &next_p->thread;
542 int cpu = smp_processor_id(); 542 int cpu = smp_processor_id();
543 struct tss_struct *tss = &per_cpu(init_tss, cpu); 543 struct tss_struct *tss = &per_cpu(init_tss, cpu);
544 unsigned fsindex, gsindex; 544 unsigned fsindex, gsindex;
@@ -586,35 +586,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
586 586
587 /* 587 /*
588 * Switch FS and GS. 588 * Switch FS and GS.
589 *
590 * Segment register != 0 always requires a reload. Also
591 * reload when it has changed. When prev process used 64bit
592 * base always reload to avoid an information leak.
589 */ 593 */
590 { 594 if (unlikely(fsindex | next->fsindex | prev->fs)) {
591 /* segment register != 0 always requires a reload. 595 loadsegment(fs, next->fsindex);
592 also reload when it has changed. 596 /*
593 when prev process used 64bit base always reload 597 * Check if the user used a selector != 0; if yes
594 to avoid an information leak. */ 598 * clear 64bit base, since overloaded base is always
595 if (unlikely(fsindex | next->fsindex | prev->fs)) { 599 * mapped to the Null selector
596 loadsegment(fs, next->fsindex); 600 */
597 /* check if the user used a selector != 0 601 if (fsindex)
598 * if yes clear 64bit base, since overloaded base
599 * is always mapped to the Null selector
600 */
601 if (fsindex)
602 prev->fs = 0; 602 prev->fs = 0;
603 } 603 }
604 /* when next process has a 64bit base use it */ 604 /* when next process has a 64bit base use it */
605 if (next->fs) 605 if (next->fs)
606 wrmsrl(MSR_FS_BASE, next->fs); 606 wrmsrl(MSR_FS_BASE, next->fs);
607 prev->fsindex = fsindex; 607 prev->fsindex = fsindex;
608 608
609 if (unlikely(gsindex | next->gsindex | prev->gs)) { 609 if (unlikely(gsindex | next->gsindex | prev->gs)) {
610 load_gs_index(next->gsindex); 610 load_gs_index(next->gsindex);
611 if (gsindex) 611 if (gsindex)
612 prev->gs = 0; 612 prev->gs = 0;
613 }
614 if (next->gs)
615 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
616 prev->gsindex = gsindex;
617 } 613 }
614 if (next->gs)
615 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
616 prev->gsindex = gsindex;
618 617
619 /* Must be after DS reload */ 618 /* Must be after DS reload */
620 unlazy_fpu(prev_p); 619 unlazy_fpu(prev_p);
@@ -627,7 +626,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
627 write_pda(pcurrent, next_p); 626 write_pda(pcurrent, next_p);
628 627
629 write_pda(kernelstack, 628 write_pda(kernelstack,
630 (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); 629 (unsigned long)task_stack_page(next_p) +
630 THREAD_SIZE - PDA_STACKOFFSET);
631#ifdef CONFIG_CC_STACKPROTECTOR 631#ifdef CONFIG_CC_STACKPROTECTOR
632 write_pda(stack_canary, next_p->stack_canary); 632 write_pda(stack_canary, next_p->stack_canary);
633 /* 633 /*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 77040b6070e1..e37dccce85db 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1357,8 +1357,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
1357#endif 1357#endif
1358} 1358}
1359 1359
1360#ifdef CONFIG_X86_32
1361
1362void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) 1360void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
1363{ 1361{
1364 struct siginfo info; 1362 struct siginfo info;
@@ -1377,89 +1375,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
1377 force_sig_info(SIGTRAP, &info, tsk); 1375 force_sig_info(SIGTRAP, &info, tsk);
1378} 1376}
1379 1377
1380/* notification of system call entry/exit
1381 * - triggered by current->work.syscall_trace
1382 */
1383int do_syscall_trace(struct pt_regs *regs, int entryexit)
1384{
1385 int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
1386 /*
1387 * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
1388 * interception
1389 */
1390 int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
1391 int ret = 0;
1392
1393 /* do the secure computing check first */
1394 if (!entryexit)
1395 secure_computing(regs->orig_ax);
1396
1397 if (unlikely(current->audit_context)) {
1398 if (entryexit)
1399 audit_syscall_exit(AUDITSC_RESULT(regs->ax),
1400 regs->ax);
1401 /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
1402 * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
1403 * not used, entry.S will call us only on syscall exit, not
1404 * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
1405 * calling send_sigtrap() on syscall entry.
1406 *
1407 * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
1408 * is_singlestep is false, despite his name, so we will still do
1409 * the correct thing.
1410 */
1411 else if (is_singlestep)
1412 goto out;
1413 }
1414
1415 if (!(current->ptrace & PT_PTRACED))
1416 goto out;
1417
1418 /* If a process stops on the 1st tracepoint with SYSCALL_TRACE
1419 * and then is resumed with SYSEMU_SINGLESTEP, it will come in
1420 * here. We have to check this and return */
1421 if (is_sysemu && entryexit)
1422 return 0;
1423
1424 /* Fake a debug trap */
1425 if (is_singlestep)
1426 send_sigtrap(current, regs, 0);
1427
1428 if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
1429 goto out;
1430
1431 /* the 0x80 provides a way for the tracing parent to distinguish
1432 between a syscall stop and SIGTRAP delivery */
1433 /* Note that the debugger could change the result of test_thread_flag!*/
1434 ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
1435
1436 /*
1437 * this isn't the same as continuing with a signal, but it will do
1438 * for normal use. strace only continues with a signal if the
1439 * stopping signal is not SIGTRAP. -brl
1440 */
1441 if (current->exit_code) {
1442 send_sig(current->exit_code, current, 1);
1443 current->exit_code = 0;
1444 }
1445 ret = is_sysemu;
1446out:
1447 if (unlikely(current->audit_context) && !entryexit)
1448 audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax,
1449 regs->bx, regs->cx, regs->dx, regs->si);
1450 if (ret == 0)
1451 return 0;
1452
1453 regs->orig_ax = -1; /* force skip of syscall restarting */
1454 if (unlikely(current->audit_context))
1455 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
1456 return 1;
1457}
1458
1459#else /* CONFIG_X86_64 */
1460
1461static void syscall_trace(struct pt_regs *regs) 1378static void syscall_trace(struct pt_regs *regs)
1462{ 1379{
1380 if (!(current->ptrace & PT_PTRACED))
1381 return;
1463 1382
1464#if 0 1383#if 0
1465 printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", 1384 printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
@@ -1481,39 +1400,81 @@ static void syscall_trace(struct pt_regs *regs)
1481 } 1400 }
1482} 1401}
1483 1402
1484asmlinkage void syscall_trace_enter(struct pt_regs *regs) 1403#ifdef CONFIG_X86_32
1404# define IS_IA32 1
1405#elif defined CONFIG_IA32_EMULATION
1406# define IS_IA32 test_thread_flag(TIF_IA32)
1407#else
1408# define IS_IA32 0
1409#endif
1410
1411/*
1412 * We must return the syscall number to actually look up in the table.
1413 * This can be -1L to skip running any syscall at all.
1414 */
1415asmregparm long syscall_trace_enter(struct pt_regs *regs)
1485{ 1416{
1417 long ret = 0;
1418
1419 /*
1420 * If we stepped into a sysenter/syscall insn, it trapped in
1421 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
1422 * If user-mode had set TF itself, then it's still clear from
1423 * do_debug() and we need to set it again to restore the user
1424 * state. If we entered on the slow path, TF was already set.
1425 */
1426 if (test_thread_flag(TIF_SINGLESTEP))
1427 regs->flags |= X86_EFLAGS_TF;
1428
1486 /* do the secure computing check first */ 1429 /* do the secure computing check first */
1487 secure_computing(regs->orig_ax); 1430 secure_computing(regs->orig_ax);
1488 1431
1489 if (test_thread_flag(TIF_SYSCALL_TRACE) 1432 if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
1490 && (current->ptrace & PT_PTRACED)) 1433 ret = -1L;
1434
1435 if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
1491 syscall_trace(regs); 1436 syscall_trace(regs);
1492 1437
1493 if (unlikely(current->audit_context)) { 1438 if (unlikely(current->audit_context)) {
1494 if (test_thread_flag(TIF_IA32)) { 1439 if (IS_IA32)
1495 audit_syscall_entry(AUDIT_ARCH_I386, 1440 audit_syscall_entry(AUDIT_ARCH_I386,
1496 regs->orig_ax, 1441 regs->orig_ax,
1497 regs->bx, regs->cx, 1442 regs->bx, regs->cx,
1498 regs->dx, regs->si); 1443 regs->dx, regs->si);
1499 } else { 1444#ifdef CONFIG_X86_64
1445 else
1500 audit_syscall_entry(AUDIT_ARCH_X86_64, 1446 audit_syscall_entry(AUDIT_ARCH_X86_64,
1501 regs->orig_ax, 1447 regs->orig_ax,
1502 regs->di, regs->si, 1448 regs->di, regs->si,
1503 regs->dx, regs->r10); 1449 regs->dx, regs->r10);
1504 } 1450#endif
1505 } 1451 }
1452
1453 return ret ?: regs->orig_ax;
1506} 1454}
1507 1455
1508asmlinkage void syscall_trace_leave(struct pt_regs *regs) 1456asmregparm void syscall_trace_leave(struct pt_regs *regs)
1509{ 1457{
1510 if (unlikely(current->audit_context)) 1458 if (unlikely(current->audit_context))
1511 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); 1459 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
1512 1460
1513 if ((test_thread_flag(TIF_SYSCALL_TRACE) 1461 if (test_thread_flag(TIF_SYSCALL_TRACE))
1514 || test_thread_flag(TIF_SINGLESTEP))
1515 && (current->ptrace & PT_PTRACED))
1516 syscall_trace(regs); 1462 syscall_trace(regs);
1517}
1518 1463
1519#endif /* CONFIG_X86_32 */ 1464 /*
1465 * If TIF_SYSCALL_EMU is set, we only get here because of
1466 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
1467 * We already reported this syscall instruction in
1468 * syscall_trace_enter(), so don't do any more now.
1469 */
1470 if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
1471 return;
1472
1473 /*
1474 * If we are single-stepping, synthesize a trap to follow the
1475 * system call instruction.
1476 */
1477 if (test_thread_flag(TIF_SINGLESTEP) &&
1478 (current->ptrace & PT_PTRACED))
1479 send_sigtrap(current, regs, 0);
1480}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f8a62160e151..9dcf39c02972 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -177,6 +177,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
177 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), 177 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
178 }, 178 },
179 }, 179 },
180 { /* Handle problems with rebooting on Dell T5400's */
181 .callback = set_bios_reboot,
182 .ident = "Dell Precision T5400",
183 .matches = {
184 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
185 DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"),
186 },
187 },
180 { /* Handle problems with rebooting on HP laptops */ 188 { /* Handle problems with rebooting on HP laptops */
181 .callback = set_bios_reboot, 189 .callback = set_bios_reboot,
182 .ident = "HP Compaq Laptop", 190 .ident = "HP Compaq Laptop",
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6121ffd46b9e..a50f9550cbec 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -57,12 +57,8 @@
57#include <linux/slab.h> 57#include <linux/slab.h>
58#include <linux/user.h> 58#include <linux/user.h>
59#include <linux/delay.h> 59#include <linux/delay.h>
60#include <linux/highmem.h>
61 60
62#include <linux/kallsyms.h> 61#include <linux/kallsyms.h>
63#include <linux/edd.h>
64#include <linux/iscsi_ibft.h>
65#include <linux/kexec.h>
66#include <linux/cpufreq.h> 62#include <linux/cpufreq.h>
67#include <linux/dma-mapping.h> 63#include <linux/dma-mapping.h>
68#include <linux/ctype.h> 64#include <linux/ctype.h>
@@ -96,7 +92,7 @@
96#include <asm/smp.h> 92#include <asm/smp.h>
97#include <asm/desc.h> 93#include <asm/desc.h>
98#include <asm/dma.h> 94#include <asm/dma.h>
99#include <asm/gart.h> 95#include <asm/iommu.h>
100#include <asm/mmu_context.h> 96#include <asm/mmu_context.h>
101#include <asm/proto.h> 97#include <asm/proto.h>
102 98
@@ -104,7 +100,6 @@
104#include <asm/paravirt.h> 100#include <asm/paravirt.h>
105 101
106#include <asm/percpu.h> 102#include <asm/percpu.h>
107#include <asm/sections.h>
108#include <asm/topology.h> 103#include <asm/topology.h>
109#include <asm/apicdef.h> 104#include <asm/apicdef.h>
110#ifdef CONFIG_X86_64 105#ifdef CONFIG_X86_64
@@ -579,6 +574,10 @@ static int __init setup_elfcorehdr(char *arg)
579early_param("elfcorehdr", setup_elfcorehdr); 574early_param("elfcorehdr", setup_elfcorehdr);
580#endif 575#endif
581 576
577static struct x86_quirks default_x86_quirks __initdata;
578
579struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
580
582/* 581/*
583 * Determine if we were loaded by an EFI loader. If so, then we have also been 582 * Determine if we were loaded by an EFI loader. If so, then we have also been
584 * passed the efi memmap, systab, etc., so we should use these data structures 583 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -826,7 +825,10 @@ void __init setup_arch(char **cmdline_p)
826 vmi_init(); 825 vmi_init();
827#endif 826#endif
828 827
828 paravirt_pagetable_setup_start(swapper_pg_dir);
829 paging_init(); 829 paging_init();
830 paravirt_pagetable_setup_done(swapper_pg_dir);
831 paravirt_post_allocator_init();
830 832
831#ifdef CONFIG_X86_64 833#ifdef CONFIG_X86_64
832 map_vsyscall(); 834 map_vsyscall();
@@ -856,14 +858,6 @@ void __init setup_arch(char **cmdline_p)
856 init_cpu_to_node(); 858 init_cpu_to_node();
857#endif 859#endif
858 860
859#ifdef CONFIG_X86_NUMAQ
860 /*
861 * need to check online nodes num, call it
862 * here before time_init/tsc_init
863 */
864 numaq_tsc_disable();
865#endif
866
867 init_apic_mappings(); 861 init_apic_mappings();
868 ioapic_init_mappings(); 862 ioapic_init_mappings();
869 863
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index cac68430d31f..f7745f94c006 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -227,8 +227,8 @@ static void __init setup_node_to_cpumask_map(void)
227 /* allocate the map */ 227 /* allocate the map */
228 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); 228 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
229 229
230 Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", 230 pr_debug(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n",
231 map, nr_node_ids); 231 map, nr_node_ids);
232 232
233 /* node_to_cpumask() will now work */ 233 /* node_to_cpumask() will now work */
234 node_to_cpumask_map = map; 234 node_to_cpumask_map = map;
@@ -248,7 +248,7 @@ void __cpuinit numa_set_node(int cpu, int node)
248 per_cpu(x86_cpu_to_node_map, cpu) = node; 248 per_cpu(x86_cpu_to_node_map, cpu) = node;
249 249
250 else 250 else
251 Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); 251 pr_debug("Setting node for non-present cpu %d\n", cpu);
252} 252}
253 253
254void __cpuinit numa_clear_node(int cpu) 254void __cpuinit numa_clear_node(int cpu)
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index d92373630963..07faaa5109cb 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -212,7 +212,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
212 212
213badframe: 213badframe:
214 if (show_unhandled_signals && printk_ratelimit()) { 214 if (show_unhandled_signals && printk_ratelimit()) {
215 printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:" 215 printk("%s%s[%d] bad frame in sigreturn frame:"
216 "%p ip:%lx sp:%lx oeax:%lx", 216 "%p ip:%lx sp:%lx oeax:%lx",
217 task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, 217 task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
218 current->comm, task_pid_nr(current), frame, regs->ip, 218 current->comm, task_pid_nr(current), frame, regs->ip,
@@ -657,12 +657,6 @@ static void do_signal(struct pt_regs *regs)
657void 657void
658do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) 658do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
659{ 659{
660 /* Pending single-step? */
661 if (thread_info_flags & _TIF_SINGLESTEP) {
662 regs->flags |= X86_EFLAGS_TF;
663 clear_thread_flag(TIF_SINGLESTEP);
664 }
665
666 /* deal with pending signal delivery */ 660 /* deal with pending signal delivery */
667 if (thread_info_flags & _TIF_SIGPENDING) 661 if (thread_info_flags & _TIF_SIGPENDING)
668 do_signal(regs); 662 do_signal(regs);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index e53b267662e7..bf87684474f1 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -487,12 +487,6 @@ static void do_signal(struct pt_regs *regs)
487void do_notify_resume(struct pt_regs *regs, void *unused, 487void do_notify_resume(struct pt_regs *regs, void *unused,
488 __u32 thread_info_flags) 488 __u32 thread_info_flags)
489{ 489{
490 /* Pending single-step? */
491 if (thread_info_flags & _TIF_SINGLESTEP) {
492 regs->flags |= X86_EFLAGS_TF;
493 clear_thread_flag(TIF_SINGLESTEP);
494 }
495
496#ifdef CONFIG_X86_MCE 490#ifdef CONFIG_X86_MCE
497 /* notify userspace of pending MCEs */ 491 /* notify userspace of pending MCEs */
498 if (thread_info_flags & _TIF_MCE_NOTIFY) 492 if (thread_info_flags & _TIF_MCE_NOTIFY)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a4f2d8f06e48..52eb1484a48a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -217,7 +217,7 @@ static void __cpuinit smp_callin(void)
217 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, 217 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
218 phys_id, cpuid); 218 phys_id, cpuid);
219 } 219 }
220 Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); 220 pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
221 221
222 /* 222 /*
223 * STARTUP IPIs are fragile beasts as they might sometimes 223 * STARTUP IPIs are fragile beasts as they might sometimes
@@ -252,7 +252,7 @@ static void __cpuinit smp_callin(void)
252 * boards) 252 * boards)
253 */ 253 */
254 254
255 Dprintk("CALLIN, before setup_local_APIC().\n"); 255 pr_debug("CALLIN, before setup_local_APIC().\n");
256 smp_callin_clear_local_apic(); 256 smp_callin_clear_local_apic();
257 setup_local_APIC(); 257 setup_local_APIC();
258 end_local_APIC_setup(); 258 end_local_APIC_setup();
@@ -267,7 +267,7 @@ static void __cpuinit smp_callin(void)
267 local_irq_enable(); 267 local_irq_enable();
268 calibrate_delay(); 268 calibrate_delay();
269 local_irq_disable(); 269 local_irq_disable();
270 Dprintk("Stack at about %p\n", &cpuid); 270 pr_debug("Stack at about %p\n", &cpuid);
271 271
272 /* 272 /*
273 * Save our processor parameters 273 * Save our processor parameters
@@ -514,7 +514,7 @@ static void impress_friends(void)
514 /* 514 /*
515 * Allow the user to impress friends. 515 * Allow the user to impress friends.
516 */ 516 */
517 Dprintk("Before bogomips.\n"); 517 pr_debug("Before bogomips.\n");
518 for_each_possible_cpu(cpu) 518 for_each_possible_cpu(cpu)
519 if (cpu_isset(cpu, cpu_callout_map)) 519 if (cpu_isset(cpu, cpu_callout_map))
520 bogosum += cpu_data(cpu).loops_per_jiffy; 520 bogosum += cpu_data(cpu).loops_per_jiffy;
@@ -524,7 +524,7 @@ static void impress_friends(void)
524 bogosum/(500000/HZ), 524 bogosum/(500000/HZ),
525 (bogosum/(5000/HZ))%100); 525 (bogosum/(5000/HZ))%100);
526 526
527 Dprintk("Before bogocount - setting activated=1.\n"); 527 pr_debug("Before bogocount - setting activated=1.\n");
528} 528}
529 529
530static inline void __inquire_remote_apic(int apicid) 530static inline void __inquire_remote_apic(int apicid)
@@ -583,7 +583,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
583 /* Kick the second */ 583 /* Kick the second */
584 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); 584 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
585 585
586 Dprintk("Waiting for send to finish...\n"); 586 pr_debug("Waiting for send to finish...\n");
587 send_status = safe_apic_wait_icr_idle(); 587 send_status = safe_apic_wait_icr_idle();
588 588
589 /* 589 /*
@@ -594,7 +594,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
594 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 594 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
595 apic_write(APIC_ESR, 0); 595 apic_write(APIC_ESR, 0);
596 accept_status = (apic_read(APIC_ESR) & 0xEF); 596 accept_status = (apic_read(APIC_ESR) & 0xEF);
597 Dprintk("NMI sent.\n"); 597 pr_debug("NMI sent.\n");
598 598
599 if (send_status) 599 if (send_status)
600 printk(KERN_ERR "APIC never delivered???\n"); 600 printk(KERN_ERR "APIC never delivered???\n");
@@ -629,7 +629,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
629 apic_read(APIC_ESR); 629 apic_read(APIC_ESR);
630 } 630 }
631 631
632 Dprintk("Asserting INIT.\n"); 632 pr_debug("Asserting INIT.\n");
633 633
634 /* 634 /*
635 * Turn INIT on target chip 635 * Turn INIT on target chip
@@ -640,18 +640,18 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
640 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, 640 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
641 phys_apicid); 641 phys_apicid);
642 642
643 Dprintk("Waiting for send to finish...\n"); 643 pr_debug("Waiting for send to finish...\n");
644 send_status = safe_apic_wait_icr_idle(); 644 send_status = safe_apic_wait_icr_idle();
645 645
646 mdelay(10); 646 mdelay(10);
647 647
648 Dprintk("Deasserting INIT.\n"); 648 pr_debug("Deasserting INIT.\n");
649 649
650 /* Target chip */ 650 /* Target chip */
651 /* Send IPI */ 651 /* Send IPI */
652 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); 652 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
653 653
654 Dprintk("Waiting for send to finish...\n"); 654 pr_debug("Waiting for send to finish...\n");
655 send_status = safe_apic_wait_icr_idle(); 655 send_status = safe_apic_wait_icr_idle();
656 656
657 mb(); 657 mb();
@@ -678,14 +678,14 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
678 /* 678 /*
679 * Run STARTUP IPI loop. 679 * Run STARTUP IPI loop.
680 */ 680 */
681 Dprintk("#startup loops: %d.\n", num_starts); 681 pr_debug("#startup loops: %d.\n", num_starts);
682 682
683 for (j = 1; j <= num_starts; j++) { 683 for (j = 1; j <= num_starts; j++) {
684 Dprintk("Sending STARTUP #%d.\n", j); 684 pr_debug("Sending STARTUP #%d.\n", j);
685 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 685 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
686 apic_write(APIC_ESR, 0); 686 apic_write(APIC_ESR, 0);
687 apic_read(APIC_ESR); 687 apic_read(APIC_ESR);
688 Dprintk("After apic_write.\n"); 688 pr_debug("After apic_write.\n");
689 689
690 /* 690 /*
691 * STARTUP IPI 691 * STARTUP IPI
@@ -702,9 +702,9 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
702 */ 702 */
703 udelay(300); 703 udelay(300);
704 704
705 Dprintk("Startup point 1.\n"); 705 pr_debug("Startup point 1.\n");
706 706
707 Dprintk("Waiting for send to finish...\n"); 707 pr_debug("Waiting for send to finish...\n");
708 send_status = safe_apic_wait_icr_idle(); 708 send_status = safe_apic_wait_icr_idle();
709 709
710 /* 710 /*
@@ -717,7 +717,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
717 if (send_status || accept_status) 717 if (send_status || accept_status)
718 break; 718 break;
719 } 719 }
720 Dprintk("After Startup.\n"); 720 pr_debug("After Startup.\n");
721 721
722 if (send_status) 722 if (send_status)
723 printk(KERN_ERR "APIC never delivered???\n"); 723 printk(KERN_ERR "APIC never delivered???\n");
@@ -750,7 +750,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
750 * 750 *
751 * Must be called after the _cpu_pda pointer table is initialized. 751 * Must be called after the _cpu_pda pointer table is initialized.
752 */ 752 */
753static int __cpuinit get_local_pda(int cpu) 753int __cpuinit get_local_pda(int cpu)
754{ 754{
755 struct x8664_pda *oldpda, *newpda; 755 struct x8664_pda *oldpda, *newpda;
756 unsigned long size = sizeof(struct x8664_pda); 756 unsigned long size = sizeof(struct x8664_pda);
@@ -868,7 +868,7 @@ do_rest:
868 868
869 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { 869 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
870 870
871 Dprintk("Setting warm reset code and vector.\n"); 871 pr_debug("Setting warm reset code and vector.\n");
872 872
873 store_NMI_vector(&nmi_high, &nmi_low); 873 store_NMI_vector(&nmi_high, &nmi_low);
874 874
@@ -889,9 +889,9 @@ do_rest:
889 /* 889 /*
890 * allow APs to start initializing. 890 * allow APs to start initializing.
891 */ 891 */
892 Dprintk("Before Callout %d.\n", cpu); 892 pr_debug("Before Callout %d.\n", cpu);
893 cpu_set(cpu, cpu_callout_map); 893 cpu_set(cpu, cpu_callout_map);
894 Dprintk("After Callout %d.\n", cpu); 894 pr_debug("After Callout %d.\n", cpu);
895 895
896 /* 896 /*
897 * Wait 5s total for a response 897 * Wait 5s total for a response
@@ -904,10 +904,10 @@ do_rest:
904 904
905 if (cpu_isset(cpu, cpu_callin_map)) { 905 if (cpu_isset(cpu, cpu_callin_map)) {
906 /* number CPUs logically, starting from 1 (BSP is 0) */ 906 /* number CPUs logically, starting from 1 (BSP is 0) */
907 Dprintk("OK.\n"); 907 pr_debug("OK.\n");
908 printk(KERN_INFO "CPU%d: ", cpu); 908 printk(KERN_INFO "CPU%d: ", cpu);
909 print_cpu_info(&cpu_data(cpu)); 909 print_cpu_info(&cpu_data(cpu));
910 Dprintk("CPU has booted.\n"); 910 pr_debug("CPU has booted.\n");
911 } else { 911 } else {
912 boot_error = 1; 912 boot_error = 1;
913 if (*((volatile unsigned char *)trampoline_base) 913 if (*((volatile unsigned char *)trampoline_base)
@@ -952,7 +952,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
952 952
953 WARN_ON(irqs_disabled()); 953 WARN_ON(irqs_disabled());
954 954
955 Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); 955 pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
956 956
957 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || 957 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
958 !physid_isset(apicid, phys_cpu_present_map)) { 958 !physid_isset(apicid, phys_cpu_present_map)) {
@@ -964,7 +964,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
964 * Already booted CPU? 964 * Already booted CPU?
965 */ 965 */
966 if (cpu_isset(cpu, cpu_callin_map)) { 966 if (cpu_isset(cpu, cpu_callin_map)) {
967 Dprintk("do_boot_cpu %d Already started\n", cpu); 967 pr_debug("do_boot_cpu %d Already started\n", cpu);
968 return -ENOSYS; 968 return -ENOSYS;
969 } 969 }
970 970
@@ -991,7 +991,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
991 err = do_boot_cpu(apicid, cpu); 991 err = do_boot_cpu(apicid, cpu);
992#endif 992#endif
993 if (err) { 993 if (err) {
994 Dprintk("do_boot_cpu failed %d\n", err); 994 pr_debug("do_boot_cpu failed %d\n", err);
995 return -EIO; 995 return -EIO;
996 } 996 }
997 997
@@ -1202,7 +1202,7 @@ void __init native_smp_prepare_boot_cpu(void)
1202 1202
1203void __init native_smp_cpus_done(unsigned int max_cpus) 1203void __init native_smp_cpus_done(unsigned int max_cpus)
1204{ 1204{
1205 Dprintk("Boot done.\n"); 1205 pr_debug("Boot done.\n");
1206 1206
1207 impress_friends(); 1207 impress_friends();
1208 smp_checks(); 1208 smp_checks();
@@ -1300,7 +1300,7 @@ static void __ref remove_cpu_from_maps(int cpu)
1300 cpu_clear(cpu, cpu_callout_map); 1300 cpu_clear(cpu, cpu_callout_map);
1301 cpu_clear(cpu, cpu_callin_map); 1301 cpu_clear(cpu, cpu_callin_map);
1302 /* was set by cpu_init() */ 1302 /* was set by cpu_init() */
1303 clear_bit(cpu, (unsigned long *)&cpu_initialized); 1303 cpu_clear(cpu, cpu_initialized);
1304 numa_remove_cpu(cpu); 1304 numa_remove_cpu(cpu);
1305} 1305}
1306 1306
@@ -1379,7 +1379,8 @@ static int __init parse_maxcpus(char *arg)
1379{ 1379{
1380 extern unsigned int maxcpus; 1380 extern unsigned int maxcpus;
1381 1381
1382 maxcpus = simple_strtoul(arg, NULL, 0); 1382 if (arg)
1383 maxcpus = simple_strtoul(arg, NULL, 0);
1383 return 0; 1384 return 0;
1384} 1385}
1385early_param("maxcpus", parse_maxcpus); 1386early_param("maxcpus", parse_maxcpus);
diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c
deleted file mode 100644
index 8b137891791f..000000000000
--- a/arch/x86/kernel/smpcommon_32.c
+++ /dev/null
@@ -1 +0,0 @@
1
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 92c20fee6781..e8b9863ef8c4 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -105,6 +105,20 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
105static int enable_single_step(struct task_struct *child) 105static int enable_single_step(struct task_struct *child)
106{ 106{
107 struct pt_regs *regs = task_pt_regs(child); 107 struct pt_regs *regs = task_pt_regs(child);
108 unsigned long oflags;
109
110 /*
111 * If we stepped into a sysenter/syscall insn, it trapped in
112 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
113 * If user-mode had set TF itself, then it's still clear from
114 * do_debug() and we need to set it again to restore the user
115 * state so we don't wrongly set TIF_FORCED_TF below.
116 * If enable_single_step() was used last and that is what
117 * set TIF_SINGLESTEP, then both TF and TIF_FORCED_TF are
118 * already set and our bookkeeping is fine.
119 */
120 if (unlikely(test_tsk_thread_flag(child, TIF_SINGLESTEP)))
121 regs->flags |= X86_EFLAGS_TF;
108 122
109 /* 123 /*
110 * Always set TIF_SINGLESTEP - this guarantees that 124 * Always set TIF_SINGLESTEP - this guarantees that
@@ -113,11 +127,7 @@ static int enable_single_step(struct task_struct *child)
113 */ 127 */
114 set_tsk_thread_flag(child, TIF_SINGLESTEP); 128 set_tsk_thread_flag(child, TIF_SINGLESTEP);
115 129
116 /* 130 oflags = regs->flags;
117 * If TF was already set, don't do anything else
118 */
119 if (regs->flags & X86_EFLAGS_TF)
120 return 0;
121 131
122 /* Set TF on the kernel stack.. */ 132 /* Set TF on the kernel stack.. */
123 regs->flags |= X86_EFLAGS_TF; 133 regs->flags |= X86_EFLAGS_TF;
@@ -126,9 +136,22 @@ static int enable_single_step(struct task_struct *child)
126 * ..but if TF is changed by the instruction we will trace, 136 * ..but if TF is changed by the instruction we will trace,
127 * don't mark it as being "us" that set it, so that we 137 * don't mark it as being "us" that set it, so that we
128 * won't clear it by hand later. 138 * won't clear it by hand later.
139 *
140 * Note that if we don't actually execute the popf because
141 * of a signal arriving right now or suchlike, we will lose
142 * track of the fact that it really was "us" that set it.
129 */ 143 */
130 if (is_setting_trap_flag(child, regs)) 144 if (is_setting_trap_flag(child, regs)) {
145 clear_tsk_thread_flag(child, TIF_FORCED_TF);
131 return 0; 146 return 0;
147 }
148
149 /*
150 * If TF was already set, check whether it was us who set it.
151 * If not, we should never attempt a block step.
152 */
153 if (oflags & X86_EFLAGS_TF)
154 return test_tsk_thread_flag(child, TIF_FORCED_TF);
132 155
133 set_tsk_thread_flag(child, TIF_FORCED_TF); 156 set_tsk_thread_flag(child, TIF_FORCED_TF);
134 157
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 059ca6ee59b4..ffe3c664afc0 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -129,6 +129,7 @@ void __init hpet_time_init(void)
129 */ 129 */
130void __init time_init(void) 130void __init time_init(void)
131{ 131{
132 pre_time_init_hook();
132 tsc_init(); 133 tsc_init();
133 late_time_init = choose_time_init(); 134 late_time_init = choose_time_init();
134} 135}
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 8a768973c4f0..03df8e45e5a1 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -58,6 +58,7 @@
58#include <asm/nmi.h> 58#include <asm/nmi.h>
59#include <asm/smp.h> 59#include <asm/smp.h>
60#include <asm/io.h> 60#include <asm/io.h>
61#include <asm/traps.h>
61 62
62#include "mach_traps.h" 63#include "mach_traps.h"
63 64
@@ -77,26 +78,6 @@ char ignore_fpu_irq;
77gate_desc idt_table[256] 78gate_desc idt_table[256]
78 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; 79 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
79 80
80asmlinkage void divide_error(void);
81asmlinkage void debug(void);
82asmlinkage void nmi(void);
83asmlinkage void int3(void);
84asmlinkage void overflow(void);
85asmlinkage void bounds(void);
86asmlinkage void invalid_op(void);
87asmlinkage void device_not_available(void);
88asmlinkage void coprocessor_segment_overrun(void);
89asmlinkage void invalid_TSS(void);
90asmlinkage void segment_not_present(void);
91asmlinkage void stack_segment(void);
92asmlinkage void general_protection(void);
93asmlinkage void page_fault(void);
94asmlinkage void coprocessor_error(void);
95asmlinkage void simd_coprocessor_error(void);
96asmlinkage void alignment_check(void);
97asmlinkage void spurious_interrupt_bug(void);
98asmlinkage void machine_check(void);
99
100int panic_on_unrecovered_nmi; 81int panic_on_unrecovered_nmi;
101int kstack_depth_to_print = 24; 82int kstack_depth_to_print = 24;
102static unsigned int code_bytes = 64; 83static unsigned int code_bytes = 64;
@@ -256,7 +237,7 @@ static const struct stacktrace_ops print_trace_ops = {
256 237
257static void 238static void
258show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 239show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
259 unsigned long *stack, unsigned long bp, char *log_lvl) 240 unsigned long *stack, unsigned long bp, char *log_lvl)
260{ 241{
261 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); 242 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
262 printk("%s =======================\n", log_lvl); 243 printk("%s =======================\n", log_lvl);
@@ -383,6 +364,54 @@ int is_valid_bugaddr(unsigned long ip)
383 return ud2 == 0x0b0f; 364 return ud2 == 0x0b0f;
384} 365}
385 366
367static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
368static int die_owner = -1;
369static unsigned int die_nest_count;
370
371unsigned __kprobes long oops_begin(void)
372{
373 unsigned long flags;
374
375 oops_enter();
376
377 if (die_owner != raw_smp_processor_id()) {
378 console_verbose();
379 raw_local_irq_save(flags);
380 __raw_spin_lock(&die_lock);
381 die_owner = smp_processor_id();
382 die_nest_count = 0;
383 bust_spinlocks(1);
384 } else {
385 raw_local_irq_save(flags);
386 }
387 die_nest_count++;
388 return flags;
389}
390
391void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
392{
393 bust_spinlocks(0);
394 die_owner = -1;
395 add_taint(TAINT_DIE);
396 __raw_spin_unlock(&die_lock);
397 raw_local_irq_restore(flags);
398
399 if (!regs)
400 return;
401
402 if (kexec_should_crash(current))
403 crash_kexec(regs);
404
405 if (in_interrupt())
406 panic("Fatal exception in interrupt");
407
408 if (panic_on_oops)
409 panic("Fatal exception");
410
411 oops_exit();
412 do_exit(signr);
413}
414
386int __kprobes __die(const char *str, struct pt_regs *regs, long err) 415int __kprobes __die(const char *str, struct pt_regs *regs, long err)
387{ 416{
388 unsigned short ss; 417 unsigned short ss;
@@ -423,31 +452,9 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
423 */ 452 */
424void die(const char *str, struct pt_regs *regs, long err) 453void die(const char *str, struct pt_regs *regs, long err)
425{ 454{
426 static struct { 455 unsigned long flags = oops_begin();
427 raw_spinlock_t lock;
428 u32 lock_owner;
429 int lock_owner_depth;
430 } die = {
431 .lock = __RAW_SPIN_LOCK_UNLOCKED,
432 .lock_owner = -1,
433 .lock_owner_depth = 0
434 };
435 unsigned long flags;
436
437 oops_enter();
438
439 if (die.lock_owner != raw_smp_processor_id()) {
440 console_verbose();
441 raw_local_irq_save(flags);
442 __raw_spin_lock(&die.lock);
443 die.lock_owner = smp_processor_id();
444 die.lock_owner_depth = 0;
445 bust_spinlocks(1);
446 } else {
447 raw_local_irq_save(flags);
448 }
449 456
450 if (++die.lock_owner_depth < 3) { 457 if (die_nest_count < 3) {
451 report_bug(regs->ip, regs); 458 report_bug(regs->ip, regs);
452 459
453 if (__die(str, regs, err)) 460 if (__die(str, regs, err))
@@ -456,26 +463,7 @@ void die(const char *str, struct pt_regs *regs, long err)
456 printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); 463 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
457 } 464 }
458 465
459 bust_spinlocks(0); 466 oops_end(flags, regs, SIGSEGV);
460 die.lock_owner = -1;
461 add_taint(TAINT_DIE);
462 __raw_spin_unlock(&die.lock);
463 raw_local_irq_restore(flags);
464
465 if (!regs)
466 return;
467
468 if (kexec_should_crash(current))
469 crash_kexec(regs);
470
471 if (in_interrupt())
472 panic("Fatal exception in interrupt");
473
474 if (panic_on_oops)
475 panic("Fatal exception");
476
477 oops_exit();
478 do_exit(SIGSEGV);
479} 467}
480 468
481static inline void 469static inline void
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 2696a6837782..3f18d73f420c 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -51,30 +51,10 @@
51#include <asm/pgalloc.h> 51#include <asm/pgalloc.h>
52#include <asm/proto.h> 52#include <asm/proto.h>
53#include <asm/pda.h> 53#include <asm/pda.h>
54#include <asm/traps.h>
54 55
55#include <mach_traps.h> 56#include <mach_traps.h>
56 57
57asmlinkage void divide_error(void);
58asmlinkage void debug(void);
59asmlinkage void nmi(void);
60asmlinkage void int3(void);
61asmlinkage void overflow(void);
62asmlinkage void bounds(void);
63asmlinkage void invalid_op(void);
64asmlinkage void device_not_available(void);
65asmlinkage void double_fault(void);
66asmlinkage void coprocessor_segment_overrun(void);
67asmlinkage void invalid_TSS(void);
68asmlinkage void segment_not_present(void);
69asmlinkage void stack_segment(void);
70asmlinkage void general_protection(void);
71asmlinkage void page_fault(void);
72asmlinkage void coprocessor_error(void);
73asmlinkage void simd_coprocessor_error(void);
74asmlinkage void alignment_check(void);
75asmlinkage void spurious_interrupt_bug(void);
76asmlinkage void machine_check(void);
77
78int panic_on_unrecovered_nmi; 58int panic_on_unrecovered_nmi;
79int kstack_depth_to_print = 12; 59int kstack_depth_to_print = 12;
80static unsigned int code_bytes = 64; 60static unsigned int code_bytes = 64;
@@ -355,17 +335,24 @@ static const struct stacktrace_ops print_trace_ops = {
355 .address = print_trace_address, 335 .address = print_trace_address,
356}; 336};
357 337
358void show_trace(struct task_struct *task, struct pt_regs *regs, 338static void
359 unsigned long *stack, unsigned long bp) 339show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
340 unsigned long *stack, unsigned long bp, char *log_lvl)
360{ 341{
361 printk("\nCall Trace:\n"); 342 printk("\nCall Trace:\n");
362 dump_trace(task, regs, stack, bp, &print_trace_ops, NULL); 343 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
363 printk("\n"); 344 printk("\n");
364} 345}
365 346
347void show_trace(struct task_struct *task, struct pt_regs *regs,
348 unsigned long *stack, unsigned long bp)
349{
350 show_trace_log_lvl(task, regs, stack, bp, "");
351}
352
366static void 353static void
367_show_stack(struct task_struct *task, struct pt_regs *regs, 354show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
368 unsigned long *sp, unsigned long bp) 355 unsigned long *sp, unsigned long bp, char *log_lvl)
369{ 356{
370 unsigned long *stack; 357 unsigned long *stack;
371 int i; 358 int i;
@@ -399,12 +386,12 @@ _show_stack(struct task_struct *task, struct pt_regs *regs,
399 printk(" %016lx", *stack++); 386 printk(" %016lx", *stack++);
400 touch_nmi_watchdog(); 387 touch_nmi_watchdog();
401 } 388 }
402 show_trace(task, regs, sp, bp); 389 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
403} 390}
404 391
405void show_stack(struct task_struct *task, unsigned long *sp) 392void show_stack(struct task_struct *task, unsigned long *sp)
406{ 393{
407 _show_stack(task, NULL, sp, 0); 394 show_stack_log_lvl(task, NULL, sp, 0, "");
408} 395}
409 396
410/* 397/*
@@ -454,7 +441,8 @@ void show_registers(struct pt_regs *regs)
454 u8 *ip; 441 u8 *ip;
455 442
456 printk("Stack: "); 443 printk("Stack: ");
457 _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); 444 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
445 regs->bp, "");
458 printk("\n"); 446 printk("\n");
459 447
460 printk(KERN_EMERG "Code: "); 448 printk(KERN_EMERG "Code: ");
@@ -518,7 +506,7 @@ unsigned __kprobes long oops_begin(void)
518} 506}
519 507
520void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) 508void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
521{ 509{
522 die_owner = -1; 510 die_owner = -1;
523 bust_spinlocks(0); 511 bust_spinlocks(0);
524 die_nest_count--; 512 die_nest_count--;
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index e94bdb6add1d..41e01b145c48 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -73,7 +73,7 @@ int is_visws_box(void)
73 return visws_board_type >= 0; 73 return visws_board_type >= 0;
74} 74}
75 75
76static int __init visws_time_init_quirk(void) 76static int __init visws_time_init(void)
77{ 77{
78 printk(KERN_INFO "Starting Cobalt Timer system clock\n"); 78 printk(KERN_INFO "Starting Cobalt Timer system clock\n");
79 79
@@ -93,7 +93,7 @@ static int __init visws_time_init_quirk(void)
93 return 0; 93 return 0;
94} 94}
95 95
96static int __init visws_pre_intr_init_quirk(void) 96static int __init visws_pre_intr_init(void)
97{ 97{
98 init_VISWS_APIC_irqs(); 98 init_VISWS_APIC_irqs();
99 99
@@ -114,7 +114,7 @@ EXPORT_SYMBOL(sgivwfb_mem_size);
114 114
115long long mem_size __initdata = 0; 115long long mem_size __initdata = 0;
116 116
117static char * __init visws_memory_setup_quirk(void) 117static char * __init visws_memory_setup(void)
118{ 118{
119 long long gfx_mem_size = 8 * MB; 119 long long gfx_mem_size = 8 * MB;
120 120
@@ -176,7 +176,7 @@ static void visws_machine_power_off(void)
176 outl(PIIX_SPECIAL_STOP, 0xCFC); 176 outl(PIIX_SPECIAL_STOP, 0xCFC);
177} 177}
178 178
179static int __init visws_get_smp_config_quirk(unsigned int early) 179static int __init visws_get_smp_config(unsigned int early)
180{ 180{
181 /* 181 /*
182 * Prevent MP-table parsing by the generic code: 182 * Prevent MP-table parsing by the generic code:
@@ -192,7 +192,7 @@ extern unsigned int __cpuinitdata maxcpus;
192 * No problem for Linux. 192 * No problem for Linux.
193 */ 193 */
194 194
195static void __init MP_processor_info (struct mpc_config_processor *m) 195static void __init MP_processor_info(struct mpc_config_processor *m)
196{ 196{
197 int ver, logical_apicid; 197 int ver, logical_apicid;
198 physid_mask_t apic_cpus; 198 physid_mask_t apic_cpus;
@@ -232,7 +232,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
232 apic_version[m->mpc_apicid] = ver; 232 apic_version[m->mpc_apicid] = ver;
233} 233}
234 234
235int __init visws_find_smp_config_quirk(unsigned int reserve) 235static int __init visws_find_smp_config(unsigned int reserve)
236{ 236{
237 struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); 237 struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
238 unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); 238 unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
@@ -258,7 +258,17 @@ int __init visws_find_smp_config_quirk(unsigned int reserve)
258 return 1; 258 return 1;
259} 259}
260 260
261extern int visws_trap_init_quirk(void); 261static int visws_trap_init(void);
262
263static struct x86_quirks visws_x86_quirks __initdata = {
264 .arch_time_init = visws_time_init,
265 .arch_pre_intr_init = visws_pre_intr_init,
266 .arch_memory_setup = visws_memory_setup,
267 .arch_intr_init = NULL,
268 .arch_trap_init = visws_trap_init,
269 .mach_get_smp_config = visws_get_smp_config,
270 .mach_find_smp_config = visws_find_smp_config,
271};
262 272
263void __init visws_early_detect(void) 273void __init visws_early_detect(void)
264{ 274{
@@ -272,16 +282,10 @@ void __init visws_early_detect(void)
272 282
273 /* 283 /*
274 * Install special quirks for timer, interrupt and memory setup: 284 * Install special quirks for timer, interrupt and memory setup:
275 */
276 arch_time_init_quirk = visws_time_init_quirk;
277 arch_pre_intr_init_quirk = visws_pre_intr_init_quirk;
278 arch_memory_setup_quirk = visws_memory_setup_quirk;
279
280 /*
281 * Fall back to generic behavior for traps: 285 * Fall back to generic behavior for traps:
286 * Override generic MP-table parsing:
282 */ 287 */
283 arch_intr_init_quirk = NULL; 288 x86_quirks = &visws_x86_quirks;
284 arch_trap_init_quirk = visws_trap_init_quirk;
285 289
286 /* 290 /*
287 * Install reboot quirks: 291 * Install reboot quirks:
@@ -294,12 +298,6 @@ void __init visws_early_detect(void)
294 */ 298 */
295 no_broadcast = 0; 299 no_broadcast = 0;
296 300
297 /*
298 * Override generic MP-table parsing:
299 */
300 mach_get_smp_config_quirk = visws_get_smp_config_quirk;
301 mach_find_smp_config_quirk = visws_find_smp_config_quirk;
302
303#ifdef CONFIG_X86_IO_APIC 301#ifdef CONFIG_X86_IO_APIC
304 /* 302 /*
305 * Turn off IO-APIC detection and initialization: 303 * Turn off IO-APIC detection and initialization:
@@ -426,7 +424,7 @@ static __init void cobalt_init(void)
426 co_apic_read(CO_APIC_ID)); 424 co_apic_read(CO_APIC_ID));
427} 425}
428 426
429int __init visws_trap_init_quirk(void) 427static int __init visws_trap_init(void)
430{ 428{
431 lithium_init(); 429 lithium_init();
432 cobalt_init(); 430 cobalt_init();