aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/boot.c13
-rw-r--r--arch/x86/kernel/alternative.c60
-rw-r--r--arch/x86/kernel/apb_timer.c784
-rw-r--r--arch/x86/kernel/apic/apic.c8
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c340
-rw-r--r--arch/x86/kernel/apic/nmi.c14
-rw-r--r--arch/x86/kernel/apic/numaq_32.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig14
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Makefile1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c620
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c6
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c208
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event.c41
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c39
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c8
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c10
-rw-r--r--arch/x86/kernel/e820.c349
-rw-r--r--arch/x86/kernel/head32.c10
-rw-r--r--arch/x86/kernel/head_32.S6
-rw-r--r--arch/x86/kernel/head_64.S2
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/hw_breakpoint.c12
-rw-r--r--arch/x86/kernel/i8259.c94
-rw-r--r--arch/x86/kernel/irqinit.c36
-rw-r--r--arch/x86/kernel/kprobes.c609
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c7
-rw-r--r--arch/x86/kernel/mrst.c216
-rw-r--r--arch/x86/kernel/olpc.c10
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/pci-calgary_64.c2
-rw-r--r--arch/x86/kernel/pci-dma.c15
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/setup.c10
-rw-r--r--arch/x86/kernel/setup_percpu.c6
-rw-r--r--arch/x86/kernel/smpboot.c22
-rw-r--r--arch/x86/kernel/sys_i386_32.c185
-rw-r--r--arch/x86/kernel/sys_x86_64.c12
-rw-r--r--arch/x86/kernel/syscall_table_32.S4
-rw-r--r--arch/x86/kernel/time.c4
-rw-r--r--arch/x86/kernel/tsc.c4
-rw-r--r--arch/x86/kernel/visws_quirks.c27
-rw-r--r--arch/x86/kernel/vmi_32.c35
-rw-r--r--arch/x86/kernel/vmiclock_32.c8
-rw-r--r--arch/x86/kernel/vmlinux.lds.S4
-rw-r--r--arch/x86/kernel/vsyscall_64.c3
-rw-r--r--arch/x86/kernel/x86_init.c8
54 files changed, 2752 insertions, 1142 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d87f09bc5a52..4c58352209e0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -87,6 +87,7 @@ obj-$(CONFIG_VM86) += vm86_32.o
87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
88 88
89obj-$(CONFIG_HPET_TIMER) += hpet.o 89obj-$(CONFIG_HPET_TIMER) += hpet.o
90obj-$(CONFIG_APB_TIMER) += apb_timer.o
90 91
91obj-$(CONFIG_K8_NB) += k8.o 92obj-$(CONFIG_K8_NB) += k8.o
92obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o 93obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f95703098f8d..a54d714545ff 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -35,6 +35,7 @@
35#include <linux/ioport.h> 35#include <linux/ioport.h>
36#include <linux/pci.h> 36#include <linux/pci.h>
37 37
38#include <asm/pci_x86.h>
38#include <asm/pgtable.h> 39#include <asm/pgtable.h>
39#include <asm/io_apic.h> 40#include <asm/io_apic.h>
40#include <asm/apic.h> 41#include <asm/apic.h>
@@ -447,6 +448,12 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
447int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) 448int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
448{ 449{
449 *irq = gsi; 450 *irq = gsi;
451
452#ifdef CONFIG_X86_IO_APIC
453 if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC)
454 setup_IO_APIC_irq_extra(gsi);
455#endif
456
450 return 0; 457 return 0;
451} 458}
452 459
@@ -474,7 +481,8 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
474 plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); 481 plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
475 } 482 }
476#endif 483#endif
477 acpi_gsi_to_irq(plat_gsi, &irq); 484 irq = plat_gsi;
485
478 return irq; 486 return irq;
479} 487}
480 488
@@ -1617,6 +1625,9 @@ int __init acpi_boot_init(void)
1617 1625
1618 acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); 1626 acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet);
1619 1627
1628 if (!acpi_noirq)
1629 x86_init.pci.init = pci_acpi_init;
1630
1620 return 0; 1631 return 0;
1621} 1632}
1622 1633
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index e6ea0342c8f8..3a4bf35c179b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -7,6 +7,7 @@
7#include <linux/mm.h> 7#include <linux/mm.h>
8#include <linux/vmalloc.h> 8#include <linux/vmalloc.h>
9#include <linux/memory.h> 9#include <linux/memory.h>
10#include <linux/stop_machine.h>
10#include <asm/alternative.h> 11#include <asm/alternative.h>
11#include <asm/sections.h> 12#include <asm/sections.h>
12#include <asm/pgtable.h> 13#include <asm/pgtable.h>
@@ -572,3 +573,62 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
572 local_irq_restore(flags); 573 local_irq_restore(flags);
573 return addr; 574 return addr;
574} 575}
576
577/*
578 * Cross-modifying kernel text with stop_machine().
579 * This code originally comes from immediate value.
580 */
581static atomic_t stop_machine_first;
582static int wrote_text;
583
584struct text_poke_params {
585 void *addr;
586 const void *opcode;
587 size_t len;
588};
589
590static int __kprobes stop_machine_text_poke(void *data)
591{
592 struct text_poke_params *tpp = data;
593
594 if (atomic_dec_and_test(&stop_machine_first)) {
595 text_poke(tpp->addr, tpp->opcode, tpp->len);
596 smp_wmb(); /* Make sure other cpus see that this has run */
597 wrote_text = 1;
598 } else {
599 while (!wrote_text)
600 cpu_relax();
601 smp_mb(); /* Load wrote_text before following execution */
602 }
603
604 flush_icache_range((unsigned long)tpp->addr,
605 (unsigned long)tpp->addr + tpp->len);
606 return 0;
607}
608
609/**
610 * text_poke_smp - Update instructions on a live kernel on SMP
611 * @addr: address to modify
612 * @opcode: source of the copy
613 * @len: length to copy
614 *
615 * Modify multi-byte instruction by using stop_machine() on SMP. This allows
616 * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
617 * should be allowed, since stop_machine() does _not_ protect code against
618 * NMI and MCE.
619 *
620 * Note: Must be called under get_online_cpus() and text_mutex.
621 */
622void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
623{
624 struct text_poke_params tpp;
625
626 tpp.addr = addr;
627 tpp.opcode = opcode;
628 tpp.len = len;
629 atomic_set(&stop_machine_first, 1);
630 wrote_text = 0;
631 stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
632 return addr;
633}
634
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
new file mode 100644
index 000000000000..4b7099526d2c
--- /dev/null
+++ b/arch/x86/kernel/apb_timer.c
@@ -0,0 +1,784 @@
1/*
2 * apb_timer.c: Driver for Langwell APB timers
3 *
4 * (C) Copyright 2009 Intel Corporation
5 * Author: Jacob Pan (jacob.jun.pan@intel.com)
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 *
12 * Note:
13 * Langwell is the south complex of Intel Moorestown MID platform. There are
14 * eight external timers in total that can be used by the operating system.
15 * The timer information, such as frequency and addresses, is provided to the
16 * OS via SFI tables.
17 * Timer interrupts are routed via FW/HW emulated IOAPIC independently via
18 * individual redirection table entries (RTE).
19 * Unlike HPET, there is no master counter, therefore one of the timers are
20 * used as clocksource. The overall allocation looks like:
21 * - timer 0 - NR_CPUs for per cpu timer
22 * - one timer for clocksource
23 * - one timer for watchdog driver.
24 * It is also worth notice that APB timer does not support true one-shot mode,
25 * free-running mode will be used here to emulate one-shot mode.
26 * APB timer can also be used as broadcast timer along with per cpu local APIC
27 * timer, but by default APB timer has higher rating than local APIC timers.
28 */
29
30#include <linux/clocksource.h>
31#include <linux/clockchips.h>
32#include <linux/delay.h>
33#include <linux/errno.h>
34#include <linux/init.h>
35#include <linux/sysdev.h>
36#include <linux/pm.h>
37#include <linux/pci.h>
38#include <linux/sfi.h>
39#include <linux/interrupt.h>
40#include <linux/cpu.h>
41#include <linux/irq.h>
42
43#include <asm/fixmap.h>
44#include <asm/apb_timer.h>
45
46#define APBT_MASK CLOCKSOURCE_MASK(32)
47#define APBT_SHIFT 22
48#define APBT_CLOCKEVENT_RATING 150
49#define APBT_CLOCKSOURCE_RATING 250
50#define APBT_MIN_DELTA_USEC 200
51
52#define EVT_TO_APBT_DEV(evt) container_of(evt, struct apbt_dev, evt)
53#define APBT_CLOCKEVENT0_NUM (0)
54#define APBT_CLOCKEVENT1_NUM (1)
55#define APBT_CLOCKSOURCE_NUM (2)
56
57static unsigned long apbt_address;
58static int apb_timer_block_enabled;
59static void __iomem *apbt_virt_address;
60static int phy_cs_timer_id;
61
62/*
63 * Common DW APB timer info
64 */
65static uint64_t apbt_freq;
66
67static void apbt_set_mode(enum clock_event_mode mode,
68 struct clock_event_device *evt);
69static int apbt_next_event(unsigned long delta,
70 struct clock_event_device *evt);
71static cycle_t apbt_read_clocksource(struct clocksource *cs);
72static void apbt_restart_clocksource(struct clocksource *cs);
73
74struct apbt_dev {
75 struct clock_event_device evt;
76 unsigned int num;
77 int cpu;
78 unsigned int irq;
79 unsigned int tick;
80 unsigned int count;
81 unsigned int flags;
82 char name[10];
83};
84
85int disable_apbt_percpu __cpuinitdata;
86
87static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev);
88
89#ifdef CONFIG_SMP
90static unsigned int apbt_num_timers_used;
91static struct apbt_dev *apbt_devs;
92#endif
93
94static inline unsigned long apbt_readl_reg(unsigned long a)
95{
96 return readl(apbt_virt_address + a);
97}
98
99static inline void apbt_writel_reg(unsigned long d, unsigned long a)
100{
101 writel(d, apbt_virt_address + a);
102}
103
104static inline unsigned long apbt_readl(int n, unsigned long a)
105{
106 return readl(apbt_virt_address + a + n * APBTMRS_REG_SIZE);
107}
108
109static inline void apbt_writel(int n, unsigned long d, unsigned long a)
110{
111 writel(d, apbt_virt_address + a + n * APBTMRS_REG_SIZE);
112}
113
114static inline void apbt_set_mapping(void)
115{
116 struct sfi_timer_table_entry *mtmr;
117
118 if (apbt_virt_address) {
119 pr_debug("APBT base already mapped\n");
120 return;
121 }
122 mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM);
123 if (mtmr == NULL) {
124 printk(KERN_ERR "Failed to get MTMR %d from SFI\n",
125 APBT_CLOCKEVENT0_NUM);
126 return;
127 }
128 apbt_address = (unsigned long)mtmr->phys_addr;
129 if (!apbt_address) {
130 printk(KERN_WARNING "No timer base from SFI, use default\n");
131 apbt_address = APBT_DEFAULT_BASE;
132 }
133 apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE);
134 if (apbt_virt_address) {
135 pr_debug("Mapped APBT physical addr %p at virtual addr %p\n",\
136 (void *)apbt_address, (void *)apbt_virt_address);
137 } else {
138 pr_debug("Failed mapping APBT phy address at %p\n",\
139 (void *)apbt_address);
140 goto panic_noapbt;
141 }
142 apbt_freq = mtmr->freq_hz / USEC_PER_SEC;
143 sfi_free_mtmr(mtmr);
144
145 /* Now figure out the physical timer id for clocksource device */
146 mtmr = sfi_get_mtmr(APBT_CLOCKSOURCE_NUM);
147 if (mtmr == NULL)
148 goto panic_noapbt;
149
150 /* Now figure out the physical timer id */
151 phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff)
152 / APBTMRS_REG_SIZE;
153 pr_debug("Use timer %d for clocksource\n", phy_cs_timer_id);
154 return;
155
156panic_noapbt:
157 panic("Failed to setup APB system timer\n");
158
159}
160
161static inline void apbt_clear_mapping(void)
162{
163 iounmap(apbt_virt_address);
164 apbt_virt_address = NULL;
165}
166
167/*
168 * APBT timer interrupt enable / disable
169 */
170static inline int is_apbt_capable(void)
171{
172 return apbt_virt_address ? 1 : 0;
173}
174
175static struct clocksource clocksource_apbt = {
176 .name = "apbt",
177 .rating = APBT_CLOCKSOURCE_RATING,
178 .read = apbt_read_clocksource,
179 .mask = APBT_MASK,
180 .shift = APBT_SHIFT,
181 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
182 .resume = apbt_restart_clocksource,
183};
184
185/* boot APB clock event device */
186static struct clock_event_device apbt_clockevent = {
187 .name = "apbt0",
188 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
189 .set_mode = apbt_set_mode,
190 .set_next_event = apbt_next_event,
191 .shift = APBT_SHIFT,
192 .irq = 0,
193 .rating = APBT_CLOCKEVENT_RATING,
194};
195
196/*
197 * if user does not want to use per CPU apb timer, just give it a lower rating
198 * than local apic timer and skip the late per cpu timer init.
199 */
200static inline int __init setup_x86_mrst_timer(char *arg)
201{
202 if (!arg)
203 return -EINVAL;
204
205 if (strcmp("apbt_only", arg) == 0)
206 disable_apbt_percpu = 0;
207 else if (strcmp("lapic_and_apbt", arg) == 0)
208 disable_apbt_percpu = 1;
209 else {
210 pr_warning("X86 MRST timer option %s not recognised"
211 " use x86_mrst_timer=apbt_only or lapic_and_apbt\n",
212 arg);
213 return -EINVAL;
214 }
215 return 0;
216}
217__setup("x86_mrst_timer=", setup_x86_mrst_timer);
218
219/*
220 * start count down from 0xffff_ffff. this is done by toggling the enable bit
221 * then load initial load count to ~0.
222 */
223static void apbt_start_counter(int n)
224{
225 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
226
227 ctrl &= ~APBTMR_CONTROL_ENABLE;
228 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
229 apbt_writel(n, ~0, APBTMR_N_LOAD_COUNT);
230 /* enable, mask interrupt */
231 ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC;
232 ctrl |= (APBTMR_CONTROL_ENABLE | APBTMR_CONTROL_INT);
233 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
234 /* read it once to get cached counter value initialized */
235 apbt_read_clocksource(&clocksource_apbt);
236}
237
238static irqreturn_t apbt_interrupt_handler(int irq, void *data)
239{
240 struct apbt_dev *dev = (struct apbt_dev *)data;
241 struct clock_event_device *aevt = &dev->evt;
242
243 if (!aevt->event_handler) {
244 printk(KERN_INFO "Spurious APBT timer interrupt on %d\n",
245 dev->num);
246 return IRQ_NONE;
247 }
248 aevt->event_handler(aevt);
249 return IRQ_HANDLED;
250}
251
252static void apbt_restart_clocksource(struct clocksource *cs)
253{
254 apbt_start_counter(phy_cs_timer_id);
255}
256
257/* Setup IRQ routing via IOAPIC */
258#ifdef CONFIG_SMP
259static void apbt_setup_irq(struct apbt_dev *adev)
260{
261 struct irq_chip *chip;
262 struct irq_desc *desc;
263
264 /* timer0 irq has been setup early */
265 if (adev->irq == 0)
266 return;
267 desc = irq_to_desc(adev->irq);
268 chip = get_irq_chip(adev->irq);
269 disable_irq(adev->irq);
270 desc->status |= IRQ_MOVE_PCNTXT;
271 irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
272 /* APB timer irqs are set up as mp_irqs, timer is edge triggerred */
273 set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge");
274 enable_irq(adev->irq);
275 if (system_state == SYSTEM_BOOTING)
276 if (request_irq(adev->irq, apbt_interrupt_handler,
277 IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
278 adev->name, adev)) {
279 printk(KERN_ERR "Failed request IRQ for APBT%d\n",
280 adev->num);
281 }
282}
283#endif
284
285static void apbt_enable_int(int n)
286{
287 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
288 /* clear pending intr */
289 apbt_readl(n, APBTMR_N_EOI);
290 ctrl &= ~APBTMR_CONTROL_INT;
291 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
292}
293
294static void apbt_disable_int(int n)
295{
296 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
297
298 ctrl |= APBTMR_CONTROL_INT;
299 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
300}
301
302
303static int __init apbt_clockevent_register(void)
304{
305 struct sfi_timer_table_entry *mtmr;
306 struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev);
307
308 mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM);
309 if (mtmr == NULL) {
310 printk(KERN_ERR "Failed to get MTMR %d from SFI\n",
311 APBT_CLOCKEVENT0_NUM);
312 return -ENODEV;
313 }
314
315 /*
316 * We need to calculate the scaled math multiplication factor for
317 * nanosecond to apbt tick conversion.
318 * mult = (nsec/cycle)*2^APBT_SHIFT
319 */
320 apbt_clockevent.mult = div_sc((unsigned long) mtmr->freq_hz
321 , NSEC_PER_SEC, APBT_SHIFT);
322
323 /* Calculate the min / max delta */
324 apbt_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
325 &apbt_clockevent);
326 apbt_clockevent.min_delta_ns = clockevent_delta2ns(
327 APBT_MIN_DELTA_USEC*apbt_freq,
328 &apbt_clockevent);
329 /*
330 * Start apbt with the boot cpu mask and make it
331 * global if not used for per cpu timer.
332 */
333 apbt_clockevent.cpumask = cpumask_of(smp_processor_id());
334 adev->num = smp_processor_id();
335 memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device));
336
337 if (disable_apbt_percpu) {
338 apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100;
339 global_clock_event = &adev->evt;
340 printk(KERN_DEBUG "%s clockevent registered as global\n",
341 global_clock_event->name);
342 }
343
344 if (request_irq(apbt_clockevent.irq, apbt_interrupt_handler,
345 IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
346 apbt_clockevent.name, adev)) {
347 printk(KERN_ERR "Failed request IRQ for APBT%d\n",
348 apbt_clockevent.irq);
349 }
350
351 clockevents_register_device(&adev->evt);
352 /* Start APBT 0 interrupts */
353 apbt_enable_int(APBT_CLOCKEVENT0_NUM);
354
355 sfi_free_mtmr(mtmr);
356 return 0;
357}
358
359#ifdef CONFIG_SMP
360/* Should be called with per cpu */
361void apbt_setup_secondary_clock(void)
362{
363 struct apbt_dev *adev;
364 struct clock_event_device *aevt;
365 int cpu;
366
367 /* Don't register boot CPU clockevent */
368 cpu = smp_processor_id();
369 if (cpu == boot_cpu_id)
370 return;
371 /*
372 * We need to calculate the scaled math multiplication factor for
373 * nanosecond to apbt tick conversion.
374 * mult = (nsec/cycle)*2^APBT_SHIFT
375 */
376 printk(KERN_INFO "Init per CPU clockevent %d\n", cpu);
377 adev = &per_cpu(cpu_apbt_dev, cpu);
378 aevt = &adev->evt;
379
380 memcpy(aevt, &apbt_clockevent, sizeof(*aevt));
381 aevt->cpumask = cpumask_of(cpu);
382 aevt->name = adev->name;
383 aevt->mode = CLOCK_EVT_MODE_UNUSED;
384
385 printk(KERN_INFO "Registering CPU %d clockevent device %s, mask %08x\n",
386 cpu, aevt->name, *(u32 *)aevt->cpumask);
387
388 apbt_setup_irq(adev);
389
390 clockevents_register_device(aevt);
391
392 apbt_enable_int(cpu);
393
394 return;
395}
396
397/*
398 * this notify handler process CPU hotplug events. in case of S0i3, nonboot
399 * cpus are disabled/enabled frequently, for performance reasons, we keep the
400 * per cpu timer irq registered so that we do need to do free_irq/request_irq.
401 *
402 * TODO: it might be more reliable to directly disable percpu clockevent device
403 * without the notifier chain. currently, cpu 0 may get interrupts from other
404 * cpu timers during the offline process due to the ordering of notification.
405 * the extra interrupt is harmless.
406 */
407static int apbt_cpuhp_notify(struct notifier_block *n,
408 unsigned long action, void *hcpu)
409{
410 unsigned long cpu = (unsigned long)hcpu;
411 struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu);
412
413 switch (action & 0xf) {
414 case CPU_DEAD:
415 apbt_disable_int(cpu);
416 if (system_state == SYSTEM_RUNNING)
417 pr_debug("skipping APBT CPU %lu offline\n", cpu);
418 else if (adev) {
419 pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
420 free_irq(adev->irq, adev);
421 }
422 break;
423 default:
424 pr_debug(KERN_INFO "APBT notified %lu, no action\n", action);
425 }
426 return NOTIFY_OK;
427}
428
429static __init int apbt_late_init(void)
430{
431 if (disable_apbt_percpu)
432 return 0;
433 /* This notifier should be called after workqueue is ready */
434 hotcpu_notifier(apbt_cpuhp_notify, -20);
435 return 0;
436}
437fs_initcall(apbt_late_init);
438#else
439
440void apbt_setup_secondary_clock(void) {}
441
442#endif /* CONFIG_SMP */
443
444static void apbt_set_mode(enum clock_event_mode mode,
445 struct clock_event_device *evt)
446{
447 unsigned long ctrl;
448 uint64_t delta;
449 int timer_num;
450 struct apbt_dev *adev = EVT_TO_APBT_DEV(evt);
451
452 timer_num = adev->num;
453 pr_debug("%s CPU %d timer %d mode=%d\n",
454 __func__, first_cpu(*evt->cpumask), timer_num, mode);
455
456 switch (mode) {
457 case CLOCK_EVT_MODE_PERIODIC:
458 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * apbt_clockevent.mult;
459 delta >>= apbt_clockevent.shift;
460 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
461 ctrl |= APBTMR_CONTROL_MODE_PERIODIC;
462 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
463 /*
464 * DW APB p. 46, have to disable timer before load counter,
465 * may cause sync problem.
466 */
467 ctrl &= ~APBTMR_CONTROL_ENABLE;
468 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
469 udelay(1);
470 pr_debug("Setting clock period %d for HZ %d\n", (int)delta, HZ);
471 apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT);
472 ctrl |= APBTMR_CONTROL_ENABLE;
473 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
474 break;
475 /* APB timer does not have one-shot mode, use free running mode */
476 case CLOCK_EVT_MODE_ONESHOT:
477 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
478 /*
479 * set free running mode, this mode will let timer reload max
480 * timeout which will give time (3min on 25MHz clock) to rearm
481 * the next event, therefore emulate the one-shot mode.
482 */
483 ctrl &= ~APBTMR_CONTROL_ENABLE;
484 ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC;
485
486 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
487 /* write again to set free running mode */
488 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
489
490 /*
491 * DW APB p. 46, load counter with all 1s before starting free
492 * running mode.
493 */
494 apbt_writel(timer_num, ~0, APBTMR_N_LOAD_COUNT);
495 ctrl &= ~APBTMR_CONTROL_INT;
496 ctrl |= APBTMR_CONTROL_ENABLE;
497 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
498 break;
499
500 case CLOCK_EVT_MODE_UNUSED:
501 case CLOCK_EVT_MODE_SHUTDOWN:
502 apbt_disable_int(timer_num);
503 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
504 ctrl &= ~APBTMR_CONTROL_ENABLE;
505 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
506 break;
507
508 case CLOCK_EVT_MODE_RESUME:
509 apbt_enable_int(timer_num);
510 break;
511 }
512}
513
514static int apbt_next_event(unsigned long delta,
515 struct clock_event_device *evt)
516{
517 unsigned long ctrl;
518 int timer_num;
519
520 struct apbt_dev *adev = EVT_TO_APBT_DEV(evt);
521
522 timer_num = adev->num;
523 /* Disable timer */
524 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
525 ctrl &= ~APBTMR_CONTROL_ENABLE;
526 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
527 /* write new count */
528 apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT);
529 ctrl |= APBTMR_CONTROL_ENABLE;
530 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
531 return 0;
532}
533
534/*
535 * APB timer clock is not in sync with pclk on Langwell, which translates to
536 * unreliable read value caused by sampling error. the error does not add up
537 * overtime and only happens when sampling a 0 as a 1 by mistake. so the time
538 * would go backwards. the following code is trying to prevent time traveling
539 * backwards. little bit paranoid.
540 */
541static cycle_t apbt_read_clocksource(struct clocksource *cs)
542{
543 unsigned long t0, t1, t2;
544 static unsigned long last_read;
545
546bad_count:
547 t1 = apbt_readl(phy_cs_timer_id,
548 APBTMR_N_CURRENT_VALUE);
549 t2 = apbt_readl(phy_cs_timer_id,
550 APBTMR_N_CURRENT_VALUE);
551 if (unlikely(t1 < t2)) {
552 pr_debug("APBT: read current count error %lx:%lx:%lx\n",
553 t1, t2, t2 - t1);
554 goto bad_count;
555 }
556 /*
557 * check against cached last read, makes sure time does not go back.
558 * it could be a normal rollover but we will do tripple check anyway
559 */
560 if (unlikely(t2 > last_read)) {
561 /* check if we have a normal rollover */
562 unsigned long raw_intr_status =
563 apbt_readl_reg(APBTMRS_RAW_INT_STATUS);
564 /*
565 * cs timer interrupt is masked but raw intr bit is set if
566 * rollover occurs. then we read EOI reg to clear it.
567 */
568 if (raw_intr_status & (1 << phy_cs_timer_id)) {
569 apbt_readl(phy_cs_timer_id, APBTMR_N_EOI);
570 goto out;
571 }
572 pr_debug("APB CS going back %lx:%lx:%lx ",
573 t2, last_read, t2 - last_read);
574bad_count_x3:
575 pr_debug(KERN_INFO "tripple check enforced\n");
576 t0 = apbt_readl(phy_cs_timer_id,
577 APBTMR_N_CURRENT_VALUE);
578 udelay(1);
579 t1 = apbt_readl(phy_cs_timer_id,
580 APBTMR_N_CURRENT_VALUE);
581 udelay(1);
582 t2 = apbt_readl(phy_cs_timer_id,
583 APBTMR_N_CURRENT_VALUE);
584 if ((t2 > t1) || (t1 > t0)) {
585 printk(KERN_ERR "Error: APB CS tripple check failed\n");
586 goto bad_count_x3;
587 }
588 }
589out:
590 last_read = t2;
591 return (cycle_t)~t2;
592}
593
594static int apbt_clocksource_register(void)
595{
596 u64 start, now;
597 cycle_t t1;
598
599 /* Start the counter, use timer 2 as source, timer 0/1 for event */
600 apbt_start_counter(phy_cs_timer_id);
601
602 /* Verify whether apbt counter works */
603 t1 = apbt_read_clocksource(&clocksource_apbt);
604 rdtscll(start);
605
606 /*
607 * We don't know the TSC frequency yet, but waiting for
608 * 200000 TSC cycles is safe:
609 * 4 GHz == 50us
610 * 1 GHz == 200us
611 */
612 do {
613 rep_nop();
614 rdtscll(now);
615 } while ((now - start) < 200000UL);
616
617 /* APBT is the only always on clocksource, it has to work! */
618 if (t1 == apbt_read_clocksource(&clocksource_apbt))
619 panic("APBT counter not counting. APBT disabled\n");
620
621 /*
622 * initialize and register APBT clocksource
623 * convert that to ns/clock cycle
624 * mult = (ns/c) * 2^APBT_SHIFT
625 */
626 clocksource_apbt.mult = div_sc(MSEC_PER_SEC,
627 (unsigned long) apbt_freq, APBT_SHIFT);
628 clocksource_register(&clocksource_apbt);
629
630 return 0;
631}
632
633/*
634 * Early setup the APBT timer, only use timer 0 for booting then switch to
635 * per CPU timer if possible.
636 * returns 1 if per cpu apbt is setup
637 * returns 0 if no per cpu apbt is chosen
638 * panic if set up failed, this is the only platform timer on Moorestown.
639 */
640void __init apbt_time_init(void)
641{
642#ifdef CONFIG_SMP
643 int i;
644 struct sfi_timer_table_entry *p_mtmr;
645 unsigned int percpu_timer;
646 struct apbt_dev *adev;
647#endif
648
649 if (apb_timer_block_enabled)
650 return;
651 apbt_set_mapping();
652 if (apbt_virt_address) {
653 pr_debug("Found APBT version 0x%lx\n",\
654 apbt_readl_reg(APBTMRS_COMP_VERSION));
655 } else
656 goto out_noapbt;
657 /*
658 * Read the frequency and check for a sane value, for ESL model
659 * we extend the possible clock range to allow time scaling.
660 */
661
662 if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) {
663 pr_debug("APBT has invalid freq 0x%llx\n", apbt_freq);
664 goto out_noapbt;
665 }
666 if (apbt_clocksource_register()) {
667 pr_debug("APBT has failed to register clocksource\n");
668 goto out_noapbt;
669 }
670 if (!apbt_clockevent_register())
671 apb_timer_block_enabled = 1;
672 else {
673 pr_debug("APBT has failed to register clockevent\n");
674 goto out_noapbt;
675 }
676#ifdef CONFIG_SMP
677 /* kernel cmdline disable apb timer, so we will use lapic timers */
678 if (disable_apbt_percpu) {
679 printk(KERN_INFO "apbt: disabled per cpu timer\n");
680 return;
681 }
682 pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus());
683 if (num_possible_cpus() <= sfi_mtimer_num) {
684 percpu_timer = 1;
685 apbt_num_timers_used = num_possible_cpus();
686 } else {
687 percpu_timer = 0;
688 apbt_num_timers_used = 1;
689 adev = &per_cpu(cpu_apbt_dev, 0);
690 adev->flags &= ~APBT_DEV_USED;
691 }
692 pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used);
693
694 /* here we set up per CPU timer data structure */
695 apbt_devs = kzalloc(sizeof(struct apbt_dev) * apbt_num_timers_used,
696 GFP_KERNEL);
697 if (!apbt_devs) {
698 printk(KERN_ERR "Failed to allocate APB timer devices\n");
699 return;
700 }
701 for (i = 0; i < apbt_num_timers_used; i++) {
702 adev = &per_cpu(cpu_apbt_dev, i);
703 adev->num = i;
704 adev->cpu = i;
705 p_mtmr = sfi_get_mtmr(i);
706 if (p_mtmr) {
707 adev->tick = p_mtmr->freq_hz;
708 adev->irq = p_mtmr->irq;
709 } else
710 printk(KERN_ERR "Failed to get timer for cpu %d\n", i);
711 adev->count = 0;
712 sprintf(adev->name, "apbt%d", i);
713 }
714#endif
715
716 return;
717
718out_noapbt:
719 apbt_clear_mapping();
720 apb_timer_block_enabled = 0;
721 panic("failed to enable APB timer\n");
722}
723
724static inline void apbt_disable(int n)
725{
726 if (is_apbt_capable()) {
727 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
728 ctrl &= ~APBTMR_CONTROL_ENABLE;
729 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
730 }
731}
732
733/* called before apb_timer_enable, use early map */
734unsigned long apbt_quick_calibrate()
735{
736 int i, scale;
737 u64 old, new;
738 cycle_t t1, t2;
739 unsigned long khz = 0;
740 u32 loop, shift;
741
742 apbt_set_mapping();
743 apbt_start_counter(phy_cs_timer_id);
744
745 /* check if the timer can count down, otherwise return */
746 old = apbt_read_clocksource(&clocksource_apbt);
747 i = 10000;
748 while (--i) {
749 if (old != apbt_read_clocksource(&clocksource_apbt))
750 break;
751 }
752 if (!i)
753 goto failed;
754
755 /* count 16 ms */
756 loop = (apbt_freq * 1000) << 4;
757
758 /* restart the timer to ensure it won't get to 0 in the calibration */
759 apbt_start_counter(phy_cs_timer_id);
760
761 old = apbt_read_clocksource(&clocksource_apbt);
762 old += loop;
763
764 t1 = __native_read_tsc();
765
766 do {
767 new = apbt_read_clocksource(&clocksource_apbt);
768 } while (new < old);
769
770 t2 = __native_read_tsc();
771
772 shift = 5;
773 if (unlikely(loop >> shift == 0)) {
774 printk(KERN_INFO
775 "APBT TSC calibration failed, not enough resolution\n");
776 return 0;
777 }
778 scale = (int)div_u64((t2 - t1), loop >> shift);
779 khz = (scale * apbt_freq * 1000) >> shift;
780 printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz);
781 return khz;
782failed:
783 return 0;
784}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6e29b2a77aa8..00187f1fcfb7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1390,7 +1390,7 @@ void __init enable_IR_x2apic(void)
1390 } 1390 }
1391 1391
1392 local_irq_save(flags); 1392 local_irq_save(flags);
1393 mask_8259A(); 1393 legacy_pic->mask_all();
1394 mask_IO_APIC_setup(ioapic_entries); 1394 mask_IO_APIC_setup(ioapic_entries);
1395 1395
1396 if (dmar_table_init_ret) 1396 if (dmar_table_init_ret)
@@ -1422,7 +1422,7 @@ void __init enable_IR_x2apic(void)
1422nox2apic: 1422nox2apic:
1423 if (!ret) /* IR enabling failed */ 1423 if (!ret) /* IR enabling failed */
1424 restore_IO_APIC_setup(ioapic_entries); 1424 restore_IO_APIC_setup(ioapic_entries);
1425 unmask_8259A(); 1425 legacy_pic->restore_mask();
1426 local_irq_restore(flags); 1426 local_irq_restore(flags);
1427 1427
1428out: 1428out:
@@ -2018,7 +2018,7 @@ static int lapic_resume(struct sys_device *dev)
2018 } 2018 }
2019 2019
2020 mask_IO_APIC_setup(ioapic_entries); 2020 mask_IO_APIC_setup(ioapic_entries);
2021 mask_8259A(); 2021 legacy_pic->mask_all();
2022 } 2022 }
2023 2023
2024 if (x2apic_mode) 2024 if (x2apic_mode)
@@ -2062,7 +2062,7 @@ static int lapic_resume(struct sys_device *dev)
2062 2062
2063 if (intr_remapping_enabled) { 2063 if (intr_remapping_enabled) {
2064 reenable_intr_remapping(x2apic_mode); 2064 reenable_intr_remapping(x2apic_mode);
2065 unmask_8259A(); 2065 legacy_pic->restore_mask();
2066 restore_IO_APIC_setup(ioapic_entries); 2066 restore_IO_APIC_setup(ioapic_entries);
2067 free_ioapic_entries(ioapic_entries); 2067 free_ioapic_entries(ioapic_entries);
2068 } 2068 }
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index e3c3d820c325..09d3b17ce0c2 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -223,7 +223,7 @@ struct apic apic_flat = {
223}; 223};
224 224
225/* 225/*
226 * Physflat mode is used when there are more than 8 CPUs on a AMD system. 226 * Physflat mode is used when there are more than 8 CPUs on a system.
227 * We cannot use logical delivery in this case because the mask 227 * We cannot use logical delivery in this case because the mask
228 * overflows, so use physical mode. 228 * overflows, so use physical mode.
229 */ 229 */
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6bdd2c7ead75..e4e0ddcb1546 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -73,8 +73,8 @@
73 */ 73 */
74int sis_apic_bug = -1; 74int sis_apic_bug = -1;
75 75
76static DEFINE_SPINLOCK(ioapic_lock); 76static DEFINE_RAW_SPINLOCK(ioapic_lock);
77static DEFINE_SPINLOCK(vector_lock); 77static DEFINE_RAW_SPINLOCK(vector_lock);
78 78
79/* 79/*
80 * # of IRQ routing registers 80 * # of IRQ routing registers
@@ -94,8 +94,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
94/* # of MP IRQ source entries */ 94/* # of MP IRQ source entries */
95int mp_irq_entries; 95int mp_irq_entries;
96 96
97/* Number of legacy interrupts */
98static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY;
99/* GSI interrupts */ 97/* GSI interrupts */
100static int nr_irqs_gsi = NR_IRQS_LEGACY; 98static int nr_irqs_gsi = NR_IRQS_LEGACY;
101 99
@@ -140,33 +138,10 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
140 138
141/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 139/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
142#ifdef CONFIG_SPARSE_IRQ 140#ifdef CONFIG_SPARSE_IRQ
143static struct irq_cfg irq_cfgx[] = { 141static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
144#else 142#else
145static struct irq_cfg irq_cfgx[NR_IRQS] = { 143static struct irq_cfg irq_cfgx[NR_IRQS];
146#endif 144#endif
147 [0] = { .vector = IRQ0_VECTOR, },
148 [1] = { .vector = IRQ1_VECTOR, },
149 [2] = { .vector = IRQ2_VECTOR, },
150 [3] = { .vector = IRQ3_VECTOR, },
151 [4] = { .vector = IRQ4_VECTOR, },
152 [5] = { .vector = IRQ5_VECTOR, },
153 [6] = { .vector = IRQ6_VECTOR, },
154 [7] = { .vector = IRQ7_VECTOR, },
155 [8] = { .vector = IRQ8_VECTOR, },
156 [9] = { .vector = IRQ9_VECTOR, },
157 [10] = { .vector = IRQ10_VECTOR, },
158 [11] = { .vector = IRQ11_VECTOR, },
159 [12] = { .vector = IRQ12_VECTOR, },
160 [13] = { .vector = IRQ13_VECTOR, },
161 [14] = { .vector = IRQ14_VECTOR, },
162 [15] = { .vector = IRQ15_VECTOR, },
163};
164
165void __init io_apic_disable_legacy(void)
166{
167 nr_legacy_irqs = 0;
168 nr_irqs_gsi = 0;
169}
170 145
171int __init arch_early_irq_init(void) 146int __init arch_early_irq_init(void)
172{ 147{
@@ -176,6 +151,11 @@ int __init arch_early_irq_init(void)
176 int node; 151 int node;
177 int i; 152 int i;
178 153
154 if (!legacy_pic->nr_legacy_irqs) {
155 nr_irqs_gsi = 0;
156 io_apic_irqs = ~0UL;
157 }
158
179 cfg = irq_cfgx; 159 cfg = irq_cfgx;
180 count = ARRAY_SIZE(irq_cfgx); 160 count = ARRAY_SIZE(irq_cfgx);
181 node= cpu_to_node(boot_cpu_id); 161 node= cpu_to_node(boot_cpu_id);
@@ -185,8 +165,14 @@ int __init arch_early_irq_init(void)
185 desc->chip_data = &cfg[i]; 165 desc->chip_data = &cfg[i];
186 zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); 166 zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
187 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); 167 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
188 if (i < nr_legacy_irqs) 168 /*
189 cpumask_setall(cfg[i].domain); 169 * For legacy IRQ's, start with assigning irq0 to irq15 to
170 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
171 */
172 if (i < legacy_pic->nr_legacy_irqs) {
173 cfg[i].vector = IRQ0_VECTOR + i;
174 cpumask_set_cpu(0, cfg[i].domain);
175 }
190 } 176 }
191 177
192 return 0; 178 return 0;
@@ -406,7 +392,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
406 struct irq_pin_list *entry; 392 struct irq_pin_list *entry;
407 unsigned long flags; 393 unsigned long flags;
408 394
409 spin_lock_irqsave(&ioapic_lock, flags); 395 raw_spin_lock_irqsave(&ioapic_lock, flags);
410 for_each_irq_pin(entry, cfg->irq_2_pin) { 396 for_each_irq_pin(entry, cfg->irq_2_pin) {
411 unsigned int reg; 397 unsigned int reg;
412 int pin; 398 int pin;
@@ -415,11 +401,11 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
415 reg = io_apic_read(entry->apic, 0x10 + pin*2); 401 reg = io_apic_read(entry->apic, 0x10 + pin*2);
416 /* Is the remote IRR bit set? */ 402 /* Is the remote IRR bit set? */
417 if (reg & IO_APIC_REDIR_REMOTE_IRR) { 403 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
418 spin_unlock_irqrestore(&ioapic_lock, flags); 404 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
419 return true; 405 return true;
420 } 406 }
421 } 407 }
422 spin_unlock_irqrestore(&ioapic_lock, flags); 408 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
423 409
424 return false; 410 return false;
425} 411}
@@ -433,10 +419,10 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
433{ 419{
434 union entry_union eu; 420 union entry_union eu;
435 unsigned long flags; 421 unsigned long flags;
436 spin_lock_irqsave(&ioapic_lock, flags); 422 raw_spin_lock_irqsave(&ioapic_lock, flags);
437 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); 423 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
438 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); 424 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
439 spin_unlock_irqrestore(&ioapic_lock, flags); 425 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
440 return eu.entry; 426 return eu.entry;
441} 427}
442 428
@@ -459,9 +445,9 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
459void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 445void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
460{ 446{
461 unsigned long flags; 447 unsigned long flags;
462 spin_lock_irqsave(&ioapic_lock, flags); 448 raw_spin_lock_irqsave(&ioapic_lock, flags);
463 __ioapic_write_entry(apic, pin, e); 449 __ioapic_write_entry(apic, pin, e);
464 spin_unlock_irqrestore(&ioapic_lock, flags); 450 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
465} 451}
466 452
467/* 453/*
@@ -474,10 +460,10 @@ static void ioapic_mask_entry(int apic, int pin)
474 unsigned long flags; 460 unsigned long flags;
475 union entry_union eu = { .entry.mask = 1 }; 461 union entry_union eu = { .entry.mask = 1 };
476 462
477 spin_lock_irqsave(&ioapic_lock, flags); 463 raw_spin_lock_irqsave(&ioapic_lock, flags);
478 io_apic_write(apic, 0x10 + 2*pin, eu.w1); 464 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
479 io_apic_write(apic, 0x11 + 2*pin, eu.w2); 465 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
480 spin_unlock_irqrestore(&ioapic_lock, flags); 466 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
481} 467}
482 468
483/* 469/*
@@ -604,9 +590,9 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
604 590
605 BUG_ON(!cfg); 591 BUG_ON(!cfg);
606 592
607 spin_lock_irqsave(&ioapic_lock, flags); 593 raw_spin_lock_irqsave(&ioapic_lock, flags);
608 __mask_IO_APIC_irq(cfg); 594 __mask_IO_APIC_irq(cfg);
609 spin_unlock_irqrestore(&ioapic_lock, flags); 595 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
610} 596}
611 597
612static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) 598static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
@@ -614,9 +600,9 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
614 struct irq_cfg *cfg = desc->chip_data; 600 struct irq_cfg *cfg = desc->chip_data;
615 unsigned long flags; 601 unsigned long flags;
616 602
617 spin_lock_irqsave(&ioapic_lock, flags); 603 raw_spin_lock_irqsave(&ioapic_lock, flags);
618 __unmask_IO_APIC_irq(cfg); 604 __unmask_IO_APIC_irq(cfg);
619 spin_unlock_irqrestore(&ioapic_lock, flags); 605 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
620} 606}
621 607
622static void mask_IO_APIC_irq(unsigned int irq) 608static void mask_IO_APIC_irq(unsigned int irq)
@@ -865,7 +851,7 @@ static int __init find_isa_irq_apic(int irq, int type)
865 */ 851 */
866static int EISA_ELCR(unsigned int irq) 852static int EISA_ELCR(unsigned int irq)
867{ 853{
868 if (irq < nr_legacy_irqs) { 854 if (irq < legacy_pic->nr_legacy_irqs) {
869 unsigned int port = 0x4d0 + (irq >> 3); 855 unsigned int port = 0x4d0 + (irq >> 3);
870 return (inb(port) >> (irq & 7)) & 1; 856 return (inb(port) >> (irq & 7)) & 1;
871 } 857 }
@@ -1140,12 +1126,12 @@ void lock_vector_lock(void)
1140 /* Used to the online set of cpus does not change 1126 /* Used to the online set of cpus does not change
1141 * during assign_irq_vector. 1127 * during assign_irq_vector.
1142 */ 1128 */
1143 spin_lock(&vector_lock); 1129 raw_spin_lock(&vector_lock);
1144} 1130}
1145 1131
1146void unlock_vector_lock(void) 1132void unlock_vector_lock(void)
1147{ 1133{
1148 spin_unlock(&vector_lock); 1134 raw_spin_unlock(&vector_lock);
1149} 1135}
1150 1136
1151static int 1137static int
@@ -1162,7 +1148,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1162 * Also, we've got to be careful not to trash gate 1148 * Also, we've got to be careful not to trash gate
1163 * 0x80, because int 0x80 is hm, kind of importantish. ;) 1149 * 0x80, because int 0x80 is hm, kind of importantish. ;)
1164 */ 1150 */
1165 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; 1151 static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
1152 static int current_offset = VECTOR_OFFSET_START % 8;
1166 unsigned int old_vector; 1153 unsigned int old_vector;
1167 int cpu, err; 1154 int cpu, err;
1168 cpumask_var_t tmp_mask; 1155 cpumask_var_t tmp_mask;
@@ -1198,7 +1185,7 @@ next:
1198 if (vector >= first_system_vector) { 1185 if (vector >= first_system_vector) {
1199 /* If out of vectors on large boxen, must share them. */ 1186 /* If out of vectors on large boxen, must share them. */
1200 offset = (offset + 1) % 8; 1187 offset = (offset + 1) % 8;
1201 vector = FIRST_DEVICE_VECTOR + offset; 1188 vector = FIRST_EXTERNAL_VECTOR + offset;
1202 } 1189 }
1203 if (unlikely(current_vector == vector)) 1190 if (unlikely(current_vector == vector))
1204 continue; 1191 continue;
@@ -1232,9 +1219,9 @@ int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1232 int err; 1219 int err;
1233 unsigned long flags; 1220 unsigned long flags;
1234 1221
1235 spin_lock_irqsave(&vector_lock, flags); 1222 raw_spin_lock_irqsave(&vector_lock, flags);
1236 err = __assign_irq_vector(irq, cfg, mask); 1223 err = __assign_irq_vector(irq, cfg, mask);
1237 spin_unlock_irqrestore(&vector_lock, flags); 1224 raw_spin_unlock_irqrestore(&vector_lock, flags);
1238 return err; 1225 return err;
1239} 1226}
1240 1227
@@ -1268,11 +1255,16 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1268void __setup_vector_irq(int cpu) 1255void __setup_vector_irq(int cpu)
1269{ 1256{
1270 /* Initialize vector_irq on a new cpu */ 1257 /* Initialize vector_irq on a new cpu */
1271 /* This function must be called with vector_lock held */
1272 int irq, vector; 1258 int irq, vector;
1273 struct irq_cfg *cfg; 1259 struct irq_cfg *cfg;
1274 struct irq_desc *desc; 1260 struct irq_desc *desc;
1275 1261
1262 /*
1263 * vector_lock will make sure that we don't run into irq vector
1264 * assignments that might be happening on another cpu in parallel,
1265 * while we setup our initial vector to irq mappings.
1266 */
1267 raw_spin_lock(&vector_lock);
1276 /* Mark the inuse vectors */ 1268 /* Mark the inuse vectors */
1277 for_each_irq_desc(irq, desc) { 1269 for_each_irq_desc(irq, desc) {
1278 cfg = desc->chip_data; 1270 cfg = desc->chip_data;
@@ -1291,6 +1283,7 @@ void __setup_vector_irq(int cpu)
1291 if (!cpumask_test_cpu(cpu, cfg->domain)) 1283 if (!cpumask_test_cpu(cpu, cfg->domain))
1292 per_cpu(vector_irq, cpu)[vector] = -1; 1284 per_cpu(vector_irq, cpu)[vector] = -1;
1293 } 1285 }
1286 raw_spin_unlock(&vector_lock);
1294} 1287}
1295 1288
1296static struct irq_chip ioapic_chip; 1289static struct irq_chip ioapic_chip;
@@ -1440,6 +1433,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1440 1433
1441 cfg = desc->chip_data; 1434 cfg = desc->chip_data;
1442 1435
1436 /*
1437 * For legacy irqs, cfg->domain starts with cpu 0 for legacy
1438 * controllers like 8259. Now that IO-APIC can handle this irq, update
1439 * the cfg->domain.
1440 */
1441 if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
1442 apic->vector_allocation_domain(0, cfg->domain);
1443
1443 if (assign_irq_vector(irq, cfg, apic->target_cpus())) 1444 if (assign_irq_vector(irq, cfg, apic->target_cpus()))
1444 return; 1445 return;
1445 1446
@@ -1461,8 +1462,8 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1461 } 1462 }
1462 1463
1463 ioapic_register_intr(irq, desc, trigger); 1464 ioapic_register_intr(irq, desc, trigger);
1464 if (irq < nr_legacy_irqs) 1465 if (irq < legacy_pic->nr_legacy_irqs)
1465 disable_8259A_irq(irq); 1466 legacy_pic->chip->mask(irq);
1466 1467
1467 ioapic_write_entry(apic_id, pin, entry); 1468 ioapic_write_entry(apic_id, pin, entry);
1468} 1469}
@@ -1473,7 +1474,7 @@ static struct {
1473 1474
1474static void __init setup_IO_APIC_irqs(void) 1475static void __init setup_IO_APIC_irqs(void)
1475{ 1476{
1476 int apic_id = 0, pin, idx, irq; 1477 int apic_id, pin, idx, irq;
1477 int notcon = 0; 1478 int notcon = 0;
1478 struct irq_desc *desc; 1479 struct irq_desc *desc;
1479 struct irq_cfg *cfg; 1480 struct irq_cfg *cfg;
@@ -1481,14 +1482,7 @@ static void __init setup_IO_APIC_irqs(void)
1481 1482
1482 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1483 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1483 1484
1484#ifdef CONFIG_ACPI 1485 for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
1485 if (!acpi_disabled && acpi_ioapic) {
1486 apic_id = mp_find_ioapic(0);
1487 if (apic_id < 0)
1488 apic_id = 0;
1489 }
1490#endif
1491
1492 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { 1486 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
1493 idx = find_irq_entry(apic_id, pin, mp_INT); 1487 idx = find_irq_entry(apic_id, pin, mp_INT);
1494 if (idx == -1) { 1488 if (idx == -1) {
@@ -1510,6 +1504,9 @@ static void __init setup_IO_APIC_irqs(void)
1510 1504
1511 irq = pin_2_irq(idx, apic_id, pin); 1505 irq = pin_2_irq(idx, apic_id, pin);
1512 1506
1507 if ((apic_id > 0) && (irq > 16))
1508 continue;
1509
1513 /* 1510 /*
1514 * Skip the timer IRQ if there's a quirk handler 1511 * Skip the timer IRQ if there's a quirk handler
1515 * installed and if it returns 1: 1512 * installed and if it returns 1:
@@ -1539,6 +1536,56 @@ static void __init setup_IO_APIC_irqs(void)
1539} 1536}
1540 1537
1541/* 1538/*
1539 * for the gsit that is not in first ioapic
1540 * but could not use acpi_register_gsi()
1541 * like some special sci in IBM x3330
1542 */
1543void setup_IO_APIC_irq_extra(u32 gsi)
1544{
1545 int apic_id = 0, pin, idx, irq;
1546 int node = cpu_to_node(boot_cpu_id);
1547 struct irq_desc *desc;
1548 struct irq_cfg *cfg;
1549
1550 /*
1551 * Convert 'gsi' to 'ioapic.pin'.
1552 */
1553 apic_id = mp_find_ioapic(gsi);
1554 if (apic_id < 0)
1555 return;
1556
1557 pin = mp_find_ioapic_pin(apic_id, gsi);
1558 idx = find_irq_entry(apic_id, pin, mp_INT);
1559 if (idx == -1)
1560 return;
1561
1562 irq = pin_2_irq(idx, apic_id, pin);
1563#ifdef CONFIG_SPARSE_IRQ
1564 desc = irq_to_desc(irq);
1565 if (desc)
1566 return;
1567#endif
1568 desc = irq_to_desc_alloc_node(irq, node);
1569 if (!desc) {
1570 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1571 return;
1572 }
1573
1574 cfg = desc->chip_data;
1575 add_pin_to_irq_node(cfg, node, apic_id, pin);
1576
1577 if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
1578 pr_debug("Pin %d-%d already programmed\n",
1579 mp_ioapics[apic_id].apicid, pin);
1580 return;
1581 }
1582 set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
1583
1584 setup_IO_APIC_irq(apic_id, pin, irq, desc,
1585 irq_trigger(idx), irq_polarity(idx));
1586}
1587
1588/*
1542 * Set up the timer pin, possibly with the 8259A-master behind. 1589 * Set up the timer pin, possibly with the 8259A-master behind.
1543 */ 1590 */
1544static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, 1591static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
@@ -1601,14 +1648,14 @@ __apicdebuginit(void) print_IO_APIC(void)
1601 1648
1602 for (apic = 0; apic < nr_ioapics; apic++) { 1649 for (apic = 0; apic < nr_ioapics; apic++) {
1603 1650
1604 spin_lock_irqsave(&ioapic_lock, flags); 1651 raw_spin_lock_irqsave(&ioapic_lock, flags);
1605 reg_00.raw = io_apic_read(apic, 0); 1652 reg_00.raw = io_apic_read(apic, 0);
1606 reg_01.raw = io_apic_read(apic, 1); 1653 reg_01.raw = io_apic_read(apic, 1);
1607 if (reg_01.bits.version >= 0x10) 1654 if (reg_01.bits.version >= 0x10)
1608 reg_02.raw = io_apic_read(apic, 2); 1655 reg_02.raw = io_apic_read(apic, 2);
1609 if (reg_01.bits.version >= 0x20) 1656 if (reg_01.bits.version >= 0x20)
1610 reg_03.raw = io_apic_read(apic, 3); 1657 reg_03.raw = io_apic_read(apic, 3);
1611 spin_unlock_irqrestore(&ioapic_lock, flags); 1658 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1612 1659
1613 printk("\n"); 1660 printk("\n");
1614 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); 1661 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
@@ -1825,12 +1872,12 @@ __apicdebuginit(void) print_PIC(void)
1825 unsigned int v; 1872 unsigned int v;
1826 unsigned long flags; 1873 unsigned long flags;
1827 1874
1828 if (!nr_legacy_irqs) 1875 if (!legacy_pic->nr_legacy_irqs)
1829 return; 1876 return;
1830 1877
1831 printk(KERN_DEBUG "\nprinting PIC contents\n"); 1878 printk(KERN_DEBUG "\nprinting PIC contents\n");
1832 1879
1833 spin_lock_irqsave(&i8259A_lock, flags); 1880 raw_spin_lock_irqsave(&i8259A_lock, flags);
1834 1881
1835 v = inb(0xa1) << 8 | inb(0x21); 1882 v = inb(0xa1) << 8 | inb(0x21);
1836 printk(KERN_DEBUG "... PIC IMR: %04x\n", v); 1883 printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
@@ -1844,7 +1891,7 @@ __apicdebuginit(void) print_PIC(void)
1844 outb(0x0a,0xa0); 1891 outb(0x0a,0xa0);
1845 outb(0x0a,0x20); 1892 outb(0x0a,0x20);
1846 1893
1847 spin_unlock_irqrestore(&i8259A_lock, flags); 1894 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
1848 1895
1849 printk(KERN_DEBUG "... PIC ISR: %04x\n", v); 1896 printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
1850 1897
@@ -1903,13 +1950,13 @@ void __init enable_IO_APIC(void)
1903 * The number of IO-APIC IRQ registers (== #pins): 1950 * The number of IO-APIC IRQ registers (== #pins):
1904 */ 1951 */
1905 for (apic = 0; apic < nr_ioapics; apic++) { 1952 for (apic = 0; apic < nr_ioapics; apic++) {
1906 spin_lock_irqsave(&ioapic_lock, flags); 1953 raw_spin_lock_irqsave(&ioapic_lock, flags);
1907 reg_01.raw = io_apic_read(apic, 1); 1954 reg_01.raw = io_apic_read(apic, 1);
1908 spin_unlock_irqrestore(&ioapic_lock, flags); 1955 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1909 nr_ioapic_registers[apic] = reg_01.bits.entries+1; 1956 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
1910 } 1957 }
1911 1958
1912 if (!nr_legacy_irqs) 1959 if (!legacy_pic->nr_legacy_irqs)
1913 return; 1960 return;
1914 1961
1915 for(apic = 0; apic < nr_ioapics; apic++) { 1962 for(apic = 0; apic < nr_ioapics; apic++) {
@@ -1966,7 +2013,7 @@ void disable_IO_APIC(void)
1966 */ 2013 */
1967 clear_IO_APIC(); 2014 clear_IO_APIC();
1968 2015
1969 if (!nr_legacy_irqs) 2016 if (!legacy_pic->nr_legacy_irqs)
1970 return; 2017 return;
1971 2018
1972 /* 2019 /*
@@ -2045,9 +2092,9 @@ void __init setup_ioapic_ids_from_mpc(void)
2045 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { 2092 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
2046 2093
2047 /* Read the register 0 value */ 2094 /* Read the register 0 value */
2048 spin_lock_irqsave(&ioapic_lock, flags); 2095 raw_spin_lock_irqsave(&ioapic_lock, flags);
2049 reg_00.raw = io_apic_read(apic_id, 0); 2096 reg_00.raw = io_apic_read(apic_id, 0);
2050 spin_unlock_irqrestore(&ioapic_lock, flags); 2097 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2051 2098
2052 old_id = mp_ioapics[apic_id].apicid; 2099 old_id = mp_ioapics[apic_id].apicid;
2053 2100
@@ -2106,16 +2153,16 @@ void __init setup_ioapic_ids_from_mpc(void)
2106 mp_ioapics[apic_id].apicid); 2153 mp_ioapics[apic_id].apicid);
2107 2154
2108 reg_00.bits.ID = mp_ioapics[apic_id].apicid; 2155 reg_00.bits.ID = mp_ioapics[apic_id].apicid;
2109 spin_lock_irqsave(&ioapic_lock, flags); 2156 raw_spin_lock_irqsave(&ioapic_lock, flags);
2110 io_apic_write(apic_id, 0, reg_00.raw); 2157 io_apic_write(apic_id, 0, reg_00.raw);
2111 spin_unlock_irqrestore(&ioapic_lock, flags); 2158 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2112 2159
2113 /* 2160 /*
2114 * Sanity check 2161 * Sanity check
2115 */ 2162 */
2116 spin_lock_irqsave(&ioapic_lock, flags); 2163 raw_spin_lock_irqsave(&ioapic_lock, flags);
2117 reg_00.raw = io_apic_read(apic_id, 0); 2164 reg_00.raw = io_apic_read(apic_id, 0);
2118 spin_unlock_irqrestore(&ioapic_lock, flags); 2165 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2119 if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) 2166 if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
2120 printk("could not set ID!\n"); 2167 printk("could not set ID!\n");
2121 else 2168 else
@@ -2198,15 +2245,15 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
2198 unsigned long flags; 2245 unsigned long flags;
2199 struct irq_cfg *cfg; 2246 struct irq_cfg *cfg;
2200 2247
2201 spin_lock_irqsave(&ioapic_lock, flags); 2248 raw_spin_lock_irqsave(&ioapic_lock, flags);
2202 if (irq < nr_legacy_irqs) { 2249 if (irq < legacy_pic->nr_legacy_irqs) {
2203 disable_8259A_irq(irq); 2250 legacy_pic->chip->mask(irq);
2204 if (i8259A_irq_pending(irq)) 2251 if (legacy_pic->irq_pending(irq))
2205 was_pending = 1; 2252 was_pending = 1;
2206 } 2253 }
2207 cfg = irq_cfg(irq); 2254 cfg = irq_cfg(irq);
2208 __unmask_IO_APIC_irq(cfg); 2255 __unmask_IO_APIC_irq(cfg);
2209 spin_unlock_irqrestore(&ioapic_lock, flags); 2256 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2210 2257
2211 return was_pending; 2258 return was_pending;
2212} 2259}
@@ -2217,9 +2264,9 @@ static int ioapic_retrigger_irq(unsigned int irq)
2217 struct irq_cfg *cfg = irq_cfg(irq); 2264 struct irq_cfg *cfg = irq_cfg(irq);
2218 unsigned long flags; 2265 unsigned long flags;
2219 2266
2220 spin_lock_irqsave(&vector_lock, flags); 2267 raw_spin_lock_irqsave(&vector_lock, flags);
2221 apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); 2268 apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2222 spin_unlock_irqrestore(&vector_lock, flags); 2269 raw_spin_unlock_irqrestore(&vector_lock, flags);
2223 2270
2224 return 1; 2271 return 1;
2225} 2272}
@@ -2312,14 +2359,14 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2312 irq = desc->irq; 2359 irq = desc->irq;
2313 cfg = desc->chip_data; 2360 cfg = desc->chip_data;
2314 2361
2315 spin_lock_irqsave(&ioapic_lock, flags); 2362 raw_spin_lock_irqsave(&ioapic_lock, flags);
2316 ret = set_desc_affinity(desc, mask, &dest); 2363 ret = set_desc_affinity(desc, mask, &dest);
2317 if (!ret) { 2364 if (!ret) {
2318 /* Only the high 8 bits are valid. */ 2365 /* Only the high 8 bits are valid. */
2319 dest = SET_APIC_LOGICAL_ID(dest); 2366 dest = SET_APIC_LOGICAL_ID(dest);
2320 __target_IO_APIC_irq(irq, dest, cfg); 2367 __target_IO_APIC_irq(irq, dest, cfg);
2321 } 2368 }
2322 spin_unlock_irqrestore(&ioapic_lock, flags); 2369 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2323 2370
2324 return ret; 2371 return ret;
2325} 2372}
@@ -2554,9 +2601,9 @@ static void eoi_ioapic_irq(struct irq_desc *desc)
2554 irq = desc->irq; 2601 irq = desc->irq;
2555 cfg = desc->chip_data; 2602 cfg = desc->chip_data;
2556 2603
2557 spin_lock_irqsave(&ioapic_lock, flags); 2604 raw_spin_lock_irqsave(&ioapic_lock, flags);
2558 __eoi_ioapic_irq(irq, cfg); 2605 __eoi_ioapic_irq(irq, cfg);
2559 spin_unlock_irqrestore(&ioapic_lock, flags); 2606 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2560} 2607}
2561 2608
2562static void ack_apic_level(unsigned int irq) 2609static void ack_apic_level(unsigned int irq)
@@ -2734,8 +2781,8 @@ static inline void init_IO_APIC_traps(void)
2734 * so default to an old-fashioned 8259 2781 * so default to an old-fashioned 8259
2735 * interrupt if we can.. 2782 * interrupt if we can..
2736 */ 2783 */
2737 if (irq < nr_legacy_irqs) 2784 if (irq < legacy_pic->nr_legacy_irqs)
2738 make_8259A_irq(irq); 2785 legacy_pic->make_irq(irq);
2739 else 2786 else
2740 /* Strange. Oh, well.. */ 2787 /* Strange. Oh, well.. */
2741 desc->chip = &no_irq_chip; 2788 desc->chip = &no_irq_chip;
@@ -2892,7 +2939,7 @@ static inline void __init check_timer(void)
2892 /* 2939 /*
2893 * get/set the timer IRQ vector: 2940 * get/set the timer IRQ vector:
2894 */ 2941 */
2895 disable_8259A_irq(0); 2942 legacy_pic->chip->mask(0);
2896 assign_irq_vector(0, cfg, apic->target_cpus()); 2943 assign_irq_vector(0, cfg, apic->target_cpus());
2897 2944
2898 /* 2945 /*
@@ -2905,7 +2952,7 @@ static inline void __init check_timer(void)
2905 * automatically. 2952 * automatically.
2906 */ 2953 */
2907 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2954 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2908 init_8259A(1); 2955 legacy_pic->init(1);
2909#ifdef CONFIG_X86_32 2956#ifdef CONFIG_X86_32
2910 { 2957 {
2911 unsigned int ver; 2958 unsigned int ver;
@@ -2964,7 +3011,7 @@ static inline void __init check_timer(void)
2964 if (timer_irq_works()) { 3011 if (timer_irq_works()) {
2965 if (nmi_watchdog == NMI_IO_APIC) { 3012 if (nmi_watchdog == NMI_IO_APIC) {
2966 setup_nmi(); 3013 setup_nmi();
2967 enable_8259A_irq(0); 3014 legacy_pic->chip->unmask(0);
2968 } 3015 }
2969 if (disable_timer_pin_1 > 0) 3016 if (disable_timer_pin_1 > 0)
2970 clear_IO_APIC_pin(0, pin1); 3017 clear_IO_APIC_pin(0, pin1);
@@ -2987,14 +3034,14 @@ static inline void __init check_timer(void)
2987 */ 3034 */
2988 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); 3035 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
2989 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 3036 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2990 enable_8259A_irq(0); 3037 legacy_pic->chip->unmask(0);
2991 if (timer_irq_works()) { 3038 if (timer_irq_works()) {
2992 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 3039 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2993 timer_through_8259 = 1; 3040 timer_through_8259 = 1;
2994 if (nmi_watchdog == NMI_IO_APIC) { 3041 if (nmi_watchdog == NMI_IO_APIC) {
2995 disable_8259A_irq(0); 3042 legacy_pic->chip->mask(0);
2996 setup_nmi(); 3043 setup_nmi();
2997 enable_8259A_irq(0); 3044 legacy_pic->chip->unmask(0);
2998 } 3045 }
2999 goto out; 3046 goto out;
3000 } 3047 }
@@ -3002,7 +3049,7 @@ static inline void __init check_timer(void)
3002 * Cleanup, just in case ... 3049 * Cleanup, just in case ...
3003 */ 3050 */
3004 local_irq_disable(); 3051 local_irq_disable();
3005 disable_8259A_irq(0); 3052 legacy_pic->chip->mask(0);
3006 clear_IO_APIC_pin(apic2, pin2); 3053 clear_IO_APIC_pin(apic2, pin2);
3007 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 3054 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
3008 } 3055 }
@@ -3021,22 +3068,22 @@ static inline void __init check_timer(void)
3021 3068
3022 lapic_register_intr(0, desc); 3069 lapic_register_intr(0, desc);
3023 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 3070 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
3024 enable_8259A_irq(0); 3071 legacy_pic->chip->unmask(0);
3025 3072
3026 if (timer_irq_works()) { 3073 if (timer_irq_works()) {
3027 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 3074 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3028 goto out; 3075 goto out;
3029 } 3076 }
3030 local_irq_disable(); 3077 local_irq_disable();
3031 disable_8259A_irq(0); 3078 legacy_pic->chip->mask(0);
3032 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); 3079 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
3033 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); 3080 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
3034 3081
3035 apic_printk(APIC_QUIET, KERN_INFO 3082 apic_printk(APIC_QUIET, KERN_INFO
3036 "...trying to set up timer as ExtINT IRQ...\n"); 3083 "...trying to set up timer as ExtINT IRQ...\n");
3037 3084
3038 init_8259A(0); 3085 legacy_pic->init(0);
3039 make_8259A_irq(0); 3086 legacy_pic->make_irq(0);
3040 apic_write(APIC_LVT0, APIC_DM_EXTINT); 3087 apic_write(APIC_LVT0, APIC_DM_EXTINT);
3041 3088
3042 unlock_ExtINT_logic(); 3089 unlock_ExtINT_logic();
@@ -3078,7 +3125,7 @@ void __init setup_IO_APIC(void)
3078 /* 3125 /*
3079 * calling enable_IO_APIC() is moved to setup_local_APIC for BP 3126 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
3080 */ 3127 */
3081 io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL; 3128 io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
3082 3129
3083 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); 3130 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
3084 /* 3131 /*
@@ -3089,7 +3136,7 @@ void __init setup_IO_APIC(void)
3089 sync_Arb_IDs(); 3136 sync_Arb_IDs();
3090 setup_IO_APIC_irqs(); 3137 setup_IO_APIC_irqs();
3091 init_IO_APIC_traps(); 3138 init_IO_APIC_traps();
3092 if (nr_legacy_irqs) 3139 if (legacy_pic->nr_legacy_irqs)
3093 check_timer(); 3140 check_timer();
3094} 3141}
3095 3142
@@ -3138,13 +3185,13 @@ static int ioapic_resume(struct sys_device *dev)
3138 data = container_of(dev, struct sysfs_ioapic_data, dev); 3185 data = container_of(dev, struct sysfs_ioapic_data, dev);
3139 entry = data->entry; 3186 entry = data->entry;
3140 3187
3141 spin_lock_irqsave(&ioapic_lock, flags); 3188 raw_spin_lock_irqsave(&ioapic_lock, flags);
3142 reg_00.raw = io_apic_read(dev->id, 0); 3189 reg_00.raw = io_apic_read(dev->id, 0);
3143 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { 3190 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
3144 reg_00.bits.ID = mp_ioapics[dev->id].apicid; 3191 reg_00.bits.ID = mp_ioapics[dev->id].apicid;
3145 io_apic_write(dev->id, 0, reg_00.raw); 3192 io_apic_write(dev->id, 0, reg_00.raw);
3146 } 3193 }
3147 spin_unlock_irqrestore(&ioapic_lock, flags); 3194 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3148 for (i = 0; i < nr_ioapic_registers[dev->id]; i++) 3195 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
3149 ioapic_write_entry(dev->id, i, entry[i]); 3196 ioapic_write_entry(dev->id, i, entry[i]);
3150 3197
@@ -3207,7 +3254,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
3207 if (irq_want < nr_irqs_gsi) 3254 if (irq_want < nr_irqs_gsi)
3208 irq_want = nr_irqs_gsi; 3255 irq_want = nr_irqs_gsi;
3209 3256
3210 spin_lock_irqsave(&vector_lock, flags); 3257 raw_spin_lock_irqsave(&vector_lock, flags);
3211 for (new = irq_want; new < nr_irqs; new++) { 3258 for (new = irq_want; new < nr_irqs; new++) {
3212 desc_new = irq_to_desc_alloc_node(new, node); 3259 desc_new = irq_to_desc_alloc_node(new, node);
3213 if (!desc_new) { 3260 if (!desc_new) {
@@ -3226,14 +3273,11 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
3226 irq = new; 3273 irq = new;
3227 break; 3274 break;
3228 } 3275 }
3229 spin_unlock_irqrestore(&vector_lock, flags); 3276 raw_spin_unlock_irqrestore(&vector_lock, flags);
3277
3278 if (irq > 0)
3279 dynamic_irq_init_keep_chip_data(irq);
3230 3280
3231 if (irq > 0) {
3232 dynamic_irq_init(irq);
3233 /* restore it, in case dynamic_irq_init clear it */
3234 if (desc_new)
3235 desc_new->chip_data = cfg_new;
3236 }
3237 return irq; 3281 return irq;
3238} 3282}
3239 3283
@@ -3255,20 +3299,13 @@ int create_irq(void)
3255void destroy_irq(unsigned int irq) 3299void destroy_irq(unsigned int irq)
3256{ 3300{
3257 unsigned long flags; 3301 unsigned long flags;
3258 struct irq_cfg *cfg;
3259 struct irq_desc *desc;
3260 3302
3261 /* store it, in case dynamic_irq_cleanup clear it */ 3303 dynamic_irq_cleanup_keep_chip_data(irq);
3262 desc = irq_to_desc(irq);
3263 cfg = desc->chip_data;
3264 dynamic_irq_cleanup(irq);
3265 /* connect back irq_cfg */
3266 desc->chip_data = cfg;
3267 3304
3268 free_irte(irq); 3305 free_irte(irq);
3269 spin_lock_irqsave(&vector_lock, flags); 3306 raw_spin_lock_irqsave(&vector_lock, flags);
3270 __clear_irq_vector(irq, cfg); 3307 __clear_irq_vector(irq, get_irq_chip_data(irq));
3271 spin_unlock_irqrestore(&vector_lock, flags); 3308 raw_spin_unlock_irqrestore(&vector_lock, flags);
3272} 3309}
3273 3310
3274/* 3311/*
@@ -3805,9 +3842,9 @@ int __init io_apic_get_redir_entries (int ioapic)
3805 union IO_APIC_reg_01 reg_01; 3842 union IO_APIC_reg_01 reg_01;
3806 unsigned long flags; 3843 unsigned long flags;
3807 3844
3808 spin_lock_irqsave(&ioapic_lock, flags); 3845 raw_spin_lock_irqsave(&ioapic_lock, flags);
3809 reg_01.raw = io_apic_read(ioapic, 1); 3846 reg_01.raw = io_apic_read(ioapic, 1);
3810 spin_unlock_irqrestore(&ioapic_lock, flags); 3847 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3811 3848
3812 return reg_01.bits.entries; 3849 return reg_01.bits.entries;
3813} 3850}
@@ -3890,7 +3927,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
3890 /* 3927 /*
3891 * IRQs < 16 are already in the irq_2_pin[] map 3928 * IRQs < 16 are already in the irq_2_pin[] map
3892 */ 3929 */
3893 if (irq >= nr_legacy_irqs) { 3930 if (irq >= legacy_pic->nr_legacy_irqs) {
3894 cfg = desc->chip_data; 3931 cfg = desc->chip_data;
3895 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { 3932 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
3896 printk(KERN_INFO "can not add pin %d for irq %d\n", 3933 printk(KERN_INFO "can not add pin %d for irq %d\n",
@@ -3969,9 +4006,9 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3969 if (physids_empty(apic_id_map)) 4006 if (physids_empty(apic_id_map))
3970 apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); 4007 apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
3971 4008
3972 spin_lock_irqsave(&ioapic_lock, flags); 4009 raw_spin_lock_irqsave(&ioapic_lock, flags);
3973 reg_00.raw = io_apic_read(ioapic, 0); 4010 reg_00.raw = io_apic_read(ioapic, 0);
3974 spin_unlock_irqrestore(&ioapic_lock, flags); 4011 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3975 4012
3976 if (apic_id >= get_physical_broadcast()) { 4013 if (apic_id >= get_physical_broadcast()) {
3977 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " 4014 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
@@ -4005,10 +4042,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
4005 if (reg_00.bits.ID != apic_id) { 4042 if (reg_00.bits.ID != apic_id) {
4006 reg_00.bits.ID = apic_id; 4043 reg_00.bits.ID = apic_id;
4007 4044
4008 spin_lock_irqsave(&ioapic_lock, flags); 4045 raw_spin_lock_irqsave(&ioapic_lock, flags);
4009 io_apic_write(ioapic, 0, reg_00.raw); 4046 io_apic_write(ioapic, 0, reg_00.raw);
4010 reg_00.raw = io_apic_read(ioapic, 0); 4047 reg_00.raw = io_apic_read(ioapic, 0);
4011 spin_unlock_irqrestore(&ioapic_lock, flags); 4048 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
4012 4049
4013 /* Sanity check */ 4050 /* Sanity check */
4014 if (reg_00.bits.ID != apic_id) { 4051 if (reg_00.bits.ID != apic_id) {
@@ -4029,9 +4066,9 @@ int __init io_apic_get_version(int ioapic)
4029 union IO_APIC_reg_01 reg_01; 4066 union IO_APIC_reg_01 reg_01;
4030 unsigned long flags; 4067 unsigned long flags;
4031 4068
4032 spin_lock_irqsave(&ioapic_lock, flags); 4069 raw_spin_lock_irqsave(&ioapic_lock, flags);
4033 reg_01.raw = io_apic_read(ioapic, 1); 4070 reg_01.raw = io_apic_read(ioapic, 1);
4034 spin_unlock_irqrestore(&ioapic_lock, flags); 4071 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
4035 4072
4036 return reg_01.bits.version; 4073 return reg_01.bits.version;
4037} 4074}
@@ -4063,27 +4100,23 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
4063#ifdef CONFIG_SMP 4100#ifdef CONFIG_SMP
4064void __init setup_ioapic_dest(void) 4101void __init setup_ioapic_dest(void)
4065{ 4102{
4066 int pin, ioapic = 0, irq, irq_entry; 4103 int pin, ioapic, irq, irq_entry;
4067 struct irq_desc *desc; 4104 struct irq_desc *desc;
4068 const struct cpumask *mask; 4105 const struct cpumask *mask;
4069 4106
4070 if (skip_ioapic_setup == 1) 4107 if (skip_ioapic_setup == 1)
4071 return; 4108 return;
4072 4109
4073#ifdef CONFIG_ACPI 4110 for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
4074 if (!acpi_disabled && acpi_ioapic) {
4075 ioapic = mp_find_ioapic(0);
4076 if (ioapic < 0)
4077 ioapic = 0;
4078 }
4079#endif
4080
4081 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { 4111 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
4082 irq_entry = find_irq_entry(ioapic, pin, mp_INT); 4112 irq_entry = find_irq_entry(ioapic, pin, mp_INT);
4083 if (irq_entry == -1) 4113 if (irq_entry == -1)
4084 continue; 4114 continue;
4085 irq = pin_2_irq(irq_entry, ioapic, pin); 4115 irq = pin_2_irq(irq_entry, ioapic, pin);
4086 4116
4117 if ((ioapic > 0) && (irq > 16))
4118 continue;
4119
4087 desc = irq_to_desc(irq); 4120 desc = irq_to_desc(irq);
4088 4121
4089 /* 4122 /*
@@ -4268,3 +4301,24 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4268 4301
4269 nr_ioapics++; 4302 nr_ioapics++;
4270} 4303}
4304
4305/* Enable IOAPIC early just for system timer */
4306void __init pre_init_apic_IRQ0(void)
4307{
4308 struct irq_cfg *cfg;
4309 struct irq_desc *desc;
4310
4311 printk(KERN_INFO "Early APIC setup for system timer0\n");
4312#ifndef CONFIG_SMP
4313 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
4314#endif
4315 desc = irq_to_desc_alloc_node(0, 0);
4316
4317 setup_local_APIC();
4318
4319 cfg = irq_cfg(0);
4320 add_pin_to_irq_node(cfg, 0, 0, 0);
4321 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
4322
4323 setup_IO_APIC_irq(0, 0, 0, desc, 0, 0);
4324}
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 0159a69396cb..8aa65adbd25d 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -177,7 +177,7 @@ int __init check_nmi_watchdog(void)
177error: 177error:
178 if (nmi_watchdog == NMI_IO_APIC) { 178 if (nmi_watchdog == NMI_IO_APIC) {
179 if (!timer_through_8259) 179 if (!timer_through_8259)
180 disable_8259A_irq(0); 180 legacy_pic->chip->mask(0);
181 on_each_cpu(__acpi_nmi_disable, NULL, 1); 181 on_each_cpu(__acpi_nmi_disable, NULL, 1);
182 } 182 }
183 183
@@ -416,13 +416,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
416 416
417 /* We can be called before check_nmi_watchdog, hence NULL check. */ 417 /* We can be called before check_nmi_watchdog, hence NULL check. */
418 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { 418 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
419 static DEFINE_SPINLOCK(lock); /* Serialise the printks */ 419 static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
420 420
421 spin_lock(&lock); 421 raw_spin_lock(&lock);
422 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); 422 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
423 show_regs(regs); 423 show_regs(regs);
424 dump_stack(); 424 dump_stack();
425 spin_unlock(&lock); 425 raw_spin_unlock(&lock);
426 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); 426 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
427 427
428 rc = 1; 428 rc = 1;
@@ -438,8 +438,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
438 * Ayiee, looks like this CPU is stuck ... 438 * Ayiee, looks like this CPU is stuck ...
439 * wait a few IRQs (5 seconds) before doing the oops ... 439 * wait a few IRQs (5 seconds) before doing the oops ...
440 */ 440 */
441 __this_cpu_inc(per_cpu_var(alert_counter)); 441 __this_cpu_inc(alert_counter);
442 if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz) 442 if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
443 /* 443 /*
444 * die_nmi will return ONLY if NOTIFY_STOP happens.. 444 * die_nmi will return ONLY if NOTIFY_STOP happens..
445 */ 445 */
@@ -447,7 +447,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
447 regs, panic_on_timeout); 447 regs, panic_on_timeout);
448 } else { 448 } else {
449 __get_cpu_var(last_irq_sum) = sum; 449 __get_cpu_var(last_irq_sum) = sum;
450 __this_cpu_write(per_cpu_var(alert_counter), 0); 450 __this_cpu_write(alert_counter, 0);
451 } 451 }
452 452
453 /* see if the nmi watchdog went off */ 453 /* see if the nmi watchdog went off */
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 47dd856708e5..3e28401f161c 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -277,6 +277,7 @@ static __init void early_check_numaq(void)
277 x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; 277 x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus;
278 x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; 278 x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info;
279 x86_init.timers.tsc_pre_init = numaq_tsc_init; 279 x86_init.timers.tsc_pre_init = numaq_tsc_init;
280 x86_init.pci.init = pci_numaq_init;
280 } 281 }
281} 282}
282 283
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index f138c6c389b9..870e6cc6ad28 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -10,6 +10,20 @@ if CPU_FREQ
10 10
11comment "CPUFreq processor drivers" 11comment "CPUFreq processor drivers"
12 12
13config X86_PCC_CPUFREQ
14 tristate "Processor Clocking Control interface driver"
15 depends on ACPI && ACPI_PROCESSOR
16 help
17 This driver adds support for the PCC interface.
18
19 For details, take a look at:
20 <file:Documentation/cpu-freq/pcc-cpufreq.txt>.
21
22 To compile this driver as a module, choose M here: the
23 module will be called pcc-cpufreq.
24
25 If in doubt, say N.
26
13config X86_ACPI_CPUFREQ 27config X86_ACPI_CPUFREQ
14 tristate "ACPI Processor P-States driver" 28 tristate "ACPI Processor P-States driver"
15 select CPU_FREQ_TABLE 29 select CPU_FREQ_TABLE
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 509296df294d..1840c0a5170b 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -4,6 +4,7 @@
4 4
5obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o 5obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
6obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o 6obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
7obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o
7obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o 8obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
8obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o 9obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
9obj-$(CONFIG_X86_LONGHAUL) += longhaul.o 10obj-$(CONFIG_X86_LONGHAUL) += longhaul.o
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
new file mode 100644
index 000000000000..ff36d2979a90
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -0,0 +1,620 @@
1/*
2 * pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface
3 *
4 * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
5 * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
6 * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
7 *
8 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2 of the License.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
17 * INFRINGEMENT. See the GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 */
25
26#include <linux/kernel.h>
27#include <linux/module.h>
28#include <linux/init.h>
29#include <linux/smp.h>
30#include <linux/sched.h>
31#include <linux/cpufreq.h>
32#include <linux/compiler.h>
33
34#include <linux/acpi.h>
35#include <linux/io.h>
36#include <linux/spinlock.h>
37#include <linux/uaccess.h>
38
39#include <acpi/processor.h>
40
41#define PCC_VERSION "1.00.00"
42#define POLL_LOOPS 300
43
44#define CMD_COMPLETE 0x1
45#define CMD_GET_FREQ 0x0
46#define CMD_SET_FREQ 0x1
47
48#define BUF_SZ 4
49
50#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
51 "pcc-cpufreq", msg)
52
53struct pcc_register_resource {
54 u8 descriptor;
55 u16 length;
56 u8 space_id;
57 u8 bit_width;
58 u8 bit_offset;
59 u8 access_size;
60 u64 address;
61} __attribute__ ((packed));
62
63struct pcc_memory_resource {
64 u8 descriptor;
65 u16 length;
66 u8 space_id;
67 u8 resource_usage;
68 u8 type_specific;
69 u64 granularity;
70 u64 minimum;
71 u64 maximum;
72 u64 translation_offset;
73 u64 address_length;
74} __attribute__ ((packed));
75
76static struct cpufreq_driver pcc_cpufreq_driver;
77
78struct pcc_header {
79 u32 signature;
80 u16 length;
81 u8 major;
82 u8 minor;
83 u32 features;
84 u16 command;
85 u16 status;
86 u32 latency;
87 u32 minimum_time;
88 u32 maximum_time;
89 u32 nominal;
90 u32 throttled_frequency;
91 u32 minimum_frequency;
92};
93
94static void __iomem *pcch_virt_addr;
95static struct pcc_header __iomem *pcch_hdr;
96
97static DEFINE_SPINLOCK(pcc_lock);
98
99static struct acpi_generic_address doorbell;
100
101static u64 doorbell_preserve;
102static u64 doorbell_write;
103
104static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f,
105 0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46};
106
107struct pcc_cpu {
108 u32 input_offset;
109 u32 output_offset;
110};
111
112static struct pcc_cpu *pcc_cpu_info;
113
114static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
115{
116 cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
117 policy->cpuinfo.max_freq);
118 return 0;
119}
120
121static inline void pcc_cmd(void)
122{
123 u64 doorbell_value;
124 int i;
125
126 acpi_read(&doorbell_value, &doorbell);
127 acpi_write((doorbell_value & doorbell_preserve) | doorbell_write,
128 &doorbell);
129
130 for (i = 0; i < POLL_LOOPS; i++) {
131 if (ioread16(&pcch_hdr->status) & CMD_COMPLETE)
132 break;
133 }
134}
135
136static inline void pcc_clear_mapping(void)
137{
138 if (pcch_virt_addr)
139 iounmap(pcch_virt_addr);
140 pcch_virt_addr = NULL;
141}
142
143static unsigned int pcc_get_freq(unsigned int cpu)
144{
145 struct pcc_cpu *pcc_cpu_data;
146 unsigned int curr_freq;
147 unsigned int freq_limit;
148 u16 status;
149 u32 input_buffer;
150 u32 output_buffer;
151
152 spin_lock(&pcc_lock);
153
154 dprintk("get: get_freq for CPU %d\n", cpu);
155 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
156
157 input_buffer = 0x1;
158 iowrite32(input_buffer,
159 (pcch_virt_addr + pcc_cpu_data->input_offset));
160 iowrite16(CMD_GET_FREQ, &pcch_hdr->command);
161
162 pcc_cmd();
163
164 output_buffer =
165 ioread32(pcch_virt_addr + pcc_cpu_data->output_offset);
166
167 /* Clear the input buffer - we are done with the current command */
168 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
169
170 status = ioread16(&pcch_hdr->status);
171 if (status != CMD_COMPLETE) {
172 dprintk("get: FAILED: for CPU %d, status is %d\n",
173 cpu, status);
174 goto cmd_incomplete;
175 }
176 iowrite16(0, &pcch_hdr->status);
177 curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff))
178 / 100) * 1000);
179
180 dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is "
181 "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n",
182 cpu, (pcch_virt_addr + pcc_cpu_data->output_offset),
183 output_buffer, curr_freq);
184
185 freq_limit = (output_buffer >> 8) & 0xff;
186 if (freq_limit != 0xff) {
187 dprintk("get: frequency for cpu %d is being temporarily"
188 " capped at %d\n", cpu, curr_freq);
189 }
190
191 spin_unlock(&pcc_lock);
192 return curr_freq;
193
194cmd_incomplete:
195 iowrite16(0, &pcch_hdr->status);
196 spin_unlock(&pcc_lock);
197 return -EINVAL;
198}
199
200static int pcc_cpufreq_target(struct cpufreq_policy *policy,
201 unsigned int target_freq,
202 unsigned int relation)
203{
204 struct pcc_cpu *pcc_cpu_data;
205 struct cpufreq_freqs freqs;
206 u16 status;
207 u32 input_buffer;
208 int cpu;
209
210 spin_lock(&pcc_lock);
211 cpu = policy->cpu;
212 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
213
214 dprintk("target: CPU %d should go to target freq: %d "
215 "(virtual) input_offset is 0x%x\n",
216 cpu, target_freq,
217 (pcch_virt_addr + pcc_cpu_data->input_offset));
218
219 freqs.new = target_freq;
220 freqs.cpu = cpu;
221 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
222
223 input_buffer = 0x1 | (((target_freq * 100)
224 / (ioread32(&pcch_hdr->nominal) * 1000)) << 8);
225 iowrite32(input_buffer,
226 (pcch_virt_addr + pcc_cpu_data->input_offset));
227 iowrite16(CMD_SET_FREQ, &pcch_hdr->command);
228
229 pcc_cmd();
230
231 /* Clear the input buffer - we are done with the current command */
232 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
233
234 status = ioread16(&pcch_hdr->status);
235 if (status != CMD_COMPLETE) {
236 dprintk("target: FAILED for cpu %d, with status: 0x%x\n",
237 cpu, status);
238 goto cmd_incomplete;
239 }
240 iowrite16(0, &pcch_hdr->status);
241
242 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
243 dprintk("target: was SUCCESSFUL for cpu %d\n", cpu);
244 spin_unlock(&pcc_lock);
245
246 return 0;
247
248cmd_incomplete:
249 iowrite16(0, &pcch_hdr->status);
250 spin_unlock(&pcc_lock);
251 return -EINVAL;
252}
253
254static int pcc_get_offset(int cpu)
255{
256 acpi_status status;
257 struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
258 union acpi_object *pccp, *offset;
259 struct pcc_cpu *pcc_cpu_data;
260 struct acpi_processor *pr;
261 int ret = 0;
262
263 pr = per_cpu(processors, cpu);
264 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
265
266 status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer);
267 if (ACPI_FAILURE(status))
268 return -ENODEV;
269
270 pccp = buffer.pointer;
271 if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) {
272 ret = -ENODEV;
273 goto out_free;
274 };
275
276 offset = &(pccp->package.elements[0]);
277 if (!offset || offset->type != ACPI_TYPE_INTEGER) {
278 ret = -ENODEV;
279 goto out_free;
280 }
281
282 pcc_cpu_data->input_offset = offset->integer.value;
283
284 offset = &(pccp->package.elements[1]);
285 if (!offset || offset->type != ACPI_TYPE_INTEGER) {
286 ret = -ENODEV;
287 goto out_free;
288 }
289
290 pcc_cpu_data->output_offset = offset->integer.value;
291
292 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
293 memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ);
294
295 dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data "
296 "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n",
297 cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset);
298out_free:
299 kfree(buffer.pointer);
300 return ret;
301}
302
303static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
304{
305 acpi_status status;
306 struct acpi_object_list input;
307 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
308 union acpi_object in_params[4];
309 union acpi_object *out_obj;
310 u32 capabilities[2];
311 u32 errors;
312 u32 supported;
313 int ret = 0;
314
315 input.count = 4;
316 input.pointer = in_params;
317 input.count = 4;
318 input.pointer = in_params;
319 in_params[0].type = ACPI_TYPE_BUFFER;
320 in_params[0].buffer.length = 16;
321 in_params[0].buffer.pointer = OSC_UUID;
322 in_params[1].type = ACPI_TYPE_INTEGER;
323 in_params[1].integer.value = 1;
324 in_params[2].type = ACPI_TYPE_INTEGER;
325 in_params[2].integer.value = 2;
326 in_params[3].type = ACPI_TYPE_BUFFER;
327 in_params[3].buffer.length = 8;
328 in_params[3].buffer.pointer = (u8 *)&capabilities;
329
330 capabilities[0] = OSC_QUERY_ENABLE;
331 capabilities[1] = 0x1;
332
333 status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
334 if (ACPI_FAILURE(status))
335 return -ENODEV;
336
337 if (!output.length)
338 return -ENODEV;
339
340 out_obj = output.pointer;
341 if (out_obj->type != ACPI_TYPE_BUFFER) {
342 ret = -ENODEV;
343 goto out_free;
344 }
345
346 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
347 if (errors) {
348 ret = -ENODEV;
349 goto out_free;
350 }
351
352 supported = *((u32 *)(out_obj->buffer.pointer + 4));
353 if (!(supported & 0x1)) {
354 ret = -ENODEV;
355 goto out_free;
356 }
357
358 kfree(output.pointer);
359 capabilities[0] = 0x0;
360 capabilities[1] = 0x1;
361
362 status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
363 if (ACPI_FAILURE(status))
364 return -ENODEV;
365
366 if (!output.length)
367 return -ENODEV;
368
369 out_obj = output.pointer;
370 if (out_obj->type != ACPI_TYPE_BUFFER) {
371 ret = -ENODEV;
372 goto out_free;
373 }
374
375 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
376 if (errors) {
377 ret = -ENODEV;
378 goto out_free;
379 }
380
381 supported = *((u32 *)(out_obj->buffer.pointer + 4));
382 if (!(supported & 0x1)) {
383 ret = -ENODEV;
384 goto out_free;
385 }
386
387out_free:
388 kfree(output.pointer);
389 return ret;
390}
391
392static int __init pcc_cpufreq_probe(void)
393{
394 acpi_status status;
395 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
396 struct pcc_memory_resource *mem_resource;
397 struct pcc_register_resource *reg_resource;
398 union acpi_object *out_obj, *member;
399 acpi_handle handle, osc_handle;
400 int ret = 0;
401
402 status = acpi_get_handle(NULL, "\\_SB", &handle);
403 if (ACPI_FAILURE(status))
404 return -ENODEV;
405
406 status = acpi_get_handle(handle, "_OSC", &osc_handle);
407 if (ACPI_SUCCESS(status)) {
408 ret = pcc_cpufreq_do_osc(&osc_handle);
409 if (ret)
410 dprintk("probe: _OSC evaluation did not succeed\n");
411 /* Firmware's use of _OSC is optional */
412 ret = 0;
413 }
414
415 status = acpi_evaluate_object(handle, "PCCH", NULL, &output);
416 if (ACPI_FAILURE(status))
417 return -ENODEV;
418
419 out_obj = output.pointer;
420 if (out_obj->type != ACPI_TYPE_PACKAGE) {
421 ret = -ENODEV;
422 goto out_free;
423 }
424
425 member = &out_obj->package.elements[0];
426 if (member->type != ACPI_TYPE_BUFFER) {
427 ret = -ENODEV;
428 goto out_free;
429 }
430
431 mem_resource = (struct pcc_memory_resource *)member->buffer.pointer;
432
433 dprintk("probe: mem_resource descriptor: 0x%x,"
434 " length: %d, space_id: %d, resource_usage: %d,"
435 " type_specific: %d, granularity: 0x%llx,"
436 " minimum: 0x%llx, maximum: 0x%llx,"
437 " translation_offset: 0x%llx, address_length: 0x%llx\n",
438 mem_resource->descriptor, mem_resource->length,
439 mem_resource->space_id, mem_resource->resource_usage,
440 mem_resource->type_specific, mem_resource->granularity,
441 mem_resource->minimum, mem_resource->maximum,
442 mem_resource->translation_offset,
443 mem_resource->address_length);
444
445 if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) {
446 ret = -ENODEV;
447 goto out_free;
448 }
449
450 pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
451 mem_resource->address_length);
452 if (pcch_virt_addr == NULL) {
453 dprintk("probe: could not map shared mem region\n");
454 goto out_free;
455 }
456 pcch_hdr = pcch_virt_addr;
457
458 dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
459 dprintk("probe: PCCH header is at physical address: 0x%llx,"
460 " signature: 0x%x, length: %d bytes, major: %d, minor: %d,"
461 " supported features: 0x%x, command field: 0x%x,"
462 " status field: 0x%x, nominal latency: %d us\n",
463 mem_resource->minimum, ioread32(&pcch_hdr->signature),
464 ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major),
465 ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features),
466 ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status),
467 ioread32(&pcch_hdr->latency));
468
469 dprintk("probe: min time between commands: %d us,"
470 " max time between commands: %d us,"
471 " nominal CPU frequency: %d MHz,"
472 " minimum CPU frequency: %d MHz,"
473 " minimum CPU frequency without throttling: %d MHz\n",
474 ioread32(&pcch_hdr->minimum_time),
475 ioread32(&pcch_hdr->maximum_time),
476 ioread32(&pcch_hdr->nominal),
477 ioread32(&pcch_hdr->throttled_frequency),
478 ioread32(&pcch_hdr->minimum_frequency));
479
480 member = &out_obj->package.elements[1];
481 if (member->type != ACPI_TYPE_BUFFER) {
482 ret = -ENODEV;
483 goto pcch_free;
484 }
485
486 reg_resource = (struct pcc_register_resource *)member->buffer.pointer;
487
488 doorbell.space_id = reg_resource->space_id;
489 doorbell.bit_width = reg_resource->bit_width;
490 doorbell.bit_offset = reg_resource->bit_offset;
491 doorbell.access_width = 64;
492 doorbell.address = reg_resource->address;
493
494 dprintk("probe: doorbell: space_id is %d, bit_width is %d, "
495 "bit_offset is %d, access_width is %d, address is 0x%llx\n",
496 doorbell.space_id, doorbell.bit_width, doorbell.bit_offset,
497 doorbell.access_width, reg_resource->address);
498
499 member = &out_obj->package.elements[2];
500 if (member->type != ACPI_TYPE_INTEGER) {
501 ret = -ENODEV;
502 goto pcch_free;
503 }
504
505 doorbell_preserve = member->integer.value;
506
507 member = &out_obj->package.elements[3];
508 if (member->type != ACPI_TYPE_INTEGER) {
509 ret = -ENODEV;
510 goto pcch_free;
511 }
512
513 doorbell_write = member->integer.value;
514
515 dprintk("probe: doorbell_preserve: 0x%llx,"
516 " doorbell_write: 0x%llx\n",
517 doorbell_preserve, doorbell_write);
518
519 pcc_cpu_info = alloc_percpu(struct pcc_cpu);
520 if (!pcc_cpu_info) {
521 ret = -ENOMEM;
522 goto pcch_free;
523 }
524
525 printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency"
526 " limits: %d MHz, %d MHz\n", PCC_VERSION,
527 ioread32(&pcch_hdr->minimum_frequency),
528 ioread32(&pcch_hdr->nominal));
529 kfree(output.pointer);
530 return ret;
531pcch_free:
532 pcc_clear_mapping();
533out_free:
534 kfree(output.pointer);
535 return ret;
536}
537
538static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
539{
540 unsigned int cpu = policy->cpu;
541 unsigned int result = 0;
542
543 if (!pcch_virt_addr) {
544 result = -1;
545 goto pcch_null;
546 }
547
548 result = pcc_get_offset(cpu);
549 if (result) {
550 dprintk("init: PCCP evaluation failed\n");
551 goto free;
552 }
553
554 policy->max = policy->cpuinfo.max_freq =
555 ioread32(&pcch_hdr->nominal) * 1000;
556 policy->min = policy->cpuinfo.min_freq =
557 ioread32(&pcch_hdr->minimum_frequency) * 1000;
558 policy->cur = pcc_get_freq(cpu);
559
560 dprintk("init: policy->max is %d, policy->min is %d\n",
561 policy->max, policy->min);
562
563 return 0;
564free:
565 pcc_clear_mapping();
566 free_percpu(pcc_cpu_info);
567pcch_null:
568 return result;
569}
570
571static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
572{
573 return 0;
574}
575
576static struct cpufreq_driver pcc_cpufreq_driver = {
577 .flags = CPUFREQ_CONST_LOOPS,
578 .get = pcc_get_freq,
579 .verify = pcc_cpufreq_verify,
580 .target = pcc_cpufreq_target,
581 .init = pcc_cpufreq_cpu_init,
582 .exit = pcc_cpufreq_cpu_exit,
583 .name = "pcc-cpufreq",
584 .owner = THIS_MODULE,
585};
586
587static int __init pcc_cpufreq_init(void)
588{
589 int ret;
590
591 if (acpi_disabled)
592 return 0;
593
594 ret = pcc_cpufreq_probe();
595 if (ret) {
596 dprintk("pcc_cpufreq_init: PCCH evaluation failed\n");
597 return ret;
598 }
599
600 ret = cpufreq_register_driver(&pcc_cpufreq_driver);
601
602 return ret;
603}
604
605static void __exit pcc_cpufreq_exit(void)
606{
607 cpufreq_unregister_driver(&pcc_cpufreq_driver);
608
609 pcc_clear_mapping();
610
611 free_percpu(pcc_cpu_info);
612}
613
614MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar");
615MODULE_VERSION(PCC_VERSION);
616MODULE_DESCRIPTION("Processor Clocking Control interface driver");
617MODULE_LICENSE("GPL");
618
619late_initcall(pcc_cpufreq_init);
620module_exit(pcc_cpufreq_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 6e44519960c8..d360b56e9825 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -806,7 +806,7 @@ static int find_psb_table(struct powernow_k8_data *data)
806static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, 806static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
807 unsigned int index) 807 unsigned int index)
808{ 808{
809 acpi_integer control; 809 u64 control;
810 810
811 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) 811 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
812 return; 812 return;
@@ -824,7 +824,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
824{ 824{
825 struct cpufreq_frequency_table *powernow_table; 825 struct cpufreq_frequency_table *powernow_table;
826 int ret_val = -ENODEV; 826 int ret_val = -ENODEV;
827 acpi_integer control, status; 827 u64 control, status;
828 828
829 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { 829 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
830 dprintk("register performance failed: bad ACPI data\n"); 830 dprintk("register performance failed: bad ACPI data\n");
@@ -948,7 +948,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
948 u32 fid; 948 u32 fid;
949 u32 vid; 949 u32 vid;
950 u32 freq, index; 950 u32 freq, index;
951 acpi_integer status, control; 951 u64 status, control;
952 952
953 if (data->exttype) { 953 if (data->exttype) {
954 status = data->acpi_data.states[i].status; 954 status = data->acpi_data.states[i].status;
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index eddb1bdd1b8f..b3eeb66c0a51 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -903,7 +903,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
903 return ret; 903 return ret;
904} 904}
905 905
906static struct sysfs_ops sysfs_ops = { 906static const struct sysfs_ops sysfs_ops = {
907 .show = show, 907 .show = show,
908 .store = store, 908 .store = store,
909}; 909};
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 4442e9e898c2..bd58de4d7a29 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2049,6 +2049,7 @@ static __init void mce_init_banks(void)
2049 struct mce_bank *b = &mce_banks[i]; 2049 struct mce_bank *b = &mce_banks[i];
2050 struct sysdev_attribute *a = &b->attr; 2050 struct sysdev_attribute *a = &b->attr;
2051 2051
2052 sysfs_attr_init(&a->attr);
2052 a->attr.name = b->attrname; 2053 a->attr.name = b->attrname;
2053 snprintf(b->attrname, ATTR_LEN, "bank%d", i); 2054 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
2054 2055
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 83a3d1f4efca..cda932ca3ade 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -388,7 +388,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
388 return ret; 388 return ret;
389} 389}
390 390
391static struct sysfs_ops threshold_ops = { 391static const struct sysfs_ops threshold_ops = {
392 .show = show, 392 .show = show,
393 .store = store, 393 .store = store,
394}; 394};
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 09b1698e0466..06130b52f012 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -22,10 +22,10 @@
22#include <linux/pci.h> 22#include <linux/pci.h>
23#include <linux/smp.h> 23#include <linux/smp.h>
24#include <linux/cpu.h> 24#include <linux/cpu.h>
25#include <linux/sort.h>
26#include <linux/mutex.h> 25#include <linux/mutex.h>
27#include <linux/uaccess.h> 26#include <linux/uaccess.h>
28#include <linux/kvm_para.h> 27#include <linux/kvm_para.h>
28#include <linux/range.h>
29 29
30#include <asm/processor.h> 30#include <asm/processor.h>
31#include <asm/e820.h> 31#include <asm/e820.h>
@@ -34,11 +34,6 @@
34 34
35#include "mtrr.h" 35#include "mtrr.h"
36 36
37struct res_range {
38 unsigned long start;
39 unsigned long end;
40};
41
42struct var_mtrr_range_state { 37struct var_mtrr_range_state {
43 unsigned long base_pfn; 38 unsigned long base_pfn;
44 unsigned long size_pfn; 39 unsigned long size_pfn;
@@ -56,7 +51,7 @@ struct var_mtrr_state {
56/* Should be related to MTRR_VAR_RANGES nums */ 51/* Should be related to MTRR_VAR_RANGES nums */
57#define RANGE_NUM 256 52#define RANGE_NUM 256
58 53
59static struct res_range __initdata range[RANGE_NUM]; 54static struct range __initdata range[RANGE_NUM];
60static int __initdata nr_range; 55static int __initdata nr_range;
61 56
62static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; 57static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
@@ -64,152 +59,11 @@ static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
64static int __initdata debug_print; 59static int __initdata debug_print;
65#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) 60#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
66 61
67
68static int __init
69add_range(struct res_range *range, int nr_range,
70 unsigned long start, unsigned long end)
71{
72 /* Out of slots: */
73 if (nr_range >= RANGE_NUM)
74 return nr_range;
75
76 range[nr_range].start = start;
77 range[nr_range].end = end;
78
79 nr_range++;
80
81 return nr_range;
82}
83
84static int __init
85add_range_with_merge(struct res_range *range, int nr_range,
86 unsigned long start, unsigned long end)
87{
88 int i;
89
90 /* Try to merge it with old one: */
91 for (i = 0; i < nr_range; i++) {
92 unsigned long final_start, final_end;
93 unsigned long common_start, common_end;
94
95 if (!range[i].end)
96 continue;
97
98 common_start = max(range[i].start, start);
99 common_end = min(range[i].end, end);
100 if (common_start > common_end + 1)
101 continue;
102
103 final_start = min(range[i].start, start);
104 final_end = max(range[i].end, end);
105
106 range[i].start = final_start;
107 range[i].end = final_end;
108 return nr_range;
109 }
110
111 /* Need to add it: */
112 return add_range(range, nr_range, start, end);
113}
114
115static void __init
116subtract_range(struct res_range *range, unsigned long start, unsigned long end)
117{
118 int i, j;
119
120 for (j = 0; j < RANGE_NUM; j++) {
121 if (!range[j].end)
122 continue;
123
124 if (start <= range[j].start && end >= range[j].end) {
125 range[j].start = 0;
126 range[j].end = 0;
127 continue;
128 }
129
130 if (start <= range[j].start && end < range[j].end &&
131 range[j].start < end + 1) {
132 range[j].start = end + 1;
133 continue;
134 }
135
136
137 if (start > range[j].start && end >= range[j].end &&
138 range[j].end > start - 1) {
139 range[j].end = start - 1;
140 continue;
141 }
142
143 if (start > range[j].start && end < range[j].end) {
144 /* Find the new spare: */
145 for (i = 0; i < RANGE_NUM; i++) {
146 if (range[i].end == 0)
147 break;
148 }
149 if (i < RANGE_NUM) {
150 range[i].end = range[j].end;
151 range[i].start = end + 1;
152 } else {
153 printk(KERN_ERR "run of slot in ranges\n");
154 }
155 range[j].end = start - 1;
156 continue;
157 }
158 }
159}
160
161static int __init cmp_range(const void *x1, const void *x2)
162{
163 const struct res_range *r1 = x1;
164 const struct res_range *r2 = x2;
165 long start1, start2;
166
167 start1 = r1->start;
168 start2 = r2->start;
169
170 return start1 - start2;
171}
172
173static int __init clean_sort_range(struct res_range *range, int az)
174{
175 int i, j, k = az - 1, nr_range = 0;
176
177 for (i = 0; i < k; i++) {
178 if (range[i].end)
179 continue;
180 for (j = k; j > i; j--) {
181 if (range[j].end) {
182 k = j;
183 break;
184 }
185 }
186 if (j == i)
187 break;
188 range[i].start = range[k].start;
189 range[i].end = range[k].end;
190 range[k].start = 0;
191 range[k].end = 0;
192 k--;
193 }
194 /* count it */
195 for (i = 0; i < az; i++) {
196 if (!range[i].end) {
197 nr_range = i;
198 break;
199 }
200 }
201
202 /* sort them */
203 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
204
205 return nr_range;
206}
207
208#define BIOS_BUG_MSG KERN_WARNING \ 62#define BIOS_BUG_MSG KERN_WARNING \
209 "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" 63 "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
210 64
211static int __init 65static int __init
212x86_get_mtrr_mem_range(struct res_range *range, int nr_range, 66x86_get_mtrr_mem_range(struct range *range, int nr_range,
213 unsigned long extra_remove_base, 67 unsigned long extra_remove_base,
214 unsigned long extra_remove_size) 68 unsigned long extra_remove_size)
215{ 69{
@@ -223,14 +77,14 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
223 continue; 77 continue;
224 base = range_state[i].base_pfn; 78 base = range_state[i].base_pfn;
225 size = range_state[i].size_pfn; 79 size = range_state[i].size_pfn;
226 nr_range = add_range_with_merge(range, nr_range, base, 80 nr_range = add_range_with_merge(range, RANGE_NUM, nr_range,
227 base + size - 1); 81 base, base + size);
228 } 82 }
229 if (debug_print) { 83 if (debug_print) {
230 printk(KERN_DEBUG "After WB checking\n"); 84 printk(KERN_DEBUG "After WB checking\n");
231 for (i = 0; i < nr_range; i++) 85 for (i = 0; i < nr_range; i++)
232 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 86 printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
233 range[i].start, range[i].end + 1); 87 range[i].start, range[i].end);
234 } 88 }
235 89
236 /* Take out UC ranges: */ 90 /* Take out UC ranges: */
@@ -252,19 +106,19 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
252 size -= (1<<(20-PAGE_SHIFT)) - base; 106 size -= (1<<(20-PAGE_SHIFT)) - base;
253 base = 1<<(20-PAGE_SHIFT); 107 base = 1<<(20-PAGE_SHIFT);
254 } 108 }
255 subtract_range(range, base, base + size - 1); 109 subtract_range(range, RANGE_NUM, base, base + size);
256 } 110 }
257 if (extra_remove_size) 111 if (extra_remove_size)
258 subtract_range(range, extra_remove_base, 112 subtract_range(range, RANGE_NUM, extra_remove_base,
259 extra_remove_base + extra_remove_size - 1); 113 extra_remove_base + extra_remove_size);
260 114
261 if (debug_print) { 115 if (debug_print) {
262 printk(KERN_DEBUG "After UC checking\n"); 116 printk(KERN_DEBUG "After UC checking\n");
263 for (i = 0; i < RANGE_NUM; i++) { 117 for (i = 0; i < RANGE_NUM; i++) {
264 if (!range[i].end) 118 if (!range[i].end)
265 continue; 119 continue;
266 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 120 printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
267 range[i].start, range[i].end + 1); 121 range[i].start, range[i].end);
268 } 122 }
269 } 123 }
270 124
@@ -273,26 +127,22 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
273 if (debug_print) { 127 if (debug_print) {
274 printk(KERN_DEBUG "After sorting\n"); 128 printk(KERN_DEBUG "After sorting\n");
275 for (i = 0; i < nr_range; i++) 129 for (i = 0; i < nr_range; i++)
276 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 130 printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
277 range[i].start, range[i].end + 1); 131 range[i].start, range[i].end);
278 } 132 }
279 133
280 /* clear those is not used */
281 for (i = nr_range; i < RANGE_NUM; i++)
282 memset(&range[i], 0, sizeof(range[i]));
283
284 return nr_range; 134 return nr_range;
285} 135}
286 136
287#ifdef CONFIG_MTRR_SANITIZER 137#ifdef CONFIG_MTRR_SANITIZER
288 138
289static unsigned long __init sum_ranges(struct res_range *range, int nr_range) 139static unsigned long __init sum_ranges(struct range *range, int nr_range)
290{ 140{
291 unsigned long sum = 0; 141 unsigned long sum = 0;
292 int i; 142 int i;
293 143
294 for (i = 0; i < nr_range; i++) 144 for (i = 0; i < nr_range; i++)
295 sum += range[i].end + 1 - range[i].start; 145 sum += range[i].end - range[i].start;
296 146
297 return sum; 147 return sum;
298} 148}
@@ -621,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg)
621early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); 471early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
622 472
623static int __init 473static int __init
624x86_setup_var_mtrrs(struct res_range *range, int nr_range, 474x86_setup_var_mtrrs(struct range *range, int nr_range,
625 u64 chunk_size, u64 gran_size) 475 u64 chunk_size, u64 gran_size)
626{ 476{
627 struct var_mtrr_state var_state; 477 struct var_mtrr_state var_state;
@@ -639,7 +489,7 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
639 /* Write the range: */ 489 /* Write the range: */
640 for (i = 0; i < nr_range; i++) { 490 for (i = 0; i < nr_range; i++) {
641 set_var_mtrr_range(&var_state, range[i].start, 491 set_var_mtrr_range(&var_state, range[i].start,
642 range[i].end - range[i].start + 1); 492 range[i].end - range[i].start);
643 } 493 }
644 494
645 /* Write the last range: */ 495 /* Write the last range: */
@@ -742,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
742 unsigned long x_remove_base, 592 unsigned long x_remove_base,
743 unsigned long x_remove_size, int i) 593 unsigned long x_remove_size, int i)
744{ 594{
745 static struct res_range range_new[RANGE_NUM]; 595 static struct range range_new[RANGE_NUM];
746 unsigned long range_sums_new; 596 unsigned long range_sums_new;
747 static int nr_range_new; 597 static int nr_range_new;
748 int num_reg; 598 int num_reg;
@@ -869,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits)
869 * [0, 1M) should always be covered by var mtrr with WB 719 * [0, 1M) should always be covered by var mtrr with WB
870 * and fixed mtrrs should take effect before var mtrr for it: 720 * and fixed mtrrs should take effect before var mtrr for it:
871 */ 721 */
872 nr_range = add_range_with_merge(range, nr_range, 0, 722 nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0,
873 (1ULL<<(20 - PAGE_SHIFT)) - 1); 723 1ULL<<(20 - PAGE_SHIFT));
874 /* Sort the ranges: */ 724 /* Sort the ranges: */
875 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); 725 sort_range(range, nr_range);
876 726
877 range_sums = sum_ranges(range, nr_range); 727 range_sums = sum_ranges(range, nr_range);
878 printk(KERN_INFO "total RAM covered: %ldM\n", 728 printk(KERN_INFO "total RAM covered: %ldM\n",
@@ -1089,9 +939,9 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1089 nr_range = 0; 939 nr_range = 0;
1090 if (mtrr_tom2) { 940 if (mtrr_tom2) {
1091 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); 941 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
1092 range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; 942 range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT;
1093 if (highest_pfn < range[nr_range].end + 1) 943 if (highest_pfn < range[nr_range].end)
1094 highest_pfn = range[nr_range].end + 1; 944 highest_pfn = range[nr_range].end;
1095 nr_range++; 945 nr_range++;
1096 } 946 }
1097 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); 947 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
@@ -1103,15 +953,15 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1103 953
1104 /* Check the holes: */ 954 /* Check the holes: */
1105 for (i = 0; i < nr_range - 1; i++) { 955 for (i = 0; i < nr_range - 1; i++) {
1106 if (range[i].end + 1 < range[i+1].start) 956 if (range[i].end < range[i+1].start)
1107 total_trim_size += real_trim_memory(range[i].end + 1, 957 total_trim_size += real_trim_memory(range[i].end,
1108 range[i+1].start); 958 range[i+1].start);
1109 } 959 }
1110 960
1111 /* Check the top: */ 961 /* Check the top: */
1112 i = nr_range - 1; 962 i = nr_range - 1;
1113 if (range[i].end + 1 < end_pfn) 963 if (range[i].end < end_pfn)
1114 total_trim_size += real_trim_memory(range[i].end + 1, 964 total_trim_size += real_trim_memory(range[i].end,
1115 end_pfn); 965 end_pfn);
1116 966
1117 if (total_trim_size) { 967 if (total_trim_size) {
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index fe4622e8c837..79556bd9b602 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -145,6 +145,7 @@ struct set_mtrr_data {
145 145
146/** 146/**
147 * ipi_handler - Synchronisation handler. Executed by "other" CPUs. 147 * ipi_handler - Synchronisation handler. Executed by "other" CPUs.
148 * @info: pointer to mtrr configuration data
148 * 149 *
149 * Returns nothing. 150 * Returns nothing.
150 */ 151 */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 641ccb9dddbc..42aafd11e170 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -73,10 +73,10 @@ struct debug_store {
73struct event_constraint { 73struct event_constraint {
74 union { 74 union {
75 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 75 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
76 u64 idxmsk64[1]; 76 u64 idxmsk64;
77 }; 77 };
78 int code; 78 u64 code;
79 int cmask; 79 u64 cmask;
80 int weight; 80 int weight;
81}; 81};
82 82
@@ -103,7 +103,7 @@ struct cpu_hw_events {
103}; 103};
104 104
105#define __EVENT_CONSTRAINT(c, n, m, w) {\ 105#define __EVENT_CONSTRAINT(c, n, m, w) {\
106 { .idxmsk64[0] = (n) }, \ 106 { .idxmsk64 = (n) }, \
107 .code = (c), \ 107 .code = (c), \
108 .cmask = (m), \ 108 .cmask = (m), \
109 .weight = (w), \ 109 .weight = (w), \
@@ -116,7 +116,7 @@ struct cpu_hw_events {
116 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) 116 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
117 117
118#define FIXED_EVENT_CONSTRAINT(c, n) \ 118#define FIXED_EVENT_CONSTRAINT(c, n) \
119 EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) 119 EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
120 120
121#define EVENT_CONSTRAINT_END \ 121#define EVENT_CONSTRAINT_END \
122 EVENT_CONSTRAINT(0, 0, 0) 122 EVENT_CONSTRAINT(0, 0, 0)
@@ -503,6 +503,9 @@ static int __hw_perf_event_init(struct perf_event *event)
503 */ 503 */
504 if (attr->type == PERF_TYPE_RAW) { 504 if (attr->type == PERF_TYPE_RAW) {
505 hwc->config |= x86_pmu.raw_event(attr->config); 505 hwc->config |= x86_pmu.raw_event(attr->config);
506 if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
507 perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
508 return -EACCES;
506 return 0; 509 return 0;
507 } 510 }
508 511
@@ -553,9 +556,9 @@ static void x86_pmu_disable_all(void)
553 if (!test_bit(idx, cpuc->active_mask)) 556 if (!test_bit(idx, cpuc->active_mask))
554 continue; 557 continue;
555 rdmsrl(x86_pmu.eventsel + idx, val); 558 rdmsrl(x86_pmu.eventsel + idx, val);
556 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) 559 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
557 continue; 560 continue;
558 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 561 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
559 wrmsrl(x86_pmu.eventsel + idx, val); 562 wrmsrl(x86_pmu.eventsel + idx, val);
560 } 563 }
561} 564}
@@ -590,7 +593,7 @@ static void x86_pmu_enable_all(void)
590 continue; 593 continue;
591 594
592 val = event->hw.config; 595 val = event->hw.config;
593 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 596 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
594 wrmsrl(x86_pmu.eventsel + idx, val); 597 wrmsrl(x86_pmu.eventsel + idx, val);
595 } 598 }
596} 599}
@@ -612,8 +615,8 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
612 bitmap_zero(used_mask, X86_PMC_IDX_MAX); 615 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
613 616
614 for (i = 0; i < n; i++) { 617 for (i = 0; i < n; i++) {
615 constraints[i] = 618 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
616 x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); 619 constraints[i] = c;
617 } 620 }
618 621
619 /* 622 /*
@@ -676,7 +679,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
676 if (c->weight != w) 679 if (c->weight != w)
677 continue; 680 continue;
678 681
679 for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { 682 for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
680 if (!test_bit(j, used_mask)) 683 if (!test_bit(j, used_mask))
681 break; 684 break;
682 } 685 }
@@ -853,7 +856,7 @@ void hw_perf_enable(void)
853static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) 856static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
854{ 857{
855 (void)checking_wrmsrl(hwc->config_base + idx, 858 (void)checking_wrmsrl(hwc->config_base + idx,
856 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); 859 hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
857} 860}
858 861
859static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) 862static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
@@ -1094,8 +1097,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1094 int idx, handled = 0; 1097 int idx, handled = 0;
1095 u64 val; 1098 u64 val;
1096 1099
1097 data.addr = 0; 1100 perf_sample_data_init(&data, 0);
1098 data.raw = NULL;
1099 1101
1100 cpuc = &__get_cpu_var(cpu_hw_events); 1102 cpuc = &__get_cpu_var(cpu_hw_events);
1101 1103
@@ -1347,6 +1349,7 @@ static void __init pmu_check_apic(void)
1347 1349
1348void __init init_hw_perf_events(void) 1350void __init init_hw_perf_events(void)
1349{ 1351{
1352 struct event_constraint *c;
1350 int err; 1353 int err;
1351 1354
1352 pr_info("Performance Events: "); 1355 pr_info("Performance Events: ");
@@ -1395,6 +1398,16 @@ void __init init_hw_perf_events(void)
1395 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 1398 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
1396 0, x86_pmu.num_events); 1399 0, x86_pmu.num_events);
1397 1400
1401 if (x86_pmu.event_constraints) {
1402 for_each_event_constraint(c, x86_pmu.event_constraints) {
1403 if (c->cmask != INTEL_ARCH_FIXED_MASK)
1404 continue;
1405
1406 c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
1407 c->weight += x86_pmu.num_events;
1408 }
1409 }
1410
1398 pr_info("... version: %d\n", x86_pmu.version); 1411 pr_info("... version: %d\n", x86_pmu.version);
1399 pr_info("... bit width: %d\n", x86_pmu.event_bits); 1412 pr_info("... bit width: %d\n", x86_pmu.event_bits);
1400 pr_info("... generic registers: %d\n", x86_pmu.num_events); 1413 pr_info("... generic registers: %d\n", x86_pmu.num_events);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index cf6590cf4a5f..44b60c852107 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,7 +1,7 @@
1#ifdef CONFIG_CPU_SUP_INTEL 1#ifdef CONFIG_CPU_SUP_INTEL
2 2
3/* 3/*
4 * Intel PerfMon v3. Used on Core2 and later. 4 * Intel PerfMon, used on Core and later.
5 */ 5 */
6static const u64 intel_perfmon_event_map[] = 6static const u64 intel_perfmon_event_map[] =
7{ 7{
@@ -27,8 +27,14 @@ static struct event_constraint intel_core_event_constraints[] =
27 27
28static struct event_constraint intel_core2_event_constraints[] = 28static struct event_constraint intel_core2_event_constraints[] =
29{ 29{
30 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ 30 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
31 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ 31 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
32 /*
33 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
34 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
35 * ratio between these counters.
36 */
37 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
32 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ 38 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
33 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 39 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
34 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ 40 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -37,14 +43,16 @@ static struct event_constraint intel_core2_event_constraints[] =
37 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ 43 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
38 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ 44 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
39 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ 45 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
46 INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
40 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ 47 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
41 EVENT_CONSTRAINT_END 48 EVENT_CONSTRAINT_END
42}; 49};
43 50
44static struct event_constraint intel_nehalem_event_constraints[] = 51static struct event_constraint intel_nehalem_event_constraints[] =
45{ 52{
46 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ 53 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
47 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ 54 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
55 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
48 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ 56 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
49 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ 57 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
50 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ 58 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -58,8 +66,9 @@ static struct event_constraint intel_nehalem_event_constraints[] =
58 66
59static struct event_constraint intel_westmere_event_constraints[] = 67static struct event_constraint intel_westmere_event_constraints[] =
60{ 68{
61 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ 69 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
62 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ 70 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
71 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
63 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ 72 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
64 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ 73 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
65 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ 74 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -68,8 +77,9 @@ static struct event_constraint intel_westmere_event_constraints[] =
68 77
69static struct event_constraint intel_gen_event_constraints[] = 78static struct event_constraint intel_gen_event_constraints[] =
70{ 79{
71 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ 80 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
72 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ 81 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
82 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
73 EVENT_CONSTRAINT_END 83 EVENT_CONSTRAINT_END
74}; 84};
75 85
@@ -580,10 +590,9 @@ static void intel_pmu_drain_bts_buffer(void)
580 590
581 ds->bts_index = ds->bts_buffer_base; 591 ds->bts_index = ds->bts_buffer_base;
582 592
593 perf_sample_data_init(&data, 0);
583 594
584 data.period = event->hw.last_period; 595 data.period = event->hw.last_period;
585 data.addr = 0;
586 data.raw = NULL;
587 regs.ip = 0; 596 regs.ip = 0;
588 597
589 /* 598 /*
@@ -732,8 +741,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
732 int bit, loops; 741 int bit, loops;
733 u64 ack, status; 742 u64 ack, status;
734 743
735 data.addr = 0; 744 perf_sample_data_init(&data, 0);
736 data.raw = NULL;
737 745
738 cpuc = &__get_cpu_var(cpu_hw_events); 746 cpuc = &__get_cpu_var(cpu_hw_events);
739 747
@@ -757,7 +765,7 @@ again:
757 765
758 inc_irq_stat(apic_perf_irqs); 766 inc_irq_stat(apic_perf_irqs);
759 ack = status; 767 ack = status;
760 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 768 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
761 struct perf_event *event = cpuc->events[bit]; 769 struct perf_event *event = cpuc->events[bit];
762 770
763 clear_bit(bit, (unsigned long *) &status); 771 clear_bit(bit, (unsigned long *) &status);
@@ -935,7 +943,7 @@ static __init int intel_pmu_init(void)
935 x86_pmu.event_constraints = intel_nehalem_event_constraints; 943 x86_pmu.event_constraints = intel_nehalem_event_constraints;
936 pr_cont("Nehalem/Corei7 events, "); 944 pr_cont("Nehalem/Corei7 events, ");
937 break; 945 break;
938 case 28: 946 case 28: /* Atom */
939 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 947 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
940 sizeof(hw_cache_event_ids)); 948 sizeof(hw_cache_event_ids));
941 949
@@ -951,6 +959,7 @@ static __init int intel_pmu_init(void)
951 x86_pmu.event_constraints = intel_westmere_event_constraints; 959 x86_pmu.event_constraints = intel_westmere_event_constraints;
952 pr_cont("Westmere events, "); 960 pr_cont("Westmere events, ");
953 break; 961 break;
962
954 default: 963 default:
955 /* 964 /*
956 * default constraints for v2 and up 965 * default constraints for v2 and up
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 1ca5ba078afd..a4e67b99d91c 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -62,7 +62,7 @@ static void p6_pmu_disable_all(void)
62 62
63 /* p6 only has one enable register */ 63 /* p6 only has one enable register */
64 rdmsrl(MSR_P6_EVNTSEL0, val); 64 rdmsrl(MSR_P6_EVNTSEL0, val);
65 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 65 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
66 wrmsrl(MSR_P6_EVNTSEL0, val); 66 wrmsrl(MSR_P6_EVNTSEL0, val);
67} 67}
68 68
@@ -72,7 +72,7 @@ static void p6_pmu_enable_all(void)
72 72
73 /* p6 only has one enable register */ 73 /* p6 only has one enable register */
74 rdmsrl(MSR_P6_EVNTSEL0, val); 74 rdmsrl(MSR_P6_EVNTSEL0, val);
75 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 75 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
76 wrmsrl(MSR_P6_EVNTSEL0, val); 76 wrmsrl(MSR_P6_EVNTSEL0, val);
77} 77}
78 78
@@ -83,7 +83,7 @@ p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
83 u64 val = P6_NOP_EVENT; 83 u64 val = P6_NOP_EVENT;
84 84
85 if (cpuc->enabled) 85 if (cpuc->enabled)
86 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 86 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
87 87
88 (void)checking_wrmsrl(hwc->config_base + idx, val); 88 (void)checking_wrmsrl(hwc->config_base + idx, val);
89} 89}
@@ -95,7 +95,7 @@ static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
95 95
96 val = hwc->config; 96 val = hwc->config;
97 if (cpuc->enabled) 97 if (cpuc->enabled)
98 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 98 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
99 99
100 (void)checking_wrmsrl(hwc->config_base + idx, val); 100 (void)checking_wrmsrl(hwc->config_base + idx, val);
101} 101}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 74f4e85a5727..fb329e9f8494 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -680,7 +680,7 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
680 cpu_nmi_set_wd_enabled(); 680 cpu_nmi_set_wd_enabled();
681 681
682 apic_write(APIC_LVTPC, APIC_DM_NMI); 682 apic_write(APIC_LVTPC, APIC_DM_NMI);
683 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; 683 evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
684 wrmsr(evntsel_msr, evntsel, 0); 684 wrmsr(evntsel_msr, evntsel, 0);
685 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); 685 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
686 return 1; 686 return 1;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index dce99abb4496..d5e2a2ebb627 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -120,9 +120,15 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
120{ 120{
121#ifdef CONFIG_FRAME_POINTER 121#ifdef CONFIG_FRAME_POINTER
122 struct stack_frame *frame = (struct stack_frame *)bp; 122 struct stack_frame *frame = (struct stack_frame *)bp;
123 unsigned long next;
123 124
124 if (!in_irq_stack(stack, irq_stack, irq_stack_end)) 125 if (!in_irq_stack(stack, irq_stack, irq_stack_end)) {
125 return (unsigned long)frame->next_frame; 126 if (!probe_kernel_address(&frame->next_frame, next))
127 return next;
128 else
129 WARN_ONCE(1, "Perf: bad frame pointer = %p in "
130 "callchain\n", &frame->next_frame);
131 }
126#endif 132#endif
127 return bp; 133 return bp;
128} 134}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a966b753e496..740b440fbd73 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -12,21 +12,13 @@
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/bootmem.h> 14#include <linux/bootmem.h>
15#include <linux/ioport.h>
16#include <linux/string.h>
17#include <linux/kexec.h>
18#include <linux/module.h>
19#include <linux/mm.h>
20#include <linux/pfn.h> 15#include <linux/pfn.h>
21#include <linux/suspend.h> 16#include <linux/suspend.h>
22#include <linux/firmware-map.h> 17#include <linux/firmware-map.h>
23 18
24#include <asm/pgtable.h>
25#include <asm/page.h>
26#include <asm/e820.h> 19#include <asm/e820.h>
27#include <asm/proto.h> 20#include <asm/proto.h>
28#include <asm/setup.h> 21#include <asm/setup.h>
29#include <asm/trampoline.h>
30 22
31/* 23/*
32 * The e820 map is the map that gets modified e.g. with command line parameters 24 * The e820 map is the map that gets modified e.g. with command line parameters
@@ -730,319 +722,44 @@ core_initcall(e820_mark_nvs_memory);
730#endif 722#endif
731 723
732/* 724/*
733 * Early reserved memory areas. 725 * Find a free area with specified alignment in a specific range.
734 */
735#define MAX_EARLY_RES 32
736
737struct early_res {
738 u64 start, end;
739 char name[16];
740 char overlap_ok;
741};
742static struct early_res early_res[MAX_EARLY_RES] __initdata = {
743 { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
744#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE)
745 /*
746 * But first pinch a few for the stack/trampoline stuff
747 * FIXME: Don't need the extra page at 4K, but need to fix
748 * trampoline before removing it. (see the GDT stuff)
749 */
750 { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 },
751#endif
752
753 {}
754};
755
756static int __init find_overlapped_early(u64 start, u64 end)
757{
758 int i;
759 struct early_res *r;
760
761 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
762 r = &early_res[i];
763 if (end > r->start && start < r->end)
764 break;
765 }
766
767 return i;
768}
769
770/*
771 * Drop the i-th range from the early reservation map,
772 * by copying any higher ranges down one over it, and
773 * clearing what had been the last slot.
774 */
775static void __init drop_range(int i)
776{
777 int j;
778
779 for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
780 ;
781
782 memmove(&early_res[i], &early_res[i + 1],
783 (j - 1 - i) * sizeof(struct early_res));
784
785 early_res[j - 1].end = 0;
786}
787
788/*
789 * Split any existing ranges that:
790 * 1) are marked 'overlap_ok', and
791 * 2) overlap with the stated range [start, end)
792 * into whatever portion (if any) of the existing range is entirely
793 * below or entirely above the stated range. Drop the portion
794 * of the existing range that overlaps with the stated range,
795 * which will allow the caller of this routine to then add that
796 * stated range without conflicting with any existing range.
797 */ 726 */
798static void __init drop_overlaps_that_are_ok(u64 start, u64 end) 727u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
799{ 728{
800 int i; 729 int i;
801 struct early_res *r;
802 u64 lower_start, lower_end;
803 u64 upper_start, upper_end;
804 char name[16];
805 730
806 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { 731 for (i = 0; i < e820.nr_map; i++) {
807 r = &early_res[i]; 732 struct e820entry *ei = &e820.map[i];
733 u64 addr;
734 u64 ei_start, ei_last;
808 735
809 /* Continue past non-overlapping ranges */ 736 if (ei->type != E820_RAM)
810 if (end <= r->start || start >= r->end)
811 continue; 737 continue;
812 738
813 /* 739 ei_last = ei->addr + ei->size;
814 * Leave non-ok overlaps as is; let caller 740 ei_start = ei->addr;
815 * panic "Overlapping early reservations" 741 addr = find_early_area(ei_start, ei_last, start, end,
816 * when it hits this overlap. 742 size, align);
817 */
818 if (!r->overlap_ok)
819 return;
820
821 /*
822 * We have an ok overlap. We will drop it from the early
823 * reservation map, and add back in any non-overlapping
824 * portions (lower or upper) as separate, overlap_ok,
825 * non-overlapping ranges.
826 */
827
828 /* 1. Note any non-overlapping (lower or upper) ranges. */
829 strncpy(name, r->name, sizeof(name) - 1);
830
831 lower_start = lower_end = 0;
832 upper_start = upper_end = 0;
833 if (r->start < start) {
834 lower_start = r->start;
835 lower_end = start;
836 }
837 if (r->end > end) {
838 upper_start = end;
839 upper_end = r->end;
840 }
841
842 /* 2. Drop the original ok overlapping range */
843 drop_range(i);
844
845 i--; /* resume for-loop on copied down entry */
846
847 /* 3. Add back in any non-overlapping ranges. */
848 if (lower_end)
849 reserve_early_overlap_ok(lower_start, lower_end, name);
850 if (upper_end)
851 reserve_early_overlap_ok(upper_start, upper_end, name);
852 }
853}
854
855static void __init __reserve_early(u64 start, u64 end, char *name,
856 int overlap_ok)
857{
858 int i;
859 struct early_res *r;
860
861 i = find_overlapped_early(start, end);
862 if (i >= MAX_EARLY_RES)
863 panic("Too many early reservations");
864 r = &early_res[i];
865 if (r->end)
866 panic("Overlapping early reservations "
867 "%llx-%llx %s to %llx-%llx %s\n",
868 start, end - 1, name?name:"", r->start,
869 r->end - 1, r->name);
870 r->start = start;
871 r->end = end;
872 r->overlap_ok = overlap_ok;
873 if (name)
874 strncpy(r->name, name, sizeof(r->name) - 1);
875}
876
877/*
878 * A few early reservtations come here.
879 *
880 * The 'overlap_ok' in the name of this routine does -not- mean it
881 * is ok for these reservations to overlap an earlier reservation.
882 * Rather it means that it is ok for subsequent reservations to
883 * overlap this one.
884 *
885 * Use this entry point to reserve early ranges when you are doing
886 * so out of "Paranoia", reserving perhaps more memory than you need,
887 * just in case, and don't mind a subsequent overlapping reservation
888 * that is known to be needed.
889 *
890 * The drop_overlaps_that_are_ok() call here isn't really needed.
891 * It would be needed if we had two colliding 'overlap_ok'
892 * reservations, so that the second such would not panic on the
893 * overlap with the first. We don't have any such as of this
894 * writing, but might as well tolerate such if it happens in
895 * the future.
896 */
897void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
898{
899 drop_overlaps_that_are_ok(start, end);
900 __reserve_early(start, end, name, 1);
901}
902
903/*
904 * Most early reservations come here.
905 *
906 * We first have drop_overlaps_that_are_ok() drop any pre-existing
907 * 'overlap_ok' ranges, so that we can then reserve this memory
908 * range without risk of panic'ing on an overlapping overlap_ok
909 * early reservation.
910 */
911void __init reserve_early(u64 start, u64 end, char *name)
912{
913 if (start >= end)
914 return;
915
916 drop_overlaps_that_are_ok(start, end);
917 __reserve_early(start, end, name, 0);
918}
919
920void __init free_early(u64 start, u64 end)
921{
922 struct early_res *r;
923 int i;
924
925 i = find_overlapped_early(start, end);
926 r = &early_res[i];
927 if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
928 panic("free_early on not reserved area: %llx-%llx!",
929 start, end - 1);
930
931 drop_range(i);
932}
933
934void __init early_res_to_bootmem(u64 start, u64 end)
935{
936 int i, count;
937 u64 final_start, final_end;
938
939 count = 0;
940 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
941 count++;
942
943 printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
944 count, start, end);
945 for (i = 0; i < count; i++) {
946 struct early_res *r = &early_res[i];
947 printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i,
948 r->start, r->end, r->name);
949 final_start = max(start, r->start);
950 final_end = min(end, r->end);
951 if (final_start >= final_end) {
952 printk(KERN_CONT "\n");
953 continue;
954 }
955 printk(KERN_CONT " ==> [%010llx - %010llx]\n",
956 final_start, final_end);
957 reserve_bootmem_generic(final_start, final_end - final_start,
958 BOOTMEM_DEFAULT);
959 }
960}
961 743
962/* Check for already reserved areas */ 744 if (addr != -1ULL)
963static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) 745 return addr;
964{
965 int i;
966 u64 addr = *addrp;
967 int changed = 0;
968 struct early_res *r;
969again:
970 i = find_overlapped_early(addr, addr + size);
971 r = &early_res[i];
972 if (i < MAX_EARLY_RES && r->end) {
973 *addrp = addr = round_up(r->end, align);
974 changed = 1;
975 goto again;
976 } 746 }
977 return changed; 747 return -1ULL;
978} 748}
979 749
980/* Check for already reserved areas */ 750u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
981static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
982{ 751{
983 int i; 752 return find_e820_area(start, end, size, align);
984 u64 addr = *addrp, last;
985 u64 size = *sizep;
986 int changed = 0;
987again:
988 last = addr + size;
989 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
990 struct early_res *r = &early_res[i];
991 if (last > r->start && addr < r->start) {
992 size = r->start - addr;
993 changed = 1;
994 goto again;
995 }
996 if (last > r->end && addr < r->end) {
997 addr = round_up(r->end, align);
998 size = last - addr;
999 changed = 1;
1000 goto again;
1001 }
1002 if (last <= r->end && addr >= r->start) {
1003 (*sizep)++;
1004 return 0;
1005 }
1006 }
1007 if (changed) {
1008 *addrp = addr;
1009 *sizep = size;
1010 }
1011 return changed;
1012} 753}
1013 754
1014/* 755u64 __init get_max_mapped(void)
1015 * Find a free area with specified alignment in a specific range.
1016 */
1017u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
1018{ 756{
1019 int i; 757 u64 end = max_pfn_mapped;
1020 758
1021 for (i = 0; i < e820.nr_map; i++) { 759 end <<= PAGE_SHIFT;
1022 struct e820entry *ei = &e820.map[i];
1023 u64 addr, last;
1024 u64 ei_last;
1025 760
1026 if (ei->type != E820_RAM) 761 return end;
1027 continue;
1028 addr = round_up(ei->addr, align);
1029 ei_last = ei->addr + ei->size;
1030 if (addr < start)
1031 addr = round_up(start, align);
1032 if (addr >= ei_last)
1033 continue;
1034 while (bad_addr(&addr, size, align) && addr+size <= ei_last)
1035 ;
1036 last = addr + size;
1037 if (last > ei_last)
1038 continue;
1039 if (last > end)
1040 continue;
1041 return addr;
1042 }
1043 return -1ULL;
1044} 762}
1045
1046/* 763/*
1047 * Find next free range after *start 764 * Find next free range after *start
1048 */ 765 */
@@ -1052,25 +769,19 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
1052 769
1053 for (i = 0; i < e820.nr_map; i++) { 770 for (i = 0; i < e820.nr_map; i++) {
1054 struct e820entry *ei = &e820.map[i]; 771 struct e820entry *ei = &e820.map[i];
1055 u64 addr, last; 772 u64 addr;
1056 u64 ei_last; 773 u64 ei_start, ei_last;
1057 774
1058 if (ei->type != E820_RAM) 775 if (ei->type != E820_RAM)
1059 continue; 776 continue;
1060 addr = round_up(ei->addr, align); 777
1061 ei_last = ei->addr + ei->size; 778 ei_last = ei->addr + ei->size;
1062 if (addr < start) 779 ei_start = ei->addr;
1063 addr = round_up(start, align); 780 addr = find_early_area_size(ei_start, ei_last, start,
1064 if (addr >= ei_last) 781 sizep, align);
1065 continue; 782
1066 *sizep = ei_last - addr; 783 if (addr != -1ULL)
1067 while (bad_addr_size(&addr, sizep, align) && 784 return addr;
1068 addr + *sizep <= ei_last)
1069 ;
1070 last = addr + *sizep;
1071 if (last > ei_last)
1072 continue;
1073 return addr;
1074 } 785 }
1075 786
1076 return -1ULL; 787 return -1ULL;
@@ -1429,6 +1140,8 @@ void __init e820_reserve_resources_late(void)
1429 end = MAX_RESOURCE_SIZE; 1140 end = MAX_RESOURCE_SIZE;
1430 if (start >= end) 1141 if (start >= end)
1431 continue; 1142 continue;
1143 printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
1144 start, end);
1432 reserve_region_with_split(&iomem_resource, start, end, 1145 reserve_region_with_split(&iomem_resource, start, end,
1433 "RAM buffer"); 1146 "RAM buffer");
1434 } 1147 }
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 5051b94c9069..adedeef1dedc 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -29,6 +29,16 @@ static void __init i386_default_early_setup(void)
29 29
30void __init i386_start_kernel(void) 30void __init i386_start_kernel(void)
31{ 31{
32#ifdef CONFIG_X86_TRAMPOLINE
33 /*
34 * But first pinch a few for the stack/trampoline stuff
35 * FIXME: Don't need the extra page at 4K, but need to fix
36 * trampoline before removing it. (see the GDT stuff)
37 */
38 reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE,
39 "EX TRAMPOLINE");
40#endif
41
32 reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); 42 reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
33 43
34#ifdef CONFIG_BLK_DEV_INITRD 44#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 7fd318bac59c..37c3d4b17d85 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -442,8 +442,8 @@ is386: movl $2,%ecx # set MP
442 */ 442 */
443 cmpb $0,ready 443 cmpb $0,ready
444 jne 1f 444 jne 1f
445 movl $per_cpu__gdt_page,%eax 445 movl $gdt_page,%eax
446 movl $per_cpu__stack_canary,%ecx 446 movl $stack_canary,%ecx
447 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) 447 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
448 shrl $16, %ecx 448 shrl $16, %ecx
449 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) 449 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
@@ -706,7 +706,7 @@ idt_descr:
706 .word 0 # 32 bit align gdt_desc.address 706 .word 0 # 32 bit align gdt_desc.address
707ENTRY(early_gdt_descr) 707ENTRY(early_gdt_descr)
708 .word GDT_ENTRIES*8-1 708 .word GDT_ENTRIES*8-1
709 .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ 709 .long gdt_page /* Overwritten for secondary CPUs */
710 710
711/* 711/*
712 * The boot_gdt must mirror the equivalent in setup.S and is 712 * The boot_gdt must mirror the equivalent in setup.S and is
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 2d8b5035371c..3d1e6f16b7a6 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -27,7 +27,7 @@
27#define GET_CR2_INTO_RCX movq %cr2, %rcx 27#define GET_CR2_INTO_RCX movq %cr2, %rcx
28#endif 28#endif
29 29
30/* we are not able to switch in one step to the final KERNEL ADRESS SPACE 30/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE
31 * because we need identity-mapped pages. 31 * because we need identity-mapped pages.
32 * 32 *
33 */ 33 */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ad80a1c718c6..ee4fa1bfcb33 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -266,7 +266,7 @@ static void hpet_resume_device(void)
266 force_hpet_resume(); 266 force_hpet_resume();
267} 267}
268 268
269static void hpet_resume_counter(void) 269static void hpet_resume_counter(struct clocksource *cs)
270{ 270{
271 hpet_resume_device(); 271 hpet_resume_device();
272 hpet_restart_counter(); 272 hpet_restart_counter();
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index dca2802c666f..d6cc065f519f 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -344,13 +344,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
344 } 344 }
345 345
346 /* 346 /*
347 * For kernel-addresses, either the address or symbol name can be
348 * specified.
349 */
350 if (info->name)
351 info->address = (unsigned long)
352 kallsyms_lookup_name(info->name);
353 /*
354 * Check that the low-order bits of the address are appropriate 347 * Check that the low-order bits of the address are appropriate
355 * for the alignment implied by len. 348 * for the alignment implied by len.
356 */ 349 */
@@ -535,8 +528,3 @@ void hw_breakpoint_pmu_read(struct perf_event *bp)
535{ 528{
536 /* TODO */ 529 /* TODO */
537} 530}
538
539void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
540{
541 /* TODO */
542}
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index df89102bef80..fb725ee15f55 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -32,8 +32,14 @@
32 */ 32 */
33 33
34static int i8259A_auto_eoi; 34static int i8259A_auto_eoi;
35DEFINE_SPINLOCK(i8259A_lock); 35DEFINE_RAW_SPINLOCK(i8259A_lock);
36static void mask_and_ack_8259A(unsigned int); 36static void mask_and_ack_8259A(unsigned int);
37static void mask_8259A(void);
38static void unmask_8259A(void);
39static void disable_8259A_irq(unsigned int irq);
40static void enable_8259A_irq(unsigned int irq);
41static void init_8259A(int auto_eoi);
42static int i8259A_irq_pending(unsigned int irq);
37 43
38struct irq_chip i8259A_chip = { 44struct irq_chip i8259A_chip = {
39 .name = "XT-PIC", 45 .name = "XT-PIC",
@@ -63,51 +69,51 @@ unsigned int cached_irq_mask = 0xffff;
63 */ 69 */
64unsigned long io_apic_irqs; 70unsigned long io_apic_irqs;
65 71
66void disable_8259A_irq(unsigned int irq) 72static void disable_8259A_irq(unsigned int irq)
67{ 73{
68 unsigned int mask = 1 << irq; 74 unsigned int mask = 1 << irq;
69 unsigned long flags; 75 unsigned long flags;
70 76
71 spin_lock_irqsave(&i8259A_lock, flags); 77 raw_spin_lock_irqsave(&i8259A_lock, flags);
72 cached_irq_mask |= mask; 78 cached_irq_mask |= mask;
73 if (irq & 8) 79 if (irq & 8)
74 outb(cached_slave_mask, PIC_SLAVE_IMR); 80 outb(cached_slave_mask, PIC_SLAVE_IMR);
75 else 81 else
76 outb(cached_master_mask, PIC_MASTER_IMR); 82 outb(cached_master_mask, PIC_MASTER_IMR);
77 spin_unlock_irqrestore(&i8259A_lock, flags); 83 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
78} 84}
79 85
80void enable_8259A_irq(unsigned int irq) 86static void enable_8259A_irq(unsigned int irq)
81{ 87{
82 unsigned int mask = ~(1 << irq); 88 unsigned int mask = ~(1 << irq);
83 unsigned long flags; 89 unsigned long flags;
84 90
85 spin_lock_irqsave(&i8259A_lock, flags); 91 raw_spin_lock_irqsave(&i8259A_lock, flags);
86 cached_irq_mask &= mask; 92 cached_irq_mask &= mask;
87 if (irq & 8) 93 if (irq & 8)
88 outb(cached_slave_mask, PIC_SLAVE_IMR); 94 outb(cached_slave_mask, PIC_SLAVE_IMR);
89 else 95 else
90 outb(cached_master_mask, PIC_MASTER_IMR); 96 outb(cached_master_mask, PIC_MASTER_IMR);
91 spin_unlock_irqrestore(&i8259A_lock, flags); 97 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
92} 98}
93 99
94int i8259A_irq_pending(unsigned int irq) 100static int i8259A_irq_pending(unsigned int irq)
95{ 101{
96 unsigned int mask = 1<<irq; 102 unsigned int mask = 1<<irq;
97 unsigned long flags; 103 unsigned long flags;
98 int ret; 104 int ret;
99 105
100 spin_lock_irqsave(&i8259A_lock, flags); 106 raw_spin_lock_irqsave(&i8259A_lock, flags);
101 if (irq < 8) 107 if (irq < 8)
102 ret = inb(PIC_MASTER_CMD) & mask; 108 ret = inb(PIC_MASTER_CMD) & mask;
103 else 109 else
104 ret = inb(PIC_SLAVE_CMD) & (mask >> 8); 110 ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
105 spin_unlock_irqrestore(&i8259A_lock, flags); 111 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
106 112
107 return ret; 113 return ret;
108} 114}
109 115
110void make_8259A_irq(unsigned int irq) 116static void make_8259A_irq(unsigned int irq)
111{ 117{
112 disable_irq_nosync(irq); 118 disable_irq_nosync(irq);
113 io_apic_irqs &= ~(1<<irq); 119 io_apic_irqs &= ~(1<<irq);
@@ -150,7 +156,7 @@ static void mask_and_ack_8259A(unsigned int irq)
150 unsigned int irqmask = 1 << irq; 156 unsigned int irqmask = 1 << irq;
151 unsigned long flags; 157 unsigned long flags;
152 158
153 spin_lock_irqsave(&i8259A_lock, flags); 159 raw_spin_lock_irqsave(&i8259A_lock, flags);
154 /* 160 /*
155 * Lightweight spurious IRQ detection. We do not want 161 * Lightweight spurious IRQ detection. We do not want
156 * to overdo spurious IRQ handling - it's usually a sign 162 * to overdo spurious IRQ handling - it's usually a sign
@@ -183,7 +189,7 @@ handle_real_irq:
183 outb(cached_master_mask, PIC_MASTER_IMR); 189 outb(cached_master_mask, PIC_MASTER_IMR);
184 outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ 190 outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */
185 } 191 }
186 spin_unlock_irqrestore(&i8259A_lock, flags); 192 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
187 return; 193 return;
188 194
189spurious_8259A_irq: 195spurious_8259A_irq:
@@ -281,37 +287,37 @@ static int __init i8259A_init_sysfs(void)
281 287
282device_initcall(i8259A_init_sysfs); 288device_initcall(i8259A_init_sysfs);
283 289
284void mask_8259A(void) 290static void mask_8259A(void)
285{ 291{
286 unsigned long flags; 292 unsigned long flags;
287 293
288 spin_lock_irqsave(&i8259A_lock, flags); 294 raw_spin_lock_irqsave(&i8259A_lock, flags);
289 295
290 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 296 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
291 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ 297 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
292 298
293 spin_unlock_irqrestore(&i8259A_lock, flags); 299 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
294} 300}
295 301
296void unmask_8259A(void) 302static void unmask_8259A(void)
297{ 303{
298 unsigned long flags; 304 unsigned long flags;
299 305
300 spin_lock_irqsave(&i8259A_lock, flags); 306 raw_spin_lock_irqsave(&i8259A_lock, flags);
301 307
302 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ 308 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
303 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ 309 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
304 310
305 spin_unlock_irqrestore(&i8259A_lock, flags); 311 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
306} 312}
307 313
308void init_8259A(int auto_eoi) 314static void init_8259A(int auto_eoi)
309{ 315{
310 unsigned long flags; 316 unsigned long flags;
311 317
312 i8259A_auto_eoi = auto_eoi; 318 i8259A_auto_eoi = auto_eoi;
313 319
314 spin_lock_irqsave(&i8259A_lock, flags); 320 raw_spin_lock_irqsave(&i8259A_lock, flags);
315 321
316 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 322 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
317 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ 323 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
@@ -356,5 +362,49 @@ void init_8259A(int auto_eoi)
356 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ 362 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
357 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ 363 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
358 364
359 spin_unlock_irqrestore(&i8259A_lock, flags); 365 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
360} 366}
367
368/*
369 * make i8259 a driver so that we can select pic functions at run time. the goal
370 * is to make x86 binary compatible among pc compatible and non-pc compatible
371 * platforms, such as x86 MID.
372 */
373
374static void legacy_pic_noop(void) { };
375static void legacy_pic_uint_noop(unsigned int unused) { };
376static void legacy_pic_int_noop(int unused) { };
377
378static struct irq_chip dummy_pic_chip = {
379 .name = "dummy pic",
380 .mask = legacy_pic_uint_noop,
381 .unmask = legacy_pic_uint_noop,
382 .disable = legacy_pic_uint_noop,
383 .mask_ack = legacy_pic_uint_noop,
384};
385static int legacy_pic_irq_pending_noop(unsigned int irq)
386{
387 return 0;
388}
389
390struct legacy_pic null_legacy_pic = {
391 .nr_legacy_irqs = 0,
392 .chip = &dummy_pic_chip,
393 .mask_all = legacy_pic_noop,
394 .restore_mask = legacy_pic_noop,
395 .init = legacy_pic_int_noop,
396 .irq_pending = legacy_pic_irq_pending_noop,
397 .make_irq = legacy_pic_uint_noop,
398};
399
400struct legacy_pic default_legacy_pic = {
401 .nr_legacy_irqs = NR_IRQS_LEGACY,
402 .chip = &i8259A_chip,
403 .mask_all = mask_8259A,
404 .restore_mask = unmask_8259A,
405 .init = init_8259A,
406 .irq_pending = i8259A_irq_pending,
407 .make_irq = make_8259A_irq,
408};
409
410struct legacy_pic *legacy_pic = &default_legacy_pic;
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index d5932226614f..ef257fc2921b 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -84,24 +84,7 @@ static struct irqaction irq2 = {
84}; 84};
85 85
86DEFINE_PER_CPU(vector_irq_t, vector_irq) = { 86DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
87 [0 ... IRQ0_VECTOR - 1] = -1, 87 [0 ... NR_VECTORS - 1] = -1,
88 [IRQ0_VECTOR] = 0,
89 [IRQ1_VECTOR] = 1,
90 [IRQ2_VECTOR] = 2,
91 [IRQ3_VECTOR] = 3,
92 [IRQ4_VECTOR] = 4,
93 [IRQ5_VECTOR] = 5,
94 [IRQ6_VECTOR] = 6,
95 [IRQ7_VECTOR] = 7,
96 [IRQ8_VECTOR] = 8,
97 [IRQ9_VECTOR] = 9,
98 [IRQ10_VECTOR] = 10,
99 [IRQ11_VECTOR] = 11,
100 [IRQ12_VECTOR] = 12,
101 [IRQ13_VECTOR] = 13,
102 [IRQ14_VECTOR] = 14,
103 [IRQ15_VECTOR] = 15,
104 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
105}; 88};
106 89
107int vector_used_by_percpu_irq(unsigned int vector) 90int vector_used_by_percpu_irq(unsigned int vector)
@@ -123,12 +106,12 @@ void __init init_ISA_irqs(void)
123#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) 106#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
124 init_bsp_APIC(); 107 init_bsp_APIC();
125#endif 108#endif
126 init_8259A(0); 109 legacy_pic->init(0);
127 110
128 /* 111 /*
129 * 16 old-style INTA-cycle interrupts: 112 * 16 old-style INTA-cycle interrupts:
130 */ 113 */
131 for (i = 0; i < NR_IRQS_LEGACY; i++) { 114 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) {
132 struct irq_desc *desc = irq_to_desc(i); 115 struct irq_desc *desc = irq_to_desc(i);
133 116
134 desc->status = IRQ_DISABLED; 117 desc->status = IRQ_DISABLED;
@@ -142,6 +125,19 @@ void __init init_ISA_irqs(void)
142 125
143void __init init_IRQ(void) 126void __init init_IRQ(void)
144{ 127{
128 int i;
129
130 /*
131 * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
132 * If these IRQ's are handled by legacy interrupt-controllers like PIC,
133 * then this configuration will likely be static after the boot. If
134 * these IRQ's are handled by more mordern controllers like IO-APIC,
135 * then this vector space can be freed and re-used dynamically as the
136 * irq's migrate etc.
137 */
138 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
139 per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i;
140
145 x86_init.irqs.intr_init(); 141 x86_init.irqs.intr_init();
146} 142}
147 143
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 5de9f4a9c3fd..b43bbaebe2c0 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -49,6 +49,7 @@
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kdebug.h> 50#include <linux/kdebug.h>
51#include <linux/kallsyms.h> 51#include <linux/kallsyms.h>
52#include <linux/ftrace.h>
52 53
53#include <asm/cacheflush.h> 54#include <asm/cacheflush.h>
54#include <asm/desc.h> 55#include <asm/desc.h>
@@ -106,16 +107,22 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
106}; 107};
107const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); 108const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
108 109
109/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ 110static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
110static void __kprobes set_jmp_op(void *from, void *to)
111{ 111{
112 struct __arch_jmp_op { 112 struct __arch_relative_insn {
113 char op; 113 u8 op;
114 s32 raddr; 114 s32 raddr;
115 } __attribute__((packed)) * jop; 115 } __attribute__((packed)) *insn;
116 jop = (struct __arch_jmp_op *)from; 116
117 jop->raddr = (s32)((long)(to) - ((long)(from) + 5)); 117 insn = (struct __arch_relative_insn *)from;
118 jop->op = RELATIVEJUMP_INSTRUCTION; 118 insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
119 insn->op = op;
120}
121
122/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
123static void __kprobes synthesize_reljump(void *from, void *to)
124{
125 __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
119} 126}
120 127
121/* 128/*
@@ -202,7 +209,7 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
202 /* 209 /*
203 * Basically, kp->ainsn.insn has an original instruction. 210 * Basically, kp->ainsn.insn has an original instruction.
204 * However, RIP-relative instruction can not do single-stepping 211 * However, RIP-relative instruction can not do single-stepping
205 * at different place, fix_riprel() tweaks the displacement of 212 * at different place, __copy_instruction() tweaks the displacement of
206 * that instruction. In that case, we can't recover the instruction 213 * that instruction. In that case, we can't recover the instruction
207 * from the kp->ainsn.insn. 214 * from the kp->ainsn.insn.
208 * 215 *
@@ -284,21 +291,37 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
284} 291}
285 292
286/* 293/*
287 * Adjust the displacement if the instruction uses the %rip-relative 294 * Copy an instruction and adjust the displacement if the instruction
288 * addressing mode. 295 * uses the %rip-relative addressing mode.
289 * If it does, Return the address of the 32-bit displacement word. 296 * If it does, Return the address of the 32-bit displacement word.
290 * If not, return null. 297 * If not, return null.
291 * Only applicable to 64-bit x86. 298 * Only applicable to 64-bit x86.
292 */ 299 */
293static void __kprobes fix_riprel(struct kprobe *p) 300static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
294{ 301{
295#ifdef CONFIG_X86_64
296 struct insn insn; 302 struct insn insn;
297 kernel_insn_init(&insn, p->ainsn.insn); 303 int ret;
304 kprobe_opcode_t buf[MAX_INSN_SIZE];
298 305
306 kernel_insn_init(&insn, src);
307 if (recover) {
308 insn_get_opcode(&insn);
309 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
310 ret = recover_probed_instruction(buf,
311 (unsigned long)src);
312 if (ret)
313 return 0;
314 kernel_insn_init(&insn, buf);
315 }
316 }
317 insn_get_length(&insn);
318 memcpy(dest, insn.kaddr, insn.length);
319
320#ifdef CONFIG_X86_64
299 if (insn_rip_relative(&insn)) { 321 if (insn_rip_relative(&insn)) {
300 s64 newdisp; 322 s64 newdisp;
301 u8 *disp; 323 u8 *disp;
324 kernel_insn_init(&insn, dest);
302 insn_get_displacement(&insn); 325 insn_get_displacement(&insn);
303 /* 326 /*
304 * The copied instruction uses the %rip-relative addressing 327 * The copied instruction uses the %rip-relative addressing
@@ -312,20 +335,23 @@ static void __kprobes fix_riprel(struct kprobe *p)
312 * extension of the original signed 32-bit displacement would 335 * extension of the original signed 32-bit displacement would
313 * have given. 336 * have given.
314 */ 337 */
315 newdisp = (u8 *) p->addr + (s64) insn.displacement.value - 338 newdisp = (u8 *) src + (s64) insn.displacement.value -
316 (u8 *) p->ainsn.insn; 339 (u8 *) dest;
317 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ 340 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
318 disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); 341 disp = (u8 *) dest + insn_offset_displacement(&insn);
319 *(s32 *) disp = (s32) newdisp; 342 *(s32 *) disp = (s32) newdisp;
320 } 343 }
321#endif 344#endif
345 return insn.length;
322} 346}
323 347
324static void __kprobes arch_copy_kprobe(struct kprobe *p) 348static void __kprobes arch_copy_kprobe(struct kprobe *p)
325{ 349{
326 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 350 /*
327 351 * Copy an instruction without recovering int3, because it will be
328 fix_riprel(p); 352 * put by another subsystem.
353 */
354 __copy_instruction(p->ainsn.insn, p->addr, 0);
329 355
330 if (can_boost(p->addr)) 356 if (can_boost(p->addr))
331 p->ainsn.boostable = 0; 357 p->ainsn.boostable = 0;
@@ -406,18 +432,6 @@ static void __kprobes restore_btf(void)
406 update_debugctlmsr(current->thread.debugctlmsr); 432 update_debugctlmsr(current->thread.debugctlmsr);
407} 433}
408 434
409static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
410{
411 clear_btf();
412 regs->flags |= X86_EFLAGS_TF;
413 regs->flags &= ~X86_EFLAGS_IF;
414 /* single step inline if the instruction is an int3 */
415 if (p->opcode == BREAKPOINT_INSTRUCTION)
416 regs->ip = (unsigned long)p->addr;
417 else
418 regs->ip = (unsigned long)p->ainsn.insn;
419}
420
421void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 435void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
422 struct pt_regs *regs) 436 struct pt_regs *regs)
423{ 437{
@@ -429,20 +443,50 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
429 *sara = (unsigned long) &kretprobe_trampoline; 443 *sara = (unsigned long) &kretprobe_trampoline;
430} 444}
431 445
446#ifdef CONFIG_OPTPROBES
447static int __kprobes setup_detour_execution(struct kprobe *p,
448 struct pt_regs *regs,
449 int reenter);
450#else
451#define setup_detour_execution(p, regs, reenter) (0)
452#endif
453
432static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, 454static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
433 struct kprobe_ctlblk *kcb) 455 struct kprobe_ctlblk *kcb, int reenter)
434{ 456{
457 if (setup_detour_execution(p, regs, reenter))
458 return;
459
435#if !defined(CONFIG_PREEMPT) 460#if !defined(CONFIG_PREEMPT)
436 if (p->ainsn.boostable == 1 && !p->post_handler) { 461 if (p->ainsn.boostable == 1 && !p->post_handler) {
437 /* Boost up -- we can execute copied instructions directly */ 462 /* Boost up -- we can execute copied instructions directly */
438 reset_current_kprobe(); 463 if (!reenter)
464 reset_current_kprobe();
465 /*
466 * Reentering boosted probe doesn't reset current_kprobe,
467 * nor set current_kprobe, because it doesn't use single
468 * stepping.
469 */
439 regs->ip = (unsigned long)p->ainsn.insn; 470 regs->ip = (unsigned long)p->ainsn.insn;
440 preempt_enable_no_resched(); 471 preempt_enable_no_resched();
441 return; 472 return;
442 } 473 }
443#endif 474#endif
444 prepare_singlestep(p, regs); 475 if (reenter) {
445 kcb->kprobe_status = KPROBE_HIT_SS; 476 save_previous_kprobe(kcb);
477 set_current_kprobe(p, regs, kcb);
478 kcb->kprobe_status = KPROBE_REENTER;
479 } else
480 kcb->kprobe_status = KPROBE_HIT_SS;
481 /* Prepare real single stepping */
482 clear_btf();
483 regs->flags |= X86_EFLAGS_TF;
484 regs->flags &= ~X86_EFLAGS_IF;
485 /* single step inline if the instruction is an int3 */
486 if (p->opcode == BREAKPOINT_INSTRUCTION)
487 regs->ip = (unsigned long)p->addr;
488 else
489 regs->ip = (unsigned long)p->ainsn.insn;
446} 490}
447 491
448/* 492/*
@@ -456,11 +500,8 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
456 switch (kcb->kprobe_status) { 500 switch (kcb->kprobe_status) {
457 case KPROBE_HIT_SSDONE: 501 case KPROBE_HIT_SSDONE:
458 case KPROBE_HIT_ACTIVE: 502 case KPROBE_HIT_ACTIVE:
459 save_previous_kprobe(kcb);
460 set_current_kprobe(p, regs, kcb);
461 kprobes_inc_nmissed_count(p); 503 kprobes_inc_nmissed_count(p);
462 prepare_singlestep(p, regs); 504 setup_singlestep(p, regs, kcb, 1);
463 kcb->kprobe_status = KPROBE_REENTER;
464 break; 505 break;
465 case KPROBE_HIT_SS: 506 case KPROBE_HIT_SS:
466 /* A probe has been hit in the codepath leading up to, or just 507 /* A probe has been hit in the codepath leading up to, or just
@@ -535,13 +576,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
535 * more here. 576 * more here.
536 */ 577 */
537 if (!p->pre_handler || !p->pre_handler(p, regs)) 578 if (!p->pre_handler || !p->pre_handler(p, regs))
538 setup_singlestep(p, regs, kcb); 579 setup_singlestep(p, regs, kcb, 0);
539 return 1; 580 return 1;
540 } 581 }
541 } else if (kprobe_running()) { 582 } else if (kprobe_running()) {
542 p = __get_cpu_var(current_kprobe); 583 p = __get_cpu_var(current_kprobe);
543 if (p->break_handler && p->break_handler(p, regs)) { 584 if (p->break_handler && p->break_handler(p, regs)) {
544 setup_singlestep(p, regs, kcb); 585 setup_singlestep(p, regs, kcb, 0);
545 return 1; 586 return 1;
546 } 587 }
547 } /* else: not a kprobe fault; let the kernel handle it */ 588 } /* else: not a kprobe fault; let the kernel handle it */
@@ -550,6 +591,69 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
550 return 0; 591 return 0;
551} 592}
552 593
594#ifdef CONFIG_X86_64
595#define SAVE_REGS_STRING \
596 /* Skip cs, ip, orig_ax. */ \
597 " subq $24, %rsp\n" \
598 " pushq %rdi\n" \
599 " pushq %rsi\n" \
600 " pushq %rdx\n" \
601 " pushq %rcx\n" \
602 " pushq %rax\n" \
603 " pushq %r8\n" \
604 " pushq %r9\n" \
605 " pushq %r10\n" \
606 " pushq %r11\n" \
607 " pushq %rbx\n" \
608 " pushq %rbp\n" \
609 " pushq %r12\n" \
610 " pushq %r13\n" \
611 " pushq %r14\n" \
612 " pushq %r15\n"
613#define RESTORE_REGS_STRING \
614 " popq %r15\n" \
615 " popq %r14\n" \
616 " popq %r13\n" \
617 " popq %r12\n" \
618 " popq %rbp\n" \
619 " popq %rbx\n" \
620 " popq %r11\n" \
621 " popq %r10\n" \
622 " popq %r9\n" \
623 " popq %r8\n" \
624 " popq %rax\n" \
625 " popq %rcx\n" \
626 " popq %rdx\n" \
627 " popq %rsi\n" \
628 " popq %rdi\n" \
629 /* Skip orig_ax, ip, cs */ \
630 " addq $24, %rsp\n"
631#else
632#define SAVE_REGS_STRING \
633 /* Skip cs, ip, orig_ax and gs. */ \
634 " subl $16, %esp\n" \
635 " pushl %fs\n" \
636 " pushl %ds\n" \
637 " pushl %es\n" \
638 " pushl %eax\n" \
639 " pushl %ebp\n" \
640 " pushl %edi\n" \
641 " pushl %esi\n" \
642 " pushl %edx\n" \
643 " pushl %ecx\n" \
644 " pushl %ebx\n"
645#define RESTORE_REGS_STRING \
646 " popl %ebx\n" \
647 " popl %ecx\n" \
648 " popl %edx\n" \
649 " popl %esi\n" \
650 " popl %edi\n" \
651 " popl %ebp\n" \
652 " popl %eax\n" \
653 /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
654 " addl $24, %esp\n"
655#endif
656
553/* 657/*
554 * When a retprobed function returns, this code saves registers and 658 * When a retprobed function returns, this code saves registers and
555 * calls trampoline_handler() runs, which calls the kretprobe's handler. 659 * calls trampoline_handler() runs, which calls the kretprobe's handler.
@@ -563,65 +667,16 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
563 /* We don't bother saving the ss register */ 667 /* We don't bother saving the ss register */
564 " pushq %rsp\n" 668 " pushq %rsp\n"
565 " pushfq\n" 669 " pushfq\n"
566 /* 670 SAVE_REGS_STRING
567 * Skip cs, ip, orig_ax.
568 * trampoline_handler() will plug in these values
569 */
570 " subq $24, %rsp\n"
571 " pushq %rdi\n"
572 " pushq %rsi\n"
573 " pushq %rdx\n"
574 " pushq %rcx\n"
575 " pushq %rax\n"
576 " pushq %r8\n"
577 " pushq %r9\n"
578 " pushq %r10\n"
579 " pushq %r11\n"
580 " pushq %rbx\n"
581 " pushq %rbp\n"
582 " pushq %r12\n"
583 " pushq %r13\n"
584 " pushq %r14\n"
585 " pushq %r15\n"
586 " movq %rsp, %rdi\n" 671 " movq %rsp, %rdi\n"
587 " call trampoline_handler\n" 672 " call trampoline_handler\n"
588 /* Replace saved sp with true return address. */ 673 /* Replace saved sp with true return address. */
589 " movq %rax, 152(%rsp)\n" 674 " movq %rax, 152(%rsp)\n"
590 " popq %r15\n" 675 RESTORE_REGS_STRING
591 " popq %r14\n"
592 " popq %r13\n"
593 " popq %r12\n"
594 " popq %rbp\n"
595 " popq %rbx\n"
596 " popq %r11\n"
597 " popq %r10\n"
598 " popq %r9\n"
599 " popq %r8\n"
600 " popq %rax\n"
601 " popq %rcx\n"
602 " popq %rdx\n"
603 " popq %rsi\n"
604 " popq %rdi\n"
605 /* Skip orig_ax, ip, cs */
606 " addq $24, %rsp\n"
607 " popfq\n" 676 " popfq\n"
608#else 677#else
609 " pushf\n" 678 " pushf\n"
610 /* 679 SAVE_REGS_STRING
611 * Skip cs, ip, orig_ax and gs.
612 * trampoline_handler() will plug in these values
613 */
614 " subl $16, %esp\n"
615 " pushl %fs\n"
616 " pushl %es\n"
617 " pushl %ds\n"
618 " pushl %eax\n"
619 " pushl %ebp\n"
620 " pushl %edi\n"
621 " pushl %esi\n"
622 " pushl %edx\n"
623 " pushl %ecx\n"
624 " pushl %ebx\n"
625 " movl %esp, %eax\n" 680 " movl %esp, %eax\n"
626 " call trampoline_handler\n" 681 " call trampoline_handler\n"
627 /* Move flags to cs */ 682 /* Move flags to cs */
@@ -629,15 +684,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
629 " movl %edx, 52(%esp)\n" 684 " movl %edx, 52(%esp)\n"
630 /* Replace saved flags with true return address. */ 685 /* Replace saved flags with true return address. */
631 " movl %eax, 56(%esp)\n" 686 " movl %eax, 56(%esp)\n"
632 " popl %ebx\n" 687 RESTORE_REGS_STRING
633 " popl %ecx\n"
634 " popl %edx\n"
635 " popl %esi\n"
636 " popl %edi\n"
637 " popl %ebp\n"
638 " popl %eax\n"
639 /* Skip ds, es, fs, gs, orig_ax and ip */
640 " addl $24, %esp\n"
641 " popf\n" 688 " popf\n"
642#endif 689#endif
643 " ret\n"); 690 " ret\n");
@@ -805,8 +852,8 @@ static void __kprobes resume_execution(struct kprobe *p,
805 * These instructions can be executed directly if it 852 * These instructions can be executed directly if it
806 * jumps back to correct address. 853 * jumps back to correct address.
807 */ 854 */
808 set_jmp_op((void *)regs->ip, 855 synthesize_reljump((void *)regs->ip,
809 (void *)orig_ip + (regs->ip - copy_ip)); 856 (void *)orig_ip + (regs->ip - copy_ip));
810 p->ainsn.boostable = 1; 857 p->ainsn.boostable = 1;
811 } else { 858 } else {
812 p->ainsn.boostable = -1; 859 p->ainsn.boostable = -1;
@@ -1033,6 +1080,358 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1033 return 0; 1080 return 0;
1034} 1081}
1035 1082
1083
1084#ifdef CONFIG_OPTPROBES
1085
1086/* Insert a call instruction at address 'from', which calls address 'to'.*/
1087static void __kprobes synthesize_relcall(void *from, void *to)
1088{
1089 __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
1090}
1091
1092/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
1093static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1094 unsigned long val)
1095{
1096#ifdef CONFIG_X86_64
1097 *addr++ = 0x48;
1098 *addr++ = 0xbf;
1099#else
1100 *addr++ = 0xb8;
1101#endif
1102 *(unsigned long *)addr = val;
1103}
1104
1105void __kprobes kprobes_optinsn_template_holder(void)
1106{
1107 asm volatile (
1108 ".global optprobe_template_entry\n"
1109 "optprobe_template_entry: \n"
1110#ifdef CONFIG_X86_64
1111 /* We don't bother saving the ss register */
1112 " pushq %rsp\n"
1113 " pushfq\n"
1114 SAVE_REGS_STRING
1115 " movq %rsp, %rsi\n"
1116 ".global optprobe_template_val\n"
1117 "optprobe_template_val: \n"
1118 ASM_NOP5
1119 ASM_NOP5
1120 ".global optprobe_template_call\n"
1121 "optprobe_template_call: \n"
1122 ASM_NOP5
1123 /* Move flags to rsp */
1124 " movq 144(%rsp), %rdx\n"
1125 " movq %rdx, 152(%rsp)\n"
1126 RESTORE_REGS_STRING
1127 /* Skip flags entry */
1128 " addq $8, %rsp\n"
1129 " popfq\n"
1130#else /* CONFIG_X86_32 */
1131 " pushf\n"
1132 SAVE_REGS_STRING
1133 " movl %esp, %edx\n"
1134 ".global optprobe_template_val\n"
1135 "optprobe_template_val: \n"
1136 ASM_NOP5
1137 ".global optprobe_template_call\n"
1138 "optprobe_template_call: \n"
1139 ASM_NOP5
1140 RESTORE_REGS_STRING
1141 " addl $4, %esp\n" /* skip cs */
1142 " popf\n"
1143#endif
1144 ".global optprobe_template_end\n"
1145 "optprobe_template_end: \n");
1146}
1147
1148#define TMPL_MOVE_IDX \
1149 ((long)&optprobe_template_val - (long)&optprobe_template_entry)
1150#define TMPL_CALL_IDX \
1151 ((long)&optprobe_template_call - (long)&optprobe_template_entry)
1152#define TMPL_END_IDX \
1153 ((long)&optprobe_template_end - (long)&optprobe_template_entry)
1154
1155#define INT3_SIZE sizeof(kprobe_opcode_t)
1156
1157/* Optimized kprobe call back function: called from optinsn */
1158static void __kprobes optimized_callback(struct optimized_kprobe *op,
1159 struct pt_regs *regs)
1160{
1161 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1162
1163 preempt_disable();
1164 if (kprobe_running()) {
1165 kprobes_inc_nmissed_count(&op->kp);
1166 } else {
1167 /* Save skipped registers */
1168#ifdef CONFIG_X86_64
1169 regs->cs = __KERNEL_CS;
1170#else
1171 regs->cs = __KERNEL_CS | get_kernel_rpl();
1172 regs->gs = 0;
1173#endif
1174 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
1175 regs->orig_ax = ~0UL;
1176
1177 __get_cpu_var(current_kprobe) = &op->kp;
1178 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1179 opt_pre_handler(&op->kp, regs);
1180 __get_cpu_var(current_kprobe) = NULL;
1181 }
1182 preempt_enable_no_resched();
1183}
1184
1185static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
1186{
1187 int len = 0, ret;
1188
1189 while (len < RELATIVEJUMP_SIZE) {
1190 ret = __copy_instruction(dest + len, src + len, 1);
1191 if (!ret || !can_boost(dest + len))
1192 return -EINVAL;
1193 len += ret;
1194 }
1195 /* Check whether the address range is reserved */
1196 if (ftrace_text_reserved(src, src + len - 1) ||
1197 alternatives_text_reserved(src, src + len - 1))
1198 return -EBUSY;
1199
1200 return len;
1201}
1202
1203/* Check whether insn is indirect jump */
1204static int __kprobes insn_is_indirect_jump(struct insn *insn)
1205{
1206 return ((insn->opcode.bytes[0] == 0xff &&
1207 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
1208 insn->opcode.bytes[0] == 0xea); /* Segment based jump */
1209}
1210
1211/* Check whether insn jumps into specified address range */
1212static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
1213{
1214 unsigned long target = 0;
1215
1216 switch (insn->opcode.bytes[0]) {
1217 case 0xe0: /* loopne */
1218 case 0xe1: /* loope */
1219 case 0xe2: /* loop */
1220 case 0xe3: /* jcxz */
1221 case 0xe9: /* near relative jump */
1222 case 0xeb: /* short relative jump */
1223 break;
1224 case 0x0f:
1225 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
1226 break;
1227 return 0;
1228 default:
1229 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
1230 break;
1231 return 0;
1232 }
1233 target = (unsigned long)insn->next_byte + insn->immediate.value;
1234
1235 return (start <= target && target <= start + len);
1236}
1237
1238/* Decode whole function to ensure any instructions don't jump into target */
1239static int __kprobes can_optimize(unsigned long paddr)
1240{
1241 int ret;
1242 unsigned long addr, size = 0, offset = 0;
1243 struct insn insn;
1244 kprobe_opcode_t buf[MAX_INSN_SIZE];
1245 /* Dummy buffers for lookup_symbol_attrs */
1246 static char __dummy_buf[KSYM_NAME_LEN];
1247
1248 /* Lookup symbol including addr */
1249 if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf))
1250 return 0;
1251
1252 /* Check there is enough space for a relative jump. */
1253 if (size - offset < RELATIVEJUMP_SIZE)
1254 return 0;
1255
1256 /* Decode instructions */
1257 addr = paddr - offset;
1258 while (addr < paddr - offset + size) { /* Decode until function end */
1259 if (search_exception_tables(addr))
1260 /*
1261 * Since some fixup code will jumps into this function,
1262 * we can't optimize kprobe in this function.
1263 */
1264 return 0;
1265 kernel_insn_init(&insn, (void *)addr);
1266 insn_get_opcode(&insn);
1267 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
1268 ret = recover_probed_instruction(buf, addr);
1269 if (ret)
1270 return 0;
1271 kernel_insn_init(&insn, buf);
1272 }
1273 insn_get_length(&insn);
1274 /* Recover address */
1275 insn.kaddr = (void *)addr;
1276 insn.next_byte = (void *)(addr + insn.length);
1277 /* Check any instructions don't jump into target */
1278 if (insn_is_indirect_jump(&insn) ||
1279 insn_jump_into_range(&insn, paddr + INT3_SIZE,
1280 RELATIVE_ADDR_SIZE))
1281 return 0;
1282 addr += insn.length;
1283 }
1284
1285 return 1;
1286}
1287
1288/* Check optimized_kprobe can actually be optimized. */
1289int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
1290{
1291 int i;
1292 struct kprobe *p;
1293
1294 for (i = 1; i < op->optinsn.size; i++) {
1295 p = get_kprobe(op->kp.addr + i);
1296 if (p && !kprobe_disabled(p))
1297 return -EEXIST;
1298 }
1299
1300 return 0;
1301}
1302
1303/* Check the addr is within the optimized instructions. */
1304int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
1305 unsigned long addr)
1306{
1307 return ((unsigned long)op->kp.addr <= addr &&
1308 (unsigned long)op->kp.addr + op->optinsn.size > addr);
1309}
1310
1311/* Free optimized instruction slot */
1312static __kprobes
1313void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
1314{
1315 if (op->optinsn.insn) {
1316 free_optinsn_slot(op->optinsn.insn, dirty);
1317 op->optinsn.insn = NULL;
1318 op->optinsn.size = 0;
1319 }
1320}
1321
1322void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
1323{
1324 __arch_remove_optimized_kprobe(op, 1);
1325}
1326
1327/*
1328 * Copy replacing target instructions
1329 * Target instructions MUST be relocatable (checked inside)
1330 */
1331int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1332{
1333 u8 *buf;
1334 int ret;
1335 long rel;
1336
1337 if (!can_optimize((unsigned long)op->kp.addr))
1338 return -EILSEQ;
1339
1340 op->optinsn.insn = get_optinsn_slot();
1341 if (!op->optinsn.insn)
1342 return -ENOMEM;
1343
1344 /*
1345 * Verify if the address gap is in 2GB range, because this uses
1346 * a relative jump.
1347 */
1348 rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
1349 if (abs(rel) > 0x7fffffff)
1350 return -ERANGE;
1351
1352 buf = (u8 *)op->optinsn.insn;
1353
1354 /* Copy instructions into the out-of-line buffer */
1355 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
1356 if (ret < 0) {
1357 __arch_remove_optimized_kprobe(op, 0);
1358 return ret;
1359 }
1360 op->optinsn.size = ret;
1361
1362 /* Copy arch-dep-instance from template */
1363 memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
1364
1365 /* Set probe information */
1366 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
1367
1368 /* Set probe function call */
1369 synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
1370
1371 /* Set returning jmp instruction at the tail of out-of-line buffer */
1372 synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
1373 (u8 *)op->kp.addr + op->optinsn.size);
1374
1375 flush_icache_range((unsigned long) buf,
1376 (unsigned long) buf + TMPL_END_IDX +
1377 op->optinsn.size + RELATIVEJUMP_SIZE);
1378 return 0;
1379}
1380
1381/* Replace a breakpoint (int3) with a relative jump. */
1382int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
1383{
1384 unsigned char jmp_code[RELATIVEJUMP_SIZE];
1385 s32 rel = (s32)((long)op->optinsn.insn -
1386 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1387
1388 /* Backup instructions which will be replaced by jump address */
1389 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1390 RELATIVE_ADDR_SIZE);
1391
1392 jmp_code[0] = RELATIVEJUMP_OPCODE;
1393 *(s32 *)(&jmp_code[1]) = rel;
1394
1395 /*
1396 * text_poke_smp doesn't support NMI/MCE code modifying.
1397 * However, since kprobes itself also doesn't support NMI/MCE
1398 * code probing, it's not a problem.
1399 */
1400 text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
1401 return 0;
1402}
1403
1404/* Replace a relative jump with a breakpoint (int3). */
1405void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
1406{
1407 u8 buf[RELATIVEJUMP_SIZE];
1408
1409 /* Set int3 to first byte for kprobes */
1410 buf[0] = BREAKPOINT_INSTRUCTION;
1411 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1412 text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
1413}
1414
1415static int __kprobes setup_detour_execution(struct kprobe *p,
1416 struct pt_regs *regs,
1417 int reenter)
1418{
1419 struct optimized_kprobe *op;
1420
1421 if (p->flags & KPROBE_FLAG_OPTIMIZED) {
1422 /* This kprobe is really able to run optimized path. */
1423 op = container_of(p, struct optimized_kprobe, kp);
1424 /* Detour through copied instructions */
1425 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
1426 if (!reenter)
1427 reset_current_kprobe();
1428 preempt_enable_no_resched();
1429 return 1;
1430 }
1431 return 0;
1432}
1433#endif
1434
1036int __init arch_init_kprobes(void) 1435int __init arch_init_kprobes(void)
1037{ 1436{
1038 return 0; 1437 return 0;
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 712d15fdc416..71825806cd44 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -7,6 +7,8 @@
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/pci.h> 8#include <linux/pci.h>
9#include <linux/dmi.h> 9#include <linux/dmi.h>
10#include <linux/range.h>
11
10#include <asm/pci-direct.h> 12#include <asm/pci-direct.h>
11#include <linux/sort.h> 13#include <linux/sort.h>
12#include <asm/io.h> 14#include <asm/io.h>
@@ -30,11 +32,6 @@ static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
30 { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, 32 { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 },
31}; 33};
32 34
33struct range {
34 u64 start;
35 u64 end;
36};
37
38static int __cpuinit cmp_range(const void *x1, const void *x2) 35static int __cpuinit cmp_range(const void *x1, const void *x2)
39{ 36{
40 const struct range *r1 = x1; 37 const struct range *r1 = x1;
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c
index 3b7078abc871..0aad8670858e 100644
--- a/arch/x86/kernel/mrst.c
+++ b/arch/x86/kernel/mrst.c
@@ -10,8 +10,211 @@
10 * of the License. 10 * of the License.
11 */ 11 */
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/kernel.h>
14#include <linux/sfi.h>
15#include <linux/irq.h>
16#include <linux/module.h>
13 17
14#include <asm/setup.h> 18#include <asm/setup.h>
19#include <asm/mpspec_def.h>
20#include <asm/hw_irq.h>
21#include <asm/apic.h>
22#include <asm/io_apic.h>
23#include <asm/mrst.h>
24#include <asm/io.h>
25#include <asm/i8259.h>
26#include <asm/apb_timer.h>
27
28static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
29static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
30int sfi_mtimer_num;
31
32struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
33EXPORT_SYMBOL_GPL(sfi_mrtc_array);
34int sfi_mrtc_num;
35
36static inline void assign_to_mp_irq(struct mpc_intsrc *m,
37 struct mpc_intsrc *mp_irq)
38{
39 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
40}
41
42static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq,
43 struct mpc_intsrc *m)
44{
45 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
46}
47
48static void save_mp_irq(struct mpc_intsrc *m)
49{
50 int i;
51
52 for (i = 0; i < mp_irq_entries; i++) {
53 if (!mp_irq_cmp(&mp_irqs[i], m))
54 return;
55 }
56
57 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
58 if (++mp_irq_entries == MAX_IRQ_SOURCES)
59 panic("Max # of irq sources exceeded!!\n");
60}
61
62/* parse all the mtimer info to a static mtimer array */
63static int __init sfi_parse_mtmr(struct sfi_table_header *table)
64{
65 struct sfi_table_simple *sb;
66 struct sfi_timer_table_entry *pentry;
67 struct mpc_intsrc mp_irq;
68 int totallen;
69
70 sb = (struct sfi_table_simple *)table;
71 if (!sfi_mtimer_num) {
72 sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb,
73 struct sfi_timer_table_entry);
74 pentry = (struct sfi_timer_table_entry *) sb->pentry;
75 totallen = sfi_mtimer_num * sizeof(*pentry);
76 memcpy(sfi_mtimer_array, pentry, totallen);
77 }
78
79 printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num);
80 pentry = sfi_mtimer_array;
81 for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
82 printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz,"
83 " irq = %d\n", totallen, (u32)pentry->phys_addr,
84 pentry->freq_hz, pentry->irq);
85 if (!pentry->irq)
86 continue;
87 mp_irq.type = MP_IOAPIC;
88 mp_irq.irqtype = mp_INT;
89/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
90 mp_irq.irqflag = 5;
91 mp_irq.srcbus = 0;
92 mp_irq.srcbusirq = pentry->irq; /* IRQ */
93 mp_irq.dstapic = MP_APIC_ALL;
94 mp_irq.dstirq = pentry->irq;
95 save_mp_irq(&mp_irq);
96 }
97
98 return 0;
99}
100
101struct sfi_timer_table_entry *sfi_get_mtmr(int hint)
102{
103 int i;
104 if (hint < sfi_mtimer_num) {
105 if (!sfi_mtimer_usage[hint]) {
106 pr_debug("hint taken for timer %d irq %d\n",\
107 hint, sfi_mtimer_array[hint].irq);
108 sfi_mtimer_usage[hint] = 1;
109 return &sfi_mtimer_array[hint];
110 }
111 }
112 /* take the first timer available */
113 for (i = 0; i < sfi_mtimer_num;) {
114 if (!sfi_mtimer_usage[i]) {
115 sfi_mtimer_usage[i] = 1;
116 return &sfi_mtimer_array[i];
117 }
118 i++;
119 }
120 return NULL;
121}
122
123void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr)
124{
125 int i;
126 for (i = 0; i < sfi_mtimer_num;) {
127 if (mtmr->irq == sfi_mtimer_array[i].irq) {
128 sfi_mtimer_usage[i] = 0;
129 return;
130 }
131 i++;
132 }
133}
134
135/* parse all the mrtc info to a global mrtc array */
136int __init sfi_parse_mrtc(struct sfi_table_header *table)
137{
138 struct sfi_table_simple *sb;
139 struct sfi_rtc_table_entry *pentry;
140 struct mpc_intsrc mp_irq;
141
142 int totallen;
143
144 sb = (struct sfi_table_simple *)table;
145 if (!sfi_mrtc_num) {
146 sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb,
147 struct sfi_rtc_table_entry);
148 pentry = (struct sfi_rtc_table_entry *)sb->pentry;
149 totallen = sfi_mrtc_num * sizeof(*pentry);
150 memcpy(sfi_mrtc_array, pentry, totallen);
151 }
152
153 printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num);
154 pentry = sfi_mrtc_array;
155 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
156 printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n",
157 totallen, (u32)pentry->phys_addr, pentry->irq);
158 mp_irq.type = MP_IOAPIC;
159 mp_irq.irqtype = mp_INT;
160 mp_irq.irqflag = 0;
161 mp_irq.srcbus = 0;
162 mp_irq.srcbusirq = pentry->irq; /* IRQ */
163 mp_irq.dstapic = MP_APIC_ALL;
164 mp_irq.dstirq = pentry->irq;
165 save_mp_irq(&mp_irq);
166 }
167 return 0;
168}
169
170/*
171 * the secondary clock in Moorestown can be APBT or LAPIC clock, default to
172 * APBT but cmdline option can also override it.
173 */
174static void __cpuinit mrst_setup_secondary_clock(void)
175{
176 /* restore default lapic clock if disabled by cmdline */
177 if (disable_apbt_percpu)
178 return setup_secondary_APIC_clock();
179 apbt_setup_secondary_clock();
180}
181
182static unsigned long __init mrst_calibrate_tsc(void)
183{
184 unsigned long flags, fast_calibrate;
185
186 local_irq_save(flags);
187 fast_calibrate = apbt_quick_calibrate();
188 local_irq_restore(flags);
189
190 if (fast_calibrate)
191 return fast_calibrate;
192
193 return 0;
194}
195
196void __init mrst_time_init(void)
197{
198 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
199 pre_init_apic_IRQ0();
200 apbt_time_init();
201}
202
203void __init mrst_rtc_init(void)
204{
205 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
206}
207
208/*
209 * if we use per cpu apb timer, the bootclock already setup. if we use lapic
210 * timer and one apbt timer for broadcast, we need to set up lapic boot clock.
211 */
212static void __init mrst_setup_boot_clock(void)
213{
214 pr_info("%s: per cpu apbt flag %d \n", __func__, disable_apbt_percpu);
215 if (disable_apbt_percpu)
216 setup_boot_APIC_clock();
217};
15 218
16/* 219/*
17 * Moorestown specific x86_init function overrides and early setup 220 * Moorestown specific x86_init function overrides and early setup
@@ -21,4 +224,17 @@ void __init x86_mrst_early_setup(void)
21{ 224{
22 x86_init.resources.probe_roms = x86_init_noop; 225 x86_init.resources.probe_roms = x86_init_noop;
23 x86_init.resources.reserve_resources = x86_init_noop; 226 x86_init.resources.reserve_resources = x86_init_noop;
227
228 x86_init.timers.timer_init = mrst_time_init;
229 x86_init.timers.setup_percpu_clockev = mrst_setup_boot_clock;
230
231 x86_init.irqs.pre_vector_init = x86_init_noop;
232
233 x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock;
234
235 x86_platform.calibrate_tsc = mrst_calibrate_tsc;
236 x86_init.pci.init = pci_mrst_init;
237 x86_init.pci.fixup_irqs = x86_init_noop;
238
239 legacy_pic = &null_legacy_pic;
24} 240}
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 9d1d263f786f..8297160c41b3 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -17,7 +17,9 @@
17#include <linux/spinlock.h> 17#include <linux/spinlock.h>
18#include <linux/io.h> 18#include <linux/io.h>
19#include <linux/string.h> 19#include <linux/string.h>
20
20#include <asm/geode.h> 21#include <asm/geode.h>
22#include <asm/setup.h>
21#include <asm/olpc.h> 23#include <asm/olpc.h>
22 24
23#ifdef CONFIG_OPEN_FIRMWARE 25#ifdef CONFIG_OPEN_FIRMWARE
@@ -243,9 +245,11 @@ static int __init olpc_init(void)
243 olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, 245 olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
244 (unsigned char *) &olpc_platform_info.ecver, 1); 246 (unsigned char *) &olpc_platform_info.ecver, 1);
245 247
246 /* check to see if the VSA exists */ 248#ifdef CONFIG_PCI_OLPC
247 if (cs5535_has_vsa2()) 249 /* If the VSA exists let it emulate PCI, if not emulate in kernel */
248 olpc_platform_info.flags |= OLPC_F_VSA; 250 if (!cs5535_has_vsa2())
251 x86_init.pci.arch_init = pci_olpc_init;
252#endif
249 253
250 printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", 254 printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
251 ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", 255 ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1b1739d16310..1db183ed7c01 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -428,10 +428,6 @@ struct pv_mmu_ops pv_mmu_ops = {
428 .ptep_modify_prot_start = __ptep_modify_prot_start, 428 .ptep_modify_prot_start = __ptep_modify_prot_start,
429 .ptep_modify_prot_commit = __ptep_modify_prot_commit, 429 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
430 430
431#ifdef CONFIG_HIGHPTE
432 .kmap_atomic_pte = kmap_atomic,
433#endif
434
435#if PAGETABLE_LEVELS >= 3 431#if PAGETABLE_LEVELS >= 3
436#ifdef CONFIG_X86_PAE 432#ifdef CONFIG_X86_PAE
437 .set_pte_atomic = native_set_pte_atomic, 433 .set_pte_atomic = native_set_pte_atomic,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 2bbde6078143..fb99f7edb341 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1309,7 +1309,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
1309/* 1309/*
1310 * get_tce_space_from_tar(): 1310 * get_tce_space_from_tar():
1311 * Function for kdump case. Get the tce tables from first kernel 1311 * Function for kdump case. Get the tce tables from first kernel
1312 * by reading the contents of the base adress register of calgary iommu 1312 * by reading the contents of the base address register of calgary iommu
1313 */ 1313 */
1314static void __init get_tce_space_from_tar(void) 1314static void __init get_tce_space_from_tar(void)
1315{ 1315{
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 75e14e21f61a..a4ac764a6880 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -38,7 +38,7 @@ int iommu_detected __read_mostly = 0;
38 * This variable becomes 1 if iommu=pt is passed on the kernel command line. 38 * This variable becomes 1 if iommu=pt is passed on the kernel command line.
39 * If this variable is 1, IOMMU implementations do no DMA translation for 39 * If this variable is 1, IOMMU implementations do no DMA translation for
40 * devices and allow every device to access to whole physical memory. This is 40 * devices and allow every device to access to whole physical memory. This is
41 * useful if a user want to use an IOMMU only for KVM device assignment to 41 * useful if a user wants to use an IOMMU only for KVM device assignment to
42 * guests and not for driver dma translation. 42 * guests and not for driver dma translation.
43 */ 43 */
44int iommu_pass_through __read_mostly; 44int iommu_pass_through __read_mostly;
@@ -65,7 +65,7 @@ int dma_set_mask(struct device *dev, u64 mask)
65} 65}
66EXPORT_SYMBOL(dma_set_mask); 66EXPORT_SYMBOL(dma_set_mask);
67 67
68#ifdef CONFIG_X86_64 68#if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA)
69static __initdata void *dma32_bootmem_ptr; 69static __initdata void *dma32_bootmem_ptr;
70static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); 70static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
71 71
@@ -116,14 +116,21 @@ static void __init dma32_free_bootmem(void)
116 dma32_bootmem_ptr = NULL; 116 dma32_bootmem_ptr = NULL;
117 dma32_bootmem_size = 0; 117 dma32_bootmem_size = 0;
118} 118}
119#else
120void __init dma32_reserve_bootmem(void)
121{
122}
123static void __init dma32_free_bootmem(void)
124{
125}
126
119#endif 127#endif
120 128
121void __init pci_iommu_alloc(void) 129void __init pci_iommu_alloc(void)
122{ 130{
123#ifdef CONFIG_X86_64
124 /* free the range so iommu could get some range less than 4G */ 131 /* free the range so iommu could get some range less than 4G */
125 dma32_free_bootmem(); 132 dma32_free_bootmem();
126#endif 133
127 if (pci_swiotlb_detect()) 134 if (pci_swiotlb_detect())
128 goto out; 135 goto out;
129 136
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 2d96aab82a48..a503b1fd04e5 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -581,7 +581,7 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
581 struct perf_event_attr attr; 581 struct perf_event_attr attr;
582 582
583 /* 583 /*
584 * We shoud have at least an inactive breakpoint at this 584 * We should have at least an inactive breakpoint at this
585 * slot. It means the user is writing dr7 without having 585 * slot. It means the user is writing dr7 without having
586 * written the address register first 586 * written the address register first
587 */ 587 */
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 704bddcdf64d..8e1aac86b50c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -461,6 +461,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
461 DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), 461 DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"),
462 }, 462 },
463 }, 463 },
464 { /* Handle problems with rebooting on the iMac9,1. */
465 .callback = set_pci_reboot,
466 .ident = "Apple iMac9,1",
467 .matches = {
468 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
469 DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
470 },
471 },
464 { } 472 { }
465}; 473};
466 474
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cb42109a55b4..5d7ba1a449bd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -969,15 +969,11 @@ void __init setup_arch(char **cmdline_p)
969#endif 969#endif
970 970
971 initmem_init(0, max_pfn, acpi, k8); 971 initmem_init(0, max_pfn, acpi, k8);
972#ifndef CONFIG_NO_BOOTMEM
973 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
974#endif
972 975
973#ifdef CONFIG_X86_64
974 /*
975 * dma32_reserve_bootmem() allocates bootmem which may conflict
976 * with the crashkernel command line, so do that after
977 * reserve_crashkernel()
978 */
979 dma32_reserve_bootmem(); 976 dma32_reserve_bootmem();
980#endif
981 977
982 reserve_ibft_region(); 978 reserve_ibft_region();
983 979
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 35abcb8b00e9..ef6370b00e70 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -137,7 +137,13 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
137 137
138static void __init pcpu_fc_free(void *ptr, size_t size) 138static void __init pcpu_fc_free(void *ptr, size_t size)
139{ 139{
140#ifdef CONFIG_NO_BOOTMEM
141 u64 start = __pa(ptr);
142 u64 end = start + size;
143 free_early_partial(start, end);
144#else
140 free_bootmem(__pa(ptr), size); 145 free_bootmem(__pa(ptr), size);
146#endif
141} 147}
142 148
143static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) 149static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9b4401115ea1..a02e80c3c54b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -48,6 +48,7 @@
48#include <linux/err.h> 48#include <linux/err.h>
49#include <linux/nmi.h> 49#include <linux/nmi.h>
50#include <linux/tboot.h> 50#include <linux/tboot.h>
51#include <linux/stackprotector.h>
51 52
52#include <asm/acpi.h> 53#include <asm/acpi.h>
53#include <asm/desc.h> 54#include <asm/desc.h>
@@ -67,6 +68,7 @@
67#include <linux/mc146818rtc.h> 68#include <linux/mc146818rtc.h>
68 69
69#include <asm/smpboot_hooks.h> 70#include <asm/smpboot_hooks.h>
71#include <asm/i8259.h>
70 72
71#ifdef CONFIG_X86_32 73#ifdef CONFIG_X86_32
72u8 apicid_2_node[MAX_APICID]; 74u8 apicid_2_node[MAX_APICID];
@@ -241,6 +243,11 @@ static void __cpuinit smp_callin(void)
241 map_cpu_to_logical_apicid(); 243 map_cpu_to_logical_apicid();
242 244
243 notify_cpu_starting(cpuid); 245 notify_cpu_starting(cpuid);
246
247 /*
248 * Need to setup vector mappings before we enable interrupts.
249 */
250 __setup_vector_irq(smp_processor_id());
244 /* 251 /*
245 * Get our bogomips. 252 * Get our bogomips.
246 * 253 *
@@ -286,9 +293,9 @@ notrace static void __cpuinit start_secondary(void *unused)
286 check_tsc_sync_target(); 293 check_tsc_sync_target();
287 294
288 if (nmi_watchdog == NMI_IO_APIC) { 295 if (nmi_watchdog == NMI_IO_APIC) {
289 disable_8259A_irq(0); 296 legacy_pic->chip->mask(0);
290 enable_NMI_through_LVT0(); 297 enable_NMI_through_LVT0();
291 enable_8259A_irq(0); 298 legacy_pic->chip->unmask(0);
292 } 299 }
293 300
294#ifdef CONFIG_X86_32 301#ifdef CONFIG_X86_32
@@ -315,7 +322,6 @@ notrace static void __cpuinit start_secondary(void *unused)
315 */ 322 */
316 ipi_call_lock(); 323 ipi_call_lock();
317 lock_vector_lock(); 324 lock_vector_lock();
318 __setup_vector_irq(smp_processor_id());
319 set_cpu_online(smp_processor_id(), true); 325 set_cpu_online(smp_processor_id(), true);
320 unlock_vector_lock(); 326 unlock_vector_lock();
321 ipi_call_unlock(); 327 ipi_call_unlock();
@@ -325,6 +331,9 @@ notrace static void __cpuinit start_secondary(void *unused)
325 /* enable local interrupts */ 331 /* enable local interrupts */
326 local_irq_enable(); 332 local_irq_enable();
327 333
334 /* to prevent fake stack check failure in clock setup */
335 boot_init_stack_canary();
336
328 x86_cpuinit.setup_percpu_clockev(); 337 x86_cpuinit.setup_percpu_clockev();
329 338
330 wmb(); 339 wmb();
@@ -1212,11 +1221,12 @@ __init void prefill_possible_map(void)
1212 1221
1213 total_cpus = max_t(int, possible, num_processors + disabled_cpus); 1222 total_cpus = max_t(int, possible, num_processors + disabled_cpus);
1214 1223
1215 if (possible > CONFIG_NR_CPUS) { 1224 /* nr_cpu_ids could be reduced via nr_cpus= */
1225 if (possible > nr_cpu_ids) {
1216 printk(KERN_WARNING 1226 printk(KERN_WARNING
1217 "%d Processors exceeds NR_CPUS limit of %d\n", 1227 "%d Processors exceeds NR_CPUS limit of %d\n",
1218 possible, CONFIG_NR_CPUS); 1228 possible, nr_cpu_ids);
1219 possible = CONFIG_NR_CPUS; 1229 possible = nr_cpu_ids;
1220 } 1230 }
1221 1231
1222 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", 1232 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index dee1ff7cba58..196552bb412c 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -25,191 +25,6 @@
25#include <asm/syscalls.h> 25#include <asm/syscalls.h>
26 26
27/* 27/*
28 * Perform the select(nd, in, out, ex, tv) and mmap() system
29 * calls. Linux/i386 didn't use to be able to handle more than
30 * 4 system call parameters, so these system calls used a memory
31 * block for parameter passing..
32 */
33
34struct mmap_arg_struct {
35 unsigned long addr;
36 unsigned long len;
37 unsigned long prot;
38 unsigned long flags;
39 unsigned long fd;
40 unsigned long offset;
41};
42
43asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
44{
45 struct mmap_arg_struct a;
46 int err = -EFAULT;
47
48 if (copy_from_user(&a, arg, sizeof(a)))
49 goto out;
50
51 err = -EINVAL;
52 if (a.offset & ~PAGE_MASK)
53 goto out;
54
55 err = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags,
56 a.fd, a.offset >> PAGE_SHIFT);
57out:
58 return err;
59}
60
61
62struct sel_arg_struct {
63 unsigned long n;
64 fd_set __user *inp, *outp, *exp;
65 struct timeval __user *tvp;
66};
67
68asmlinkage int old_select(struct sel_arg_struct __user *arg)
69{
70 struct sel_arg_struct a;
71
72 if (copy_from_user(&a, arg, sizeof(a)))
73 return -EFAULT;
74 /* sys_select() does the appropriate kernel locking */
75 return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
76}
77
78/*
79 * sys_ipc() is the de-multiplexer for the SysV IPC calls..
80 *
81 * This is really horribly ugly.
82 */
83asmlinkage int sys_ipc(uint call, int first, int second,
84 int third, void __user *ptr, long fifth)
85{
86 int version, ret;
87
88 version = call >> 16; /* hack for backward compatibility */
89 call &= 0xffff;
90
91 switch (call) {
92 case SEMOP:
93 return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL);
94 case SEMTIMEDOP:
95 return sys_semtimedop(first, (struct sembuf __user *)ptr, second,
96 (const struct timespec __user *)fifth);
97
98 case SEMGET:
99 return sys_semget(first, second, third);
100 case SEMCTL: {
101 union semun fourth;
102 if (!ptr)
103 return -EINVAL;
104 if (get_user(fourth.__pad, (void __user * __user *) ptr))
105 return -EFAULT;
106 return sys_semctl(first, second, third, fourth);
107 }
108
109 case MSGSND:
110 return sys_msgsnd(first, (struct msgbuf __user *) ptr,
111 second, third);
112 case MSGRCV:
113 switch (version) {
114 case 0: {
115 struct ipc_kludge tmp;
116 if (!ptr)
117 return -EINVAL;
118
119 if (copy_from_user(&tmp,
120 (struct ipc_kludge __user *) ptr,
121 sizeof(tmp)))
122 return -EFAULT;
123 return sys_msgrcv(first, tmp.msgp, second,
124 tmp.msgtyp, third);
125 }
126 default:
127 return sys_msgrcv(first,
128 (struct msgbuf __user *) ptr,
129 second, fifth, third);
130 }
131 case MSGGET:
132 return sys_msgget((key_t) first, second);
133 case MSGCTL:
134 return sys_msgctl(first, second, (struct msqid_ds __user *) ptr);
135
136 case SHMAT:
137 switch (version) {
138 default: {
139 ulong raddr;
140 ret = do_shmat(first, (char __user *) ptr, second, &raddr);
141 if (ret)
142 return ret;
143 return put_user(raddr, (ulong __user *) third);
144 }
145 case 1: /* iBCS2 emulator entry point */
146 if (!segment_eq(get_fs(), get_ds()))
147 return -EINVAL;
148 /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */
149 return do_shmat(first, (char __user *) ptr, second, (ulong *) third);
150 }
151 case SHMDT:
152 return sys_shmdt((char __user *)ptr);
153 case SHMGET:
154 return sys_shmget(first, second, third);
155 case SHMCTL:
156 return sys_shmctl(first, second,
157 (struct shmid_ds __user *) ptr);
158 default:
159 return -ENOSYS;
160 }
161}
162
163/*
164 * Old cruft
165 */
166asmlinkage int sys_uname(struct old_utsname __user *name)
167{
168 int err;
169 if (!name)
170 return -EFAULT;
171 down_read(&uts_sem);
172 err = copy_to_user(name, utsname(), sizeof(*name));
173 up_read(&uts_sem);
174 return err? -EFAULT:0;
175}
176
177asmlinkage int sys_olduname(struct oldold_utsname __user *name)
178{
179 int error;
180
181 if (!name)
182 return -EFAULT;
183 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
184 return -EFAULT;
185
186 down_read(&uts_sem);
187
188 error = __copy_to_user(&name->sysname, &utsname()->sysname,
189 __OLD_UTS_LEN);
190 error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
191 error |= __copy_to_user(&name->nodename, &utsname()->nodename,
192 __OLD_UTS_LEN);
193 error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
194 error |= __copy_to_user(&name->release, &utsname()->release,
195 __OLD_UTS_LEN);
196 error |= __put_user(0, name->release + __OLD_UTS_LEN);
197 error |= __copy_to_user(&name->version, &utsname()->version,
198 __OLD_UTS_LEN);
199 error |= __put_user(0, name->version + __OLD_UTS_LEN);
200 error |= __copy_to_user(&name->machine, &utsname()->machine,
201 __OLD_UTS_LEN);
202 error |= __put_user(0, name->machine + __OLD_UTS_LEN);
203
204 up_read(&uts_sem);
205
206 error = error ? -EFAULT : 0;
207
208 return error;
209}
210
211
212/*
213 * Do a system call from kernel instead of calling sys_execve so we 28 * Do a system call from kernel instead of calling sys_execve so we
214 * end up with proper pt_regs. 29 * end up with proper pt_regs.
215 */ 30 */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 8aa2057efd12..ff14a5044ce6 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -209,15 +209,3 @@ bottomup:
209 209
210 return addr; 210 return addr;
211} 211}
212
213
214SYSCALL_DEFINE1(uname, struct new_utsname __user *, name)
215{
216 int err;
217 down_read(&uts_sem);
218 err = copy_to_user(name, utsname(), sizeof(*name));
219 up_read(&uts_sem);
220 if (personality(current->personality) == PER_LINUX32)
221 err |= copy_to_user(&name->machine, "i686", 5);
222 return err ? -EFAULT : 0;
223}
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 15228b5d3eb7..8b3729341216 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -81,7 +81,7 @@ ENTRY(sys_call_table)
81 .long sys_settimeofday 81 .long sys_settimeofday
82 .long sys_getgroups16 /* 80 */ 82 .long sys_getgroups16 /* 80 */
83 .long sys_setgroups16 83 .long sys_setgroups16
84 .long old_select 84 .long sys_old_select
85 .long sys_symlink 85 .long sys_symlink
86 .long sys_lstat 86 .long sys_lstat
87 .long sys_readlink /* 85 */ 87 .long sys_readlink /* 85 */
@@ -89,7 +89,7 @@ ENTRY(sys_call_table)
89 .long sys_swapon 89 .long sys_swapon
90 .long sys_reboot 90 .long sys_reboot
91 .long sys_old_readdir 91 .long sys_old_readdir
92 .long old_mmap /* 90 */ 92 .long sys_old_mmap /* 90 */
93 .long sys_munmap 93 .long sys_munmap
94 .long sys_truncate 94 .long sys_truncate
95 .long sys_ftruncate 95 .long sys_ftruncate
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index be2573448ed9..fb5cc5e14cfa 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -70,11 +70,11 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
70 * manually to deassert NMI lines for the watchdog if run 70 * manually to deassert NMI lines for the watchdog if run
71 * on an 82489DX-based system. 71 * on an 82489DX-based system.
72 */ 72 */
73 spin_lock(&i8259A_lock); 73 raw_spin_lock(&i8259A_lock);
74 outb(0x0c, PIC_MASTER_OCW3); 74 outb(0x0c, PIC_MASTER_OCW3);
75 /* Ack the IRQ; AEOI will end it automatically. */ 75 /* Ack the IRQ; AEOI will end it automatically. */
76 inb(PIC_MASTER_POLL); 76 inb(PIC_MASTER_POLL);
77 spin_unlock(&i8259A_lock); 77 raw_spin_unlock(&i8259A_lock);
78 } 78 }
79 79
80 global_clock_event->event_handler(global_clock_event); 80 global_clock_event->event_handler(global_clock_event);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 23066ecf12fa..9faf91ae1841 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -50,7 +50,7 @@ u64 native_sched_clock(void)
50 * unstable. We do this because unlike Time Of Day, 50 * unstable. We do this because unlike Time Of Day,
51 * the scheduler clock tolerates small errors and it's 51 * the scheduler clock tolerates small errors and it's
52 * very important for it to be as fast as the platform 52 * very important for it to be as fast as the platform
53 * can achive it. ) 53 * can achieve it. )
54 */ 54 */
55 if (unlikely(tsc_disabled)) { 55 if (unlikely(tsc_disabled)) {
56 /* No locking but a rare wrong value is not a big deal: */ 56 /* No locking but a rare wrong value is not a big deal: */
@@ -740,7 +740,7 @@ static cycle_t __vsyscall_fn vread_tsc(void)
740} 740}
741#endif 741#endif
742 742
743static void resume_tsc(void) 743static void resume_tsc(struct clocksource *cs)
744{ 744{
745 clocksource_tsc.cycle_last = 0; 745 clocksource_tsc.cycle_last = 0;
746} 746}
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 34a279a7471d..e680ea52db9b 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -49,11 +49,6 @@ extern int no_broadcast;
49char visws_board_type = -1; 49char visws_board_type = -1;
50char visws_board_rev = -1; 50char visws_board_rev = -1;
51 51
52int is_visws_box(void)
53{
54 return visws_board_type >= 0;
55}
56
57static void __init visws_time_init(void) 52static void __init visws_time_init(void)
58{ 53{
59 printk(KERN_INFO "Starting Cobalt Timer system clock\n"); 54 printk(KERN_INFO "Starting Cobalt Timer system clock\n");
@@ -242,6 +237,8 @@ void __init visws_early_detect(void)
242 x86_init.irqs.pre_vector_init = visws_pre_intr_init; 237 x86_init.irqs.pre_vector_init = visws_pre_intr_init;
243 x86_init.irqs.trap_init = visws_trap_init; 238 x86_init.irqs.trap_init = visws_trap_init;
244 x86_init.timers.timer_init = visws_time_init; 239 x86_init.timers.timer_init = visws_time_init;
240 x86_init.pci.init = pci_visws_init;
241 x86_init.pci.init_irq = x86_init_noop;
245 242
246 /* 243 /*
247 * Install reboot quirks: 244 * Install reboot quirks:
@@ -508,7 +505,7 @@ static struct irq_chip cobalt_irq_type = {
508 */ 505 */
509static unsigned int startup_piix4_master_irq(unsigned int irq) 506static unsigned int startup_piix4_master_irq(unsigned int irq)
510{ 507{
511 init_8259A(0); 508 legacy_pic->init(0);
512 509
513 return startup_cobalt_irq(irq); 510 return startup_cobalt_irq(irq);
514} 511}
@@ -532,9 +529,6 @@ static struct irq_chip piix4_master_irq_type = {
532 529
533static struct irq_chip piix4_virtual_irq_type = { 530static struct irq_chip piix4_virtual_irq_type = {
534 .name = "PIIX4-virtual", 531 .name = "PIIX4-virtual",
535 .shutdown = disable_8259A_irq,
536 .enable = enable_8259A_irq,
537 .disable = disable_8259A_irq,
538}; 532};
539 533
540 534
@@ -559,7 +553,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
559 struct irq_desc *desc; 553 struct irq_desc *desc;
560 unsigned long flags; 554 unsigned long flags;
561 555
562 spin_lock_irqsave(&i8259A_lock, flags); 556 raw_spin_lock_irqsave(&i8259A_lock, flags);
563 557
564 /* Find out what's interrupting in the PIIX4 master 8259 */ 558 /* Find out what's interrupting in the PIIX4 master 8259 */
565 outb(0x0c, 0x20); /* OCW3 Poll command */ 559 outb(0x0c, 0x20); /* OCW3 Poll command */
@@ -596,7 +590,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
596 outb(0x60 + realirq, 0x20); 590 outb(0x60 + realirq, 0x20);
597 } 591 }
598 592
599 spin_unlock_irqrestore(&i8259A_lock, flags); 593 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
600 594
601 desc = irq_to_desc(realirq); 595 desc = irq_to_desc(realirq);
602 596
@@ -609,12 +603,12 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
609 handle_IRQ_event(realirq, desc->action); 603 handle_IRQ_event(realirq, desc->action);
610 604
611 if (!(desc->status & IRQ_DISABLED)) 605 if (!(desc->status & IRQ_DISABLED))
612 enable_8259A_irq(realirq); 606 legacy_pic->chip->unmask(realirq);
613 607
614 return IRQ_HANDLED; 608 return IRQ_HANDLED;
615 609
616out_unlock: 610out_unlock:
617 spin_unlock_irqrestore(&i8259A_lock, flags); 611 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
618 return IRQ_NONE; 612 return IRQ_NONE;
619} 613}
620 614
@@ -628,6 +622,12 @@ static struct irqaction cascade_action = {
628 .name = "cascade", 622 .name = "cascade",
629}; 623};
630 624
625static inline void set_piix4_virtual_irq_type(void)
626{
627 piix4_virtual_irq_type.shutdown = i8259A_chip.mask;
628 piix4_virtual_irq_type.enable = i8259A_chip.unmask;
629 piix4_virtual_irq_type.disable = i8259A_chip.mask;
630}
631 631
632void init_VISWS_APIC_irqs(void) 632void init_VISWS_APIC_irqs(void)
633{ 633{
@@ -653,6 +653,7 @@ void init_VISWS_APIC_irqs(void)
653 desc->chip = &piix4_master_irq_type; 653 desc->chip = &piix4_master_irq_type;
654 } 654 }
655 else if (i < CO_IRQ_APIC0) { 655 else if (i < CO_IRQ_APIC0) {
656 set_piix4_virtual_irq_type();
656 desc->chip = &piix4_virtual_irq_type; 657 desc->chip = &piix4_virtual_irq_type;
657 } 658 }
658 else if (IS_CO_APIC(i)) { 659 else if (IS_CO_APIC(i)) {
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index d430e4c30193..7dd599deca4a 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -33,6 +33,7 @@
33#include <asm/fixmap.h> 33#include <asm/fixmap.h>
34#include <asm/apicdef.h> 34#include <asm/apicdef.h>
35#include <asm/apic.h> 35#include <asm/apic.h>
36#include <asm/pgalloc.h>
36#include <asm/processor.h> 37#include <asm/processor.h>
37#include <asm/timer.h> 38#include <asm/timer.h>
38#include <asm/vmi_time.h> 39#include <asm/vmi_time.h>
@@ -266,30 +267,6 @@ static void vmi_nop(void)
266{ 267{
267} 268}
268 269
269#ifdef CONFIG_HIGHPTE
270static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
271{
272 void *va = kmap_atomic(page, type);
273
274 /*
275 * Internally, the VMI ROM must map virtual addresses to physical
276 * addresses for processing MMU updates. By the time MMU updates
277 * are issued, this information is typically already lost.
278 * Fortunately, the VMI provides a cache of mapping slots for active
279 * page tables.
280 *
281 * We use slot zero for the linear mapping of physical memory, and
282 * in HIGHPTE kernels, slot 1 and 2 for KM_PTE0 and KM_PTE1.
283 *
284 * args: SLOT VA COUNT PFN
285 */
286 BUG_ON(type != KM_PTE0 && type != KM_PTE1);
287 vmi_ops.set_linear_mapping((type - KM_PTE0)+1, va, 1, page_to_pfn(page));
288
289 return va;
290}
291#endif
292
293static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) 270static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
294{ 271{
295 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); 272 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
@@ -640,6 +617,12 @@ static inline int __init activate_vmi(void)
640 u64 reloc; 617 u64 reloc;
641 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; 618 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;
642 619
620 /*
621 * Prevent page tables from being allocated in highmem, even if
622 * CONFIG_HIGHPTE is enabled.
623 */
624 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
625
643 if (call_vrom_func(vmi_rom, vmi_init) != 0) { 626 if (call_vrom_func(vmi_rom, vmi_init) != 0) {
644 printk(KERN_ERR "VMI ROM failed to initialize!"); 627 printk(KERN_ERR "VMI ROM failed to initialize!");
645 return 0; 628 return 0;
@@ -778,10 +761,6 @@ static inline int __init activate_vmi(void)
778 761
779 /* Set linear is needed in all cases */ 762 /* Set linear is needed in all cases */
780 vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); 763 vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
781#ifdef CONFIG_HIGHPTE
782 if (vmi_ops.set_linear_mapping)
783 pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
784#endif
785 764
786 /* 765 /*
787 * These MUST always be patched. Don't support indirect jumps 766 * These MUST always be patched. Don't support indirect jumps
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 74c92bb194df..5e1ff66ecd73 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -79,11 +79,7 @@ unsigned long vmi_tsc_khz(void)
79 79
80static inline unsigned int vmi_get_timer_vector(void) 80static inline unsigned int vmi_get_timer_vector(void)
81{ 81{
82#ifdef CONFIG_X86_IO_APIC 82 return IRQ0_VECTOR;
83 return FIRST_DEVICE_VECTOR;
84#else
85 return FIRST_EXTERNAL_VECTOR;
86#endif
87} 83}
88 84
89/** vmi clockchip */ 85/** vmi clockchip */
@@ -171,7 +167,7 @@ static int vmi_timer_next_event(unsigned long delta,
171{ 167{
172 /* Unfortunately, set_next_event interface only passes relative 168 /* Unfortunately, set_next_event interface only passes relative
173 * expiry, but we want absolute expiry. It'd be better if were 169 * expiry, but we want absolute expiry. It'd be better if were
174 * were passed an aboslute expiry, since a bunch of time may 170 * were passed an absolute expiry, since a bunch of time may
175 * have been stolen between the time the delta is computed and 171 * have been stolen between the time the delta is computed and
176 * when we set the alarm below. */ 172 * when we set the alarm below. */
177 cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT)); 173 cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT));
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f92a0da608cb..44879df55696 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -341,7 +341,7 @@ SECTIONS
341 * Per-cpu symbols which need to be offset from __per_cpu_load 341 * Per-cpu symbols which need to be offset from __per_cpu_load
342 * for the boot processor. 342 * for the boot processor.
343 */ 343 */
344#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load 344#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
345INIT_PER_CPU(gdt_page); 345INIT_PER_CPU(gdt_page);
346INIT_PER_CPU(irq_stack_union); 346INIT_PER_CPU(irq_stack_union);
347 347
@@ -352,7 +352,7 @@ INIT_PER_CPU(irq_stack_union);
352 "kernel image bigger than KERNEL_IMAGE_SIZE"); 352 "kernel image bigger than KERNEL_IMAGE_SIZE");
353 353
354#ifdef CONFIG_SMP 354#ifdef CONFIG_SMP
355. = ASSERT((per_cpu__irq_stack_union == 0), 355. = ASSERT((irq_stack_union == 0),
356 "irq_stack_union is not at start of per-cpu area"); 356 "irq_stack_union is not at start of per-cpu area");
357#endif 357#endif
358 358
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 9055e5872ff0..1c0c6ab9c60f 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -301,7 +301,8 @@ static int __init vsyscall_init(void)
301 register_sysctl_table(kernel_root_table2); 301 register_sysctl_table(kernel_root_table2);
302#endif 302#endif
303 on_each_cpu(cpu_vsyscall_init, NULL, 1); 303 on_each_cpu(cpu_vsyscall_init, NULL, 1);
304 hotcpu_notifier(cpu_vsyscall_notifier, 0); 304 /* notifier priority > KVM */
305 hotcpu_notifier(cpu_vsyscall_notifier, 30);
305 return 0; 306 return 0;
306} 307}
307 308
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ee5746c94628..61a1e8c7e19f 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -4,9 +4,11 @@
4 * For licencing details see kernel-base/COPYING 4 * For licencing details see kernel-base/COPYING
5 */ 5 */
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/ioport.h>
7 8
8#include <asm/bios_ebda.h> 9#include <asm/bios_ebda.h>
9#include <asm/paravirt.h> 10#include <asm/paravirt.h>
11#include <asm/pci_x86.h>
10#include <asm/mpspec.h> 12#include <asm/mpspec.h>
11#include <asm/setup.h> 13#include <asm/setup.h>
12#include <asm/apic.h> 14#include <asm/apic.h>
@@ -70,6 +72,12 @@ struct x86_init_ops x86_init __initdata = {
70 .iommu = { 72 .iommu = {
71 .iommu_init = iommu_init_noop, 73 .iommu_init = iommu_init_noop,
72 }, 74 },
75
76 .pci = {
77 .init = x86_default_pci_init,
78 .init_irq = x86_default_pci_init_irq,
79 .fixup_irqs = x86_default_pci_fixup_irqs,
80 },
73}; 81};
74 82
75struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { 83struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {