author		Ingo Molnar <mingo@elte.hu>	2010-03-09 11:11:53 -0500
committer	Ingo Molnar <mingo@elte.hu>	2010-03-09 11:11:53 -0500
commit		548b84166917d6f5e2296123b85ad24aecd3801d (patch)
tree		0ab0300e23a02df0fe3c0579627e4998bb122c00 /arch/x86/kernel
parent		cfb581bcd4f8c158c6f2b48bf5e232bb9e6855c0 (diff)
parent		57d54889cd00db2752994b389ba714138652e60c (diff)
Merge commit 'v2.6.34-rc1' into perf/urgent
Conflicts:
	tools/perf/util/probe-event.c

Merge reason: Pick up -rc1 and resolve the conflict as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--	arch/x86/kernel/Makefile	1
-rw-r--r--	arch/x86/kernel/acpi/boot.c	34
-rw-r--r--	arch/x86/kernel/alternative.c	64
-rw-r--r--	arch/x86/kernel/apb_timer.c	784
-rw-r--r--	arch/x86/kernel/apic/apic.c	10
-rw-r--r--	arch/x86/kernel/apic/io_apic.c	342
-rw-r--r--	arch/x86/kernel/apic/nmi.c	14
-rw-r--r--	arch/x86/kernel/apic/numaq_32.c	3
-rw-r--r--	arch/x86/kernel/apic/x2apic_uv_x.c	89
-rw-r--r--	arch/x86/kernel/apm_32.c	4
-rw-r--r--	arch/x86/kernel/bios_uv.c	39
-rw-r--r--	arch/x86/kernel/cpu/addon_cpuid_features.c	4
-rw-r--r--	arch/x86/kernel/cpu/cpufreq/Kconfig	14
-rw-r--r--	arch/x86/kernel/cpu/cpufreq/Makefile	1
-rw-r--r--	arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c	620
-rw-r--r--	arch/x86/kernel/cpu/cpufreq/powernow-k8.c	6
-rw-r--r--	arch/x86/kernel/cpu/intel_cacheinfo.c	336
-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce.c	1
-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce_amd.c	2
-rw-r--r--	arch/x86/kernel/cpu/mtrr/Makefile	2
-rw-r--r--	arch/x86/kernel/cpu/mtrr/amd.c	2
-rw-r--r--	arch/x86/kernel/cpu/mtrr/centaur.c	2
-rw-r--r--	arch/x86/kernel/cpu/mtrr/cleanup.c	208
-rw-r--r--	arch/x86/kernel/cpu/mtrr/cyrix.c	2
-rw-r--r--	arch/x86/kernel/cpu/mtrr/generic.c	10
-rw-r--r--	arch/x86/kernel/cpu/mtrr/main.c	7
-rw-r--r--	arch/x86/kernel/cpu/mtrr/mtrr.h	6
-rw-r--r--	arch/x86/kernel/cpu/mtrr/state.c	94
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	2
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	2
-rw-r--r--	arch/x86/kernel/dumpstack_64.c	1
-rw-r--r--	arch/x86/kernel/e820.c	357
-rw-r--r--	arch/x86/kernel/efi.c	2
-rw-r--r--	arch/x86/kernel/ftrace.c	36
-rw-r--r--	arch/x86/kernel/head32.c	10
-rw-r--r--	arch/x86/kernel/head_32.S	6
-rw-r--r--	arch/x86/kernel/hpet.c	2
-rw-r--r--	arch/x86/kernel/i387.c	71
-rw-r--r--	arch/x86/kernel/i8259.c	94
-rw-r--r--	arch/x86/kernel/irqinit.c	36
-rw-r--r--	arch/x86/kernel/kprobes.c	609
-rw-r--r--	arch/x86/kernel/microcode_intel.c	2
-rw-r--r--	arch/x86/kernel/mmconf-fam10h_64.c	7
-rw-r--r--	arch/x86/kernel/mrst.c	216
-rw-r--r--	arch/x86/kernel/olpc.c	10
-rw-r--r--	arch/x86/kernel/paravirt.c	4
-rw-r--r--	arch/x86/kernel/pci-dma.c	13
-rw-r--r--	arch/x86/kernel/process.c	7
-rw-r--r--	arch/x86/kernel/process_32.c	6
-rw-r--r--	arch/x86/kernel/process_64.c	6
-rw-r--r--	arch/x86/kernel/ptrace.c	34
-rw-r--r--	arch/x86/kernel/reboot.c	8
-rw-r--r--	arch/x86/kernel/setup.c	31
-rw-r--r--	arch/x86/kernel/setup_percpu.c	6
-rw-r--r--	arch/x86/kernel/smpboot.c	23
-rw-r--r--	arch/x86/kernel/time.c	4
-rw-r--r--	arch/x86/kernel/tsc.c	4
-rw-r--r--	arch/x86/kernel/uv_sysfs.c	6
-rw-r--r--	arch/x86/kernel/visws_quirks.c	27
-rw-r--r--	arch/x86/kernel/vmi_32.c	35
-rw-r--r--	arch/x86/kernel/vmiclock_32.c	6
-rw-r--r--	arch/x86/kernel/vmlinux.lds.S	4
-rw-r--r--	arch/x86/kernel/vsyscall_64.c	3
-rw-r--r--	arch/x86/kernel/x8664_ksyms_64.c	3
-rw-r--r--	arch/x86/kernel/x86_init.c	11
-rw-r--r--	arch/x86/kernel/xsave.c	1
66 files changed, 3216 insertions, 1190 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d87f09bc5a52..4c58352209e0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -87,6 +87,7 @@ obj-$(CONFIG_VM86) += vm86_32.o
87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
88 88
89obj-$(CONFIG_HPET_TIMER) += hpet.o 89obj-$(CONFIG_HPET_TIMER) += hpet.o
90obj-$(CONFIG_APB_TIMER) += apb_timer.o
90 91
91obj-$(CONFIG_K8_NB) += k8.o 92obj-$(CONFIG_K8_NB) += k8.o
92obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o 93obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index af1c5833ff23..a54d714545ff 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -35,6 +35,7 @@
35#include <linux/ioport.h> 35#include <linux/ioport.h>
36#include <linux/pci.h> 36#include <linux/pci.h>
37 37
38#include <asm/pci_x86.h>
38#include <asm/pgtable.h> 39#include <asm/pgtable.h>
39#include <asm/io_apic.h> 40#include <asm/io_apic.h>
40#include <asm/apic.h> 41#include <asm/apic.h>
@@ -49,6 +50,7 @@ EXPORT_SYMBOL(acpi_disabled);
49 50
50#ifdef CONFIG_X86_64 51#ifdef CONFIG_X86_64
51# include <asm/proto.h> 52# include <asm/proto.h>
53# include <asm/numa_64.h>
52#endif /* X86 */ 54#endif /* X86 */
53 55
54#define BAD_MADT_ENTRY(entry, end) ( \ 56#define BAD_MADT_ENTRY(entry, end) ( \
@@ -446,6 +448,12 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
446int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) 448int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
447{ 449{
448 *irq = gsi; 450 *irq = gsi;
451
452#ifdef CONFIG_X86_IO_APIC
453 if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC)
454 setup_IO_APIC_irq_extra(gsi);
455#endif
456
449 return 0; 457 return 0;
450} 458}
451 459
@@ -473,7 +481,8 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
473 plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); 481 plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
474 } 482 }
475#endif 483#endif
476 acpi_gsi_to_irq(plat_gsi, &irq); 484 irq = plat_gsi;
485
477 return irq; 486 return irq;
478} 487}
479 488
@@ -482,6 +491,25 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
482 */ 491 */
483#ifdef CONFIG_ACPI_HOTPLUG_CPU 492#ifdef CONFIG_ACPI_HOTPLUG_CPU
484 493
494static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
495{
496#ifdef CONFIG_ACPI_NUMA
497 int nid;
498
499 nid = acpi_get_node(handle);
500 if (nid == -1 || !node_online(nid))
501 return;
502#ifdef CONFIG_X86_64
503 apicid_to_node[physid] = nid;
504 numa_set_node(cpu, nid);
505#else /* CONFIG_X86_32 */
506 apicid_2_node[physid] = nid;
507 cpu_to_node_map[cpu] = nid;
508#endif
509
510#endif
511}
512
485static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) 513static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
486{ 514{
487 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; 515 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -540,6 +568,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
540 } 568 }
541 569
542 cpu = cpumask_first(new_map); 570 cpu = cpumask_first(new_map);
571 acpi_map_cpu2node(handle, cpu, physid);
543 572
544 *pcpu = cpu; 573 *pcpu = cpu;
545 retval = 0; 574 retval = 0;
@@ -1596,6 +1625,9 @@ int __init acpi_boot_init(void)
1596 1625
1597 acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); 1626 acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet);
1598 1627
1628 if (!acpi_noirq)
1629 x86_init.pci.init = pci_acpi_init;
1630
1599 return 0; 1631 return 0;
1600} 1632}
1601 1633
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index e63b80e5861c..3a4bf35c179b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -7,6 +7,7 @@
7#include <linux/mm.h> 7#include <linux/mm.h>
8#include <linux/vmalloc.h> 8#include <linux/vmalloc.h>
9#include <linux/memory.h> 9#include <linux/memory.h>
10#include <linux/stop_machine.h>
10#include <asm/alternative.h> 11#include <asm/alternative.h>
11#include <asm/sections.h> 12#include <asm/sections.h>
12#include <asm/pgtable.h> 13#include <asm/pgtable.h>
@@ -205,7 +206,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
205 struct alt_instr *end) 206 struct alt_instr *end)
206{ 207{
207 struct alt_instr *a; 208 struct alt_instr *a;
208 char insnbuf[MAX_PATCH_LEN]; 209 u8 insnbuf[MAX_PATCH_LEN];
209 210
210 DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); 211 DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
211 for (a = start; a < end; a++) { 212 for (a = start; a < end; a++) {
@@ -223,6 +224,8 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
223 } 224 }
224#endif 225#endif
225 memcpy(insnbuf, a->replacement, a->replacementlen); 226 memcpy(insnbuf, a->replacement, a->replacementlen);
227 if (*insnbuf == 0xe8 && a->replacementlen == 5)
228 *(s32 *)(insnbuf + 1) += a->replacement - a->instr;
226 add_nops(insnbuf + a->replacementlen, 229 add_nops(insnbuf + a->replacementlen,
227 a->instrlen - a->replacementlen); 230 a->instrlen - a->replacementlen);
228 text_poke_early(instr, insnbuf, a->instrlen); 231 text_poke_early(instr, insnbuf, a->instrlen);
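The two added lines in this hunk rebase a relative CALL: a 5-byte 0xe8 instruction encodes its target as a rel32 displacement from the end of the instruction, so bytes copied from a->replacement to a->instr keep pointing at the original target only if the displacement grows by (a->replacement - a->instr). A standalone C sketch of that arithmetic follows; it is illustrative only, and the names are hypothetical, not part of this patch.

/*
 * Illustrative sketch (hypothetical names): why a rel32 CALL displacement
 * must be rebased when the instruction bytes are copied to a new address.
 */
#include <stdint.h>

/* Target encoded by a 5-byte "call rel32" located at 'insn'. */
static uint8_t *call_target(uint8_t *insn)
{
	int32_t disp = *(int32_t *)(insn + 1);

	return insn + 5 + disp;		/* rel32 is relative to the next insn */
}

/*
 * Copying the call from 'src' to 'dst' changes the base the displacement is
 * taken from, so add (src - dst) to keep the same target.  This is exactly
 * what apply_alternatives() does with (a->replacement - a->instr) before
 * text_poke_early() writes the buffer to a->instr.
 */
static void rebase_call(uint8_t *dst, const uint8_t *src)
{
	int32_t disp = *(const int32_t *)(src + 1);

	dst[0] = 0xe8;
	*(int32_t *)(dst + 1) = disp + (int32_t)(src - dst);
}

With disp' = disp + (src - dst), the relocated call resolves to dst + 5 + disp', which equals src + 5 + disp, i.e. the original target.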
@@ -570,3 +573,62 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
570 local_irq_restore(flags); 573 local_irq_restore(flags);
571 return addr; 574 return addr;
572} 575}
576
577/*
578 * Cross-modifying kernel text with stop_machine().
579 * This code originally comes from immediate value.
580 */
581static atomic_t stop_machine_first;
582static int wrote_text;
583
584struct text_poke_params {
585 void *addr;
586 const void *opcode;
587 size_t len;
588};
589
590static int __kprobes stop_machine_text_poke(void *data)
591{
592 struct text_poke_params *tpp = data;
593
594 if (atomic_dec_and_test(&stop_machine_first)) {
595 text_poke(tpp->addr, tpp->opcode, tpp->len);
596 smp_wmb(); /* Make sure other cpus see that this has run */
597 wrote_text = 1;
598 } else {
599 while (!wrote_text)
600 cpu_relax();
601 smp_mb(); /* Load wrote_text before following execution */
602 }
603
604 flush_icache_range((unsigned long)tpp->addr,
605 (unsigned long)tpp->addr + tpp->len);
606 return 0;
607}
608
609/**
610 * text_poke_smp - Update instructions on a live kernel on SMP
611 * @addr: address to modify
612 * @opcode: source of the copy
613 * @len: length to copy
614 *
615 * Modify multi-byte instruction by using stop_machine() on SMP. This allows
616 * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
617 * should be allowed, since stop_machine() does _not_ protect code against
618 * NMI and MCE.
619 *
620 * Note: Must be called under get_online_cpus() and text_mutex.
621 */
622void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
623{
624 struct text_poke_params tpp;
625
626 tpp.addr = addr;
627 tpp.opcode = opcode;
628 tpp.len = len;
629 atomic_set(&stop_machine_first, 1);
630 wrote_text = 0;
631 stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
632 return addr;
633}
634
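For reference, a minimal usage sketch of the new text_poke_smp() helper, following the locking rule stated in its kernel-doc above (call under get_online_cpus() and text_mutex). The wrapper name and patch site are hypothetical, and the prototype is assumed to be exported via <asm/alternative.h> next to text_poke().

#include <linux/types.h>
#include <linux/cpu.h>
#include <linux/memory.h>	/* text_mutex */
#include <linux/mutex.h>
#include <asm/alternative.h>	/* assumed home of the text_poke_smp() prototype */

/* Hypothetical caller: replace 'len' bytes of kernel text on a live SMP box. */
static void patch_one_site(void *patch_site, const u8 *new_insn, size_t len)
{
	get_online_cpus();		/* keep the online CPU set stable */
	mutex_lock(&text_mutex);	/* serialize kernel text modification */

	text_poke_smp(patch_site, new_insn, len);

	mutex_unlock(&text_mutex);
	put_online_cpus();
}

Because stop_machine() does not fence NMI or MCE handlers, this is only safe for code that cannot run in those contexts, as the kernel-doc above notes.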
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
new file mode 100644
index 000000000000..4b7099526d2c
--- /dev/null
+++ b/arch/x86/kernel/apb_timer.c
@@ -0,0 +1,784 @@
1/*
2 * apb_timer.c: Driver for Langwell APB timers
3 *
4 * (C) Copyright 2009 Intel Corporation
5 * Author: Jacob Pan (jacob.jun.pan@intel.com)
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 *
12 * Note:
13 * Langwell is the south complex of Intel Moorestown MID platform. There are
14 * eight external timers in total that can be used by the operating system.
15 * The timer information, such as frequency and addresses, is provided to the
16 * OS via SFI tables.
17 * Timer interrupts are routed via FW/HW emulated IOAPIC independently via
18 * individual redirection table entries (RTE).
19 * Unlike HPET, there is no master counter, therefore one of the timers is
20 * used as clocksource. The overall allocation looks like:
21 * - timer 0 - NR_CPUs for per cpu timer
22 * - one timer for clocksource
23 * - one timer for watchdog driver.
24 * It is also worth noting that the APB timer does not support true one-shot mode,
25 * free-running mode will be used here to emulate one-shot mode.
26 * APB timer can also be used as broadcast timer along with per cpu local APIC
27 * timer, but by default APB timer has higher rating than local APIC timers.
28 */
29
30#include <linux/clocksource.h>
31#include <linux/clockchips.h>
32#include <linux/delay.h>
33#include <linux/errno.h>
34#include <linux/init.h>
35#include <linux/sysdev.h>
36#include <linux/pm.h>
37#include <linux/pci.h>
38#include <linux/sfi.h>
39#include <linux/interrupt.h>
40#include <linux/cpu.h>
41#include <linux/irq.h>
42
43#include <asm/fixmap.h>
44#include <asm/apb_timer.h>
45
46#define APBT_MASK CLOCKSOURCE_MASK(32)
47#define APBT_SHIFT 22
48#define APBT_CLOCKEVENT_RATING 150
49#define APBT_CLOCKSOURCE_RATING 250
50#define APBT_MIN_DELTA_USEC 200
51
52#define EVT_TO_APBT_DEV(evt) container_of(evt, struct apbt_dev, evt)
53#define APBT_CLOCKEVENT0_NUM (0)
54#define APBT_CLOCKEVENT1_NUM (1)
55#define APBT_CLOCKSOURCE_NUM (2)
56
57static unsigned long apbt_address;
58static int apb_timer_block_enabled;
59static void __iomem *apbt_virt_address;
60static int phy_cs_timer_id;
61
62/*
63 * Common DW APB timer info
64 */
65static uint64_t apbt_freq;
66
67static void apbt_set_mode(enum clock_event_mode mode,
68 struct clock_event_device *evt);
69static int apbt_next_event(unsigned long delta,
70 struct clock_event_device *evt);
71static cycle_t apbt_read_clocksource(struct clocksource *cs);
72static void apbt_restart_clocksource(struct clocksource *cs);
73
74struct apbt_dev {
75 struct clock_event_device evt;
76 unsigned int num;
77 int cpu;
78 unsigned int irq;
79 unsigned int tick;
80 unsigned int count;
81 unsigned int flags;
82 char name[10];
83};
84
85int disable_apbt_percpu __cpuinitdata;
86
87static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev);
88
89#ifdef CONFIG_SMP
90static unsigned int apbt_num_timers_used;
91static struct apbt_dev *apbt_devs;
92#endif
93
94static inline unsigned long apbt_readl_reg(unsigned long a)
95{
96 return readl(apbt_virt_address + a);
97}
98
99static inline void apbt_writel_reg(unsigned long d, unsigned long a)
100{
101 writel(d, apbt_virt_address + a);
102}
103
104static inline unsigned long apbt_readl(int n, unsigned long a)
105{
106 return readl(apbt_virt_address + a + n * APBTMRS_REG_SIZE);
107}
108
109static inline void apbt_writel(int n, unsigned long d, unsigned long a)
110{
111 writel(d, apbt_virt_address + a + n * APBTMRS_REG_SIZE);
112}
113
114static inline void apbt_set_mapping(void)
115{
116 struct sfi_timer_table_entry *mtmr;
117
118 if (apbt_virt_address) {
119 pr_debug("APBT base already mapped\n");
120 return;
121 }
122 mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM);
123 if (mtmr == NULL) {
124 printk(KERN_ERR "Failed to get MTMR %d from SFI\n",
125 APBT_CLOCKEVENT0_NUM);
126 return;
127 }
128 apbt_address = (unsigned long)mtmr->phys_addr;
129 if (!apbt_address) {
130 printk(KERN_WARNING "No timer base from SFI, use default\n");
131 apbt_address = APBT_DEFAULT_BASE;
132 }
133 apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE);
134 if (apbt_virt_address) {
135 pr_debug("Mapped APBT physical addr %p at virtual addr %p\n",\
136 (void *)apbt_address, (void *)apbt_virt_address);
137 } else {
138 pr_debug("Failed mapping APBT phy address at %p\n",\
139 (void *)apbt_address);
140 goto panic_noapbt;
141 }
142 apbt_freq = mtmr->freq_hz / USEC_PER_SEC;
143 sfi_free_mtmr(mtmr);
144
145 /* Now figure out the physical timer id for clocksource device */
146 mtmr = sfi_get_mtmr(APBT_CLOCKSOURCE_NUM);
147 if (mtmr == NULL)
148 goto panic_noapbt;
149
150 /* Now figure out the physical timer id */
151 phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff)
152 / APBTMRS_REG_SIZE;
153 pr_debug("Use timer %d for clocksource\n", phy_cs_timer_id);
154 return;
155
156panic_noapbt:
157 panic("Failed to setup APB system timer\n");
158
159}
160
161static inline void apbt_clear_mapping(void)
162{
163 iounmap(apbt_virt_address);
164 apbt_virt_address = NULL;
165}
166
167/*
168 * APBT timer interrupt enable / disable
169 */
170static inline int is_apbt_capable(void)
171{
172 return apbt_virt_address ? 1 : 0;
173}
174
175static struct clocksource clocksource_apbt = {
176 .name = "apbt",
177 .rating = APBT_CLOCKSOURCE_RATING,
178 .read = apbt_read_clocksource,
179 .mask = APBT_MASK,
180 .shift = APBT_SHIFT,
181 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
182 .resume = apbt_restart_clocksource,
183};
184
185/* boot APB clock event device */
186static struct clock_event_device apbt_clockevent = {
187 .name = "apbt0",
188 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
189 .set_mode = apbt_set_mode,
190 .set_next_event = apbt_next_event,
191 .shift = APBT_SHIFT,
192 .irq = 0,
193 .rating = APBT_CLOCKEVENT_RATING,
194};
195
196/*
197 * if user does not want to use per CPU apb timer, just give it a lower rating
198 * than local apic timer and skip the late per cpu timer init.
199 */
200static inline int __init setup_x86_mrst_timer(char *arg)
201{
202 if (!arg)
203 return -EINVAL;
204
205 if (strcmp("apbt_only", arg) == 0)
206 disable_apbt_percpu = 0;
207 else if (strcmp("lapic_and_apbt", arg) == 0)
208 disable_apbt_percpu = 1;
209 else {
210 pr_warning("X86 MRST timer option %s not recognised"
211 " use x86_mrst_timer=apbt_only or lapic_and_apbt\n",
212 arg);
213 return -EINVAL;
214 }
215 return 0;
216}
217__setup("x86_mrst_timer=", setup_x86_mrst_timer);
218
219/*
220 * start count down from 0xffff_ffff. this is done by toggling the enable bit
221 * then load initial load count to ~0.
222 */
223static void apbt_start_counter(int n)
224{
225 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
226
227 ctrl &= ~APBTMR_CONTROL_ENABLE;
228 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
229 apbt_writel(n, ~0, APBTMR_N_LOAD_COUNT);
230 /* enable, mask interrupt */
231 ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC;
232 ctrl |= (APBTMR_CONTROL_ENABLE | APBTMR_CONTROL_INT);
233 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
234 /* read it once to get cached counter value initialized */
235 apbt_read_clocksource(&clocksource_apbt);
236}
237
238static irqreturn_t apbt_interrupt_handler(int irq, void *data)
239{
240 struct apbt_dev *dev = (struct apbt_dev *)data;
241 struct clock_event_device *aevt = &dev->evt;
242
243 if (!aevt->event_handler) {
244 printk(KERN_INFO "Spurious APBT timer interrupt on %d\n",
245 dev->num);
246 return IRQ_NONE;
247 }
248 aevt->event_handler(aevt);
249 return IRQ_HANDLED;
250}
251
252static void apbt_restart_clocksource(struct clocksource *cs)
253{
254 apbt_start_counter(phy_cs_timer_id);
255}
256
257/* Setup IRQ routing via IOAPIC */
258#ifdef CONFIG_SMP
259static void apbt_setup_irq(struct apbt_dev *adev)
260{
261 struct irq_chip *chip;
262 struct irq_desc *desc;
263
264 /* timer0 irq has been setup early */
265 if (adev->irq == 0)
266 return;
267 desc = irq_to_desc(adev->irq);
268 chip = get_irq_chip(adev->irq);
269 disable_irq(adev->irq);
270 desc->status |= IRQ_MOVE_PCNTXT;
271 irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
272 /* APB timer irqs are set up as mp_irqs, timer is edge triggerred */
273 set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge");
274 enable_irq(adev->irq);
275 if (system_state == SYSTEM_BOOTING)
276 if (request_irq(adev->irq, apbt_interrupt_handler,
277 IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
278 adev->name, adev)) {
279 printk(KERN_ERR "Failed request IRQ for APBT%d\n",
280 adev->num);
281 }
282}
283#endif
284
285static void apbt_enable_int(int n)
286{
287 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
288 /* clear pending intr */
289 apbt_readl(n, APBTMR_N_EOI);
290 ctrl &= ~APBTMR_CONTROL_INT;
291 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
292}
293
294static void apbt_disable_int(int n)
295{
296 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
297
298 ctrl |= APBTMR_CONTROL_INT;
299 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
300}
301
302
303static int __init apbt_clockevent_register(void)
304{
305 struct sfi_timer_table_entry *mtmr;
306 struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev);
307
308 mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM);
309 if (mtmr == NULL) {
310 printk(KERN_ERR "Failed to get MTMR %d from SFI\n",
311 APBT_CLOCKEVENT0_NUM);
312 return -ENODEV;
313 }
314
315 /*
316 * We need to calculate the scaled math multiplication factor for
317 * nanosecond to apbt tick conversion.
318 * mult = (nsec/cycle)*2^APBT_SHIFT
319 */
320 apbt_clockevent.mult = div_sc((unsigned long) mtmr->freq_hz
321 , NSEC_PER_SEC, APBT_SHIFT);
322
323 /* Calculate the min / max delta */
324 apbt_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
325 &apbt_clockevent);
326 apbt_clockevent.min_delta_ns = clockevent_delta2ns(
327 APBT_MIN_DELTA_USEC*apbt_freq,
328 &apbt_clockevent);
329 /*
330 * Start apbt with the boot cpu mask and make it
331 * global if not used for per cpu timer.
332 */
333 apbt_clockevent.cpumask = cpumask_of(smp_processor_id());
334 adev->num = smp_processor_id();
335 memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device));
336
337 if (disable_apbt_percpu) {
338 apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100;
339 global_clock_event = &adev->evt;
340 printk(KERN_DEBUG "%s clockevent registered as global\n",
341 global_clock_event->name);
342 }
343
344 if (request_irq(apbt_clockevent.irq, apbt_interrupt_handler,
345 IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
346 apbt_clockevent.name, adev)) {
347 printk(KERN_ERR "Failed request IRQ for APBT%d\n",
348 apbt_clockevent.irq);
349 }
350
351 clockevents_register_device(&adev->evt);
352 /* Start APBT 0 interrupts */
353 apbt_enable_int(APBT_CLOCKEVENT0_NUM);
354
355 sfi_free_mtmr(mtmr);
356 return 0;
357}
358
359#ifdef CONFIG_SMP
360/* Should be called with per cpu */
361void apbt_setup_secondary_clock(void)
362{
363 struct apbt_dev *adev;
364 struct clock_event_device *aevt;
365 int cpu;
366
367 /* Don't register boot CPU clockevent */
368 cpu = smp_processor_id();
369 if (cpu == boot_cpu_id)
370 return;
371 /*
372 * We need to calculate the scaled math multiplication factor for
373 * nanosecond to apbt tick conversion.
374 * mult = (nsec/cycle)*2^APBT_SHIFT
375 */
376 printk(KERN_INFO "Init per CPU clockevent %d\n", cpu);
377 adev = &per_cpu(cpu_apbt_dev, cpu);
378 aevt = &adev->evt;
379
380 memcpy(aevt, &apbt_clockevent, sizeof(*aevt));
381 aevt->cpumask = cpumask_of(cpu);
382 aevt->name = adev->name;
383 aevt->mode = CLOCK_EVT_MODE_UNUSED;
384
385 printk(KERN_INFO "Registering CPU %d clockevent device %s, mask %08x\n",
386 cpu, aevt->name, *(u32 *)aevt->cpumask);
387
388 apbt_setup_irq(adev);
389
390 clockevents_register_device(aevt);
391
392 apbt_enable_int(cpu);
393
394 return;
395}
396
397/*
398 * this notify handler processes CPU hotplug events. in case of S0i3, nonboot
399 * cpus are disabled/enabled frequently; for performance reasons, we keep the
400 * per cpu timer irq registered so that we do not need to do free_irq/request_irq.
401 *
402 * TODO: it might be more reliable to directly disable percpu clockevent device
403 * without the notifier chain. currently, cpu 0 may get interrupts from other
404 * cpu timers during the offline process due to the ordering of notification.
405 * the extra interrupt is harmless.
406 */
407static int apbt_cpuhp_notify(struct notifier_block *n,
408 unsigned long action, void *hcpu)
409{
410 unsigned long cpu = (unsigned long)hcpu;
411 struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu);
412
413 switch (action & 0xf) {
414 case CPU_DEAD:
415 apbt_disable_int(cpu);
416 if (system_state == SYSTEM_RUNNING)
417 pr_debug("skipping APBT CPU %lu offline\n", cpu);
418 else if (adev) {
419 pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
420 free_irq(adev->irq, adev);
421 }
422 break;
423 default:
424 pr_debug(KERN_INFO "APBT notified %lu, no action\n", action);
425 }
426 return NOTIFY_OK;
427}
428
429static __init int apbt_late_init(void)
430{
431 if (disable_apbt_percpu)
432 return 0;
433 /* This notifier should be called after workqueue is ready */
434 hotcpu_notifier(apbt_cpuhp_notify, -20);
435 return 0;
436}
437fs_initcall(apbt_late_init);
438#else
439
440void apbt_setup_secondary_clock(void) {}
441
442#endif /* CONFIG_SMP */
443
444static void apbt_set_mode(enum clock_event_mode mode,
445 struct clock_event_device *evt)
446{
447 unsigned long ctrl;
448 uint64_t delta;
449 int timer_num;
450 struct apbt_dev *adev = EVT_TO_APBT_DEV(evt);
451
452 timer_num = adev->num;
453 pr_debug("%s CPU %d timer %d mode=%d\n",
454 __func__, first_cpu(*evt->cpumask), timer_num, mode);
455
456 switch (mode) {
457 case CLOCK_EVT_MODE_PERIODIC:
458 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * apbt_clockevent.mult;
459 delta >>= apbt_clockevent.shift;
460 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
461 ctrl |= APBTMR_CONTROL_MODE_PERIODIC;
462 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
463 /*
464 * DW APB p. 46, have to disable timer before load counter,
465 * may cause sync problem.
466 */
467 ctrl &= ~APBTMR_CONTROL_ENABLE;
468 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
469 udelay(1);
470 pr_debug("Setting clock period %d for HZ %d\n", (int)delta, HZ);
471 apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT);
472 ctrl |= APBTMR_CONTROL_ENABLE;
473 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
474 break;
475 /* APB timer does not have one-shot mode, use free running mode */
476 case CLOCK_EVT_MODE_ONESHOT:
477 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
478 /*
479 * set free running mode, this mode will let timer reload max
480 * timeout which will give time (3min on 25MHz clock) to rearm
481 * the next event, therefore emulate the one-shot mode.
482 */
483 ctrl &= ~APBTMR_CONTROL_ENABLE;
484 ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC;
485
486 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
487 /* write again to set free running mode */
488 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
489
490 /*
491 * DW APB p. 46, load counter with all 1s before starting free
492 * running mode.
493 */
494 apbt_writel(timer_num, ~0, APBTMR_N_LOAD_COUNT);
495 ctrl &= ~APBTMR_CONTROL_INT;
496 ctrl |= APBTMR_CONTROL_ENABLE;
497 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
498 break;
499
500 case CLOCK_EVT_MODE_UNUSED:
501 case CLOCK_EVT_MODE_SHUTDOWN:
502 apbt_disable_int(timer_num);
503 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
504 ctrl &= ~APBTMR_CONTROL_ENABLE;
505 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
506 break;
507
508 case CLOCK_EVT_MODE_RESUME:
509 apbt_enable_int(timer_num);
510 break;
511 }
512}
513
514static int apbt_next_event(unsigned long delta,
515 struct clock_event_device *evt)
516{
517 unsigned long ctrl;
518 int timer_num;
519
520 struct apbt_dev *adev = EVT_TO_APBT_DEV(evt);
521
522 timer_num = adev->num;
523 /* Disable timer */
524 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
525 ctrl &= ~APBTMR_CONTROL_ENABLE;
526 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
527 /* write new count */
528 apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT);
529 ctrl |= APBTMR_CONTROL_ENABLE;
530 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
531 return 0;
532}
533
534/*
535 * APB timer clock is not in sync with pclk on Langwell, which translates to
536 * unreliable read value caused by sampling error. the error does not add up
537 * over time and only happens when sampling a 0 as a 1 by mistake. so the time
538 * would go backwards. the following code is trying to prevent time traveling
539 * backwards. little bit paranoid.
540 */
541static cycle_t apbt_read_clocksource(struct clocksource *cs)
542{
543 unsigned long t0, t1, t2;
544 static unsigned long last_read;
545
546bad_count:
547 t1 = apbt_readl(phy_cs_timer_id,
548 APBTMR_N_CURRENT_VALUE);
549 t2 = apbt_readl(phy_cs_timer_id,
550 APBTMR_N_CURRENT_VALUE);
551 if (unlikely(t1 < t2)) {
552 pr_debug("APBT: read current count error %lx:%lx:%lx\n",
553 t1, t2, t2 - t1);
554 goto bad_count;
555 }
556 /*
557 * check against cached last read, makes sure time does not go back.
558 * it could be a normal rollover but we will do triple check anyway
559 */
560 if (unlikely(t2 > last_read)) {
561 /* check if we have a normal rollover */
562 unsigned long raw_intr_status =
563 apbt_readl_reg(APBTMRS_RAW_INT_STATUS);
564 /*
565 * cs timer interrupt is masked but raw intr bit is set if
566 * rollover occurs. then we read EOI reg to clear it.
567 */
568 if (raw_intr_status & (1 << phy_cs_timer_id)) {
569 apbt_readl(phy_cs_timer_id, APBTMR_N_EOI);
570 goto out;
571 }
572 pr_debug("APB CS going back %lx:%lx:%lx ",
573 t2, last_read, t2 - last_read);
574bad_count_x3:
575 pr_debug(KERN_INFO "tripple check enforced\n");
576 t0 = apbt_readl(phy_cs_timer_id,
577 APBTMR_N_CURRENT_VALUE);
578 udelay(1);
579 t1 = apbt_readl(phy_cs_timer_id,
580 APBTMR_N_CURRENT_VALUE);
581 udelay(1);
582 t2 = apbt_readl(phy_cs_timer_id,
583 APBTMR_N_CURRENT_VALUE);
584 if ((t2 > t1) || (t1 > t0)) {
585 printk(KERN_ERR "Error: APB CS tripple check failed\n");
586 goto bad_count_x3;
587 }
588 }
589out:
590 last_read = t2;
591 return (cycle_t)~t2;
592}
593
594static int apbt_clocksource_register(void)
595{
596 u64 start, now;
597 cycle_t t1;
598
599 /* Start the counter, use timer 2 as source, timer 0/1 for event */
600 apbt_start_counter(phy_cs_timer_id);
601
602 /* Verify whether apbt counter works */
603 t1 = apbt_read_clocksource(&clocksource_apbt);
604 rdtscll(start);
605
606 /*
607 * We don't know the TSC frequency yet, but waiting for
608 * 200000 TSC cycles is safe:
609 * 4 GHz == 50us
610 * 1 GHz == 200us
611 */
612 do {
613 rep_nop();
614 rdtscll(now);
615 } while ((now - start) < 200000UL);
616
617 /* APBT is the only always on clocksource, it has to work! */
618 if (t1 == apbt_read_clocksource(&clocksource_apbt))
619 panic("APBT counter not counting. APBT disabled\n");
620
621 /*
622 * initialize and register APBT clocksource
623 * convert that to ns/clock cycle
624 * mult = (ns/c) * 2^APBT_SHIFT
625 */
626 clocksource_apbt.mult = div_sc(MSEC_PER_SEC,
627 (unsigned long) apbt_freq, APBT_SHIFT);
628 clocksource_register(&clocksource_apbt);
629
630 return 0;
631}
632
633/*
634 * Early setup the APBT timer, only use timer 0 for booting then switch to
635 * per CPU timer if possible.
636 * returns 1 if per cpu apbt is setup
637 * returns 0 if no per cpu apbt is chosen
638 * panic if set up failed, this is the only platform timer on Moorestown.
639 */
640void __init apbt_time_init(void)
641{
642#ifdef CONFIG_SMP
643 int i;
644 struct sfi_timer_table_entry *p_mtmr;
645 unsigned int percpu_timer;
646 struct apbt_dev *adev;
647#endif
648
649 if (apb_timer_block_enabled)
650 return;
651 apbt_set_mapping();
652 if (apbt_virt_address) {
653 pr_debug("Found APBT version 0x%lx\n",\
654 apbt_readl_reg(APBTMRS_COMP_VERSION));
655 } else
656 goto out_noapbt;
657 /*
658 * Read the frequency and check for a sane value, for ESL model
659 * we extend the possible clock range to allow time scaling.
660 */
661
662 if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) {
663 pr_debug("APBT has invalid freq 0x%llx\n", apbt_freq);
664 goto out_noapbt;
665 }
666 if (apbt_clocksource_register()) {
667 pr_debug("APBT has failed to register clocksource\n");
668 goto out_noapbt;
669 }
670 if (!apbt_clockevent_register())
671 apb_timer_block_enabled = 1;
672 else {
673 pr_debug("APBT has failed to register clockevent\n");
674 goto out_noapbt;
675 }
676#ifdef CONFIG_SMP
677 /* kernel cmdline disable apb timer, so we will use lapic timers */
678 if (disable_apbt_percpu) {
679 printk(KERN_INFO "apbt: disabled per cpu timer\n");
680 return;
681 }
682 pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus());
683 if (num_possible_cpus() <= sfi_mtimer_num) {
684 percpu_timer = 1;
685 apbt_num_timers_used = num_possible_cpus();
686 } else {
687 percpu_timer = 0;
688 apbt_num_timers_used = 1;
689 adev = &per_cpu(cpu_apbt_dev, 0);
690 adev->flags &= ~APBT_DEV_USED;
691 }
692 pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used);
693
694 /* here we set up per CPU timer data structure */
695 apbt_devs = kzalloc(sizeof(struct apbt_dev) * apbt_num_timers_used,
696 GFP_KERNEL);
697 if (!apbt_devs) {
698 printk(KERN_ERR "Failed to allocate APB timer devices\n");
699 return;
700 }
701 for (i = 0; i < apbt_num_timers_used; i++) {
702 adev = &per_cpu(cpu_apbt_dev, i);
703 adev->num = i;
704 adev->cpu = i;
705 p_mtmr = sfi_get_mtmr(i);
706 if (p_mtmr) {
707 adev->tick = p_mtmr->freq_hz;
708 adev->irq = p_mtmr->irq;
709 } else
710 printk(KERN_ERR "Failed to get timer for cpu %d\n", i);
711 adev->count = 0;
712 sprintf(adev->name, "apbt%d", i);
713 }
714#endif
715
716 return;
717
718out_noapbt:
719 apbt_clear_mapping();
720 apb_timer_block_enabled = 0;
721 panic("failed to enable APB timer\n");
722}
723
724static inline void apbt_disable(int n)
725{
726 if (is_apbt_capable()) {
727 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
728 ctrl &= ~APBTMR_CONTROL_ENABLE;
729 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
730 }
731}
732
733/* called before apb_timer_enable, use early map */
734unsigned long apbt_quick_calibrate()
735{
736 int i, scale;
737 u64 old, new;
738 cycle_t t1, t2;
739 unsigned long khz = 0;
740 u32 loop, shift;
741
742 apbt_set_mapping();
743 apbt_start_counter(phy_cs_timer_id);
744
745 /* check if the timer can count down, otherwise return */
746 old = apbt_read_clocksource(&clocksource_apbt);
747 i = 10000;
748 while (--i) {
749 if (old != apbt_read_clocksource(&clocksource_apbt))
750 break;
751 }
752 if (!i)
753 goto failed;
754
755 /* count 16 ms */
756 loop = (apbt_freq * 1000) << 4;
757
758 /* restart the timer to ensure it won't get to 0 in the calibration */
759 apbt_start_counter(phy_cs_timer_id);
760
761 old = apbt_read_clocksource(&clocksource_apbt);
762 old += loop;
763
764 t1 = __native_read_tsc();
765
766 do {
767 new = apbt_read_clocksource(&clocksource_apbt);
768 } while (new < old);
769
770 t2 = __native_read_tsc();
771
772 shift = 5;
773 if (unlikely(loop >> shift == 0)) {
774 printk(KERN_INFO
775 "APBT TSC calibration failed, not enough resolution\n");
776 return 0;
777 }
778 scale = (int)div_u64((t2 - t1), loop >> shift);
779 khz = (scale * apbt_freq * 1000) >> shift;
780 printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz);
781 return khz;
782failed:
783 return 0;
784}
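The "mult = (nsec/cycle)*2^APBT_SHIFT" comments in apbt_clockevent_register() and apbt_clocksource_register() describe fixed-point scale factors: the clockevent converts nanoseconds to timer cycles, the clocksource converts cycles back to nanoseconds. The standalone sketch below works the arithmetic with a hypothetical 1 MHz clock (chosen only for round numbers); note the driver itself pre-divides apbt_freq down to MHz, which is why its clocksource setup passes MSEC_PER_SEC rather than NSEC_PER_SEC.

#include <stdint.h>
#include <stdio.h>

#define SHIFT		22			/* mirrors APBT_SHIFT */
#define NSEC_PER_SEC	1000000000ULL

/* Mirrors the kernel's div_sc(): (a << shift) / b. */
static uint32_t div_sc(uint64_t a, uint64_t b, int shift)
{
	return (uint32_t)((a << shift) / b);
}

int main(void)
{
	uint64_t freq_hz = 1000000;		/* hypothetical 1 MHz timer clock */

	/* clockevent direction: cycles = (ns * mult) >> shift */
	uint32_t evt_mult = div_sc(freq_hz, NSEC_PER_SEC, SHIFT);
	uint64_t ns = 1000000;			/* program a 1 ms event */
	uint64_t cycles = (ns * evt_mult) >> SHIFT;

	/* clocksource direction: ns = (cycles * mult) >> shift */
	uint32_t cs_mult = div_sc(NSEC_PER_SEC, freq_hz, SHIFT);
	uint64_t back = (cycles * cs_mult) >> SHIFT;

	printf("evt mult=%u: 1 ms -> %llu cycles (expect ~1000)\n",
	       evt_mult, (unsigned long long)cycles);
	printf("cs  mult=%u: %llu cycles -> %llu ns (expect ~1000000)\n",
	       cs_mult, (unsigned long long)cycles, (unsigned long long)back);
	return 0;
}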
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index dfca210f6a10..00187f1fcfb7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -581,7 +581,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
581 res = (((u64)(*deltatsc)) * pm_100ms); 581 res = (((u64)(*deltatsc)) * pm_100ms);
582 do_div(res, deltapm); 582 do_div(res, deltapm);
583 apic_printk(APIC_VERBOSE, "TSC delta adjusted to " 583 apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
584 "PM-Timer: %lu (%ld) \n", 584 "PM-Timer: %lu (%ld)\n",
585 (unsigned long)res, *deltatsc); 585 (unsigned long)res, *deltatsc);
586 *deltatsc = (long)res; 586 *deltatsc = (long)res;
587 } 587 }
@@ -1390,7 +1390,7 @@ void __init enable_IR_x2apic(void)
1390 } 1390 }
1391 1391
1392 local_irq_save(flags); 1392 local_irq_save(flags);
1393 mask_8259A(); 1393 legacy_pic->mask_all();
1394 mask_IO_APIC_setup(ioapic_entries); 1394 mask_IO_APIC_setup(ioapic_entries);
1395 1395
1396 if (dmar_table_init_ret) 1396 if (dmar_table_init_ret)
@@ -1422,7 +1422,7 @@ void __init enable_IR_x2apic(void)
1422nox2apic: 1422nox2apic:
1423 if (!ret) /* IR enabling failed */ 1423 if (!ret) /* IR enabling failed */
1424 restore_IO_APIC_setup(ioapic_entries); 1424 restore_IO_APIC_setup(ioapic_entries);
1425 unmask_8259A(); 1425 legacy_pic->restore_mask();
1426 local_irq_restore(flags); 1426 local_irq_restore(flags);
1427 1427
1428out: 1428out:
@@ -2018,7 +2018,7 @@ static int lapic_resume(struct sys_device *dev)
2018 } 2018 }
2019 2019
2020 mask_IO_APIC_setup(ioapic_entries); 2020 mask_IO_APIC_setup(ioapic_entries);
2021 mask_8259A(); 2021 legacy_pic->mask_all();
2022 } 2022 }
2023 2023
2024 if (x2apic_mode) 2024 if (x2apic_mode)
@@ -2062,7 +2062,7 @@ static int lapic_resume(struct sys_device *dev)
2062 2062
2063 if (intr_remapping_enabled) { 2063 if (intr_remapping_enabled) {
2064 reenable_intr_remapping(x2apic_mode); 2064 reenable_intr_remapping(x2apic_mode);
2065 unmask_8259A(); 2065 legacy_pic->restore_mask();
2066 restore_IO_APIC_setup(ioapic_entries); 2066 restore_IO_APIC_setup(ioapic_entries);
2067 free_ioapic_entries(ioapic_entries); 2067 free_ioapic_entries(ioapic_entries);
2068 } 2068 }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 53243ca7816d..e4e0ddcb1546 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -73,8 +73,8 @@
73 */ 73 */
74int sis_apic_bug = -1; 74int sis_apic_bug = -1;
75 75
76static DEFINE_SPINLOCK(ioapic_lock); 76static DEFINE_RAW_SPINLOCK(ioapic_lock);
77static DEFINE_SPINLOCK(vector_lock); 77static DEFINE_RAW_SPINLOCK(vector_lock);
78 78
79/* 79/*
80 * # of IRQ routing registers 80 * # of IRQ routing registers
@@ -94,8 +94,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
94/* # of MP IRQ source entries */ 94/* # of MP IRQ source entries */
95int mp_irq_entries; 95int mp_irq_entries;
96 96
97/* Number of legacy interrupts */
98static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY;
99/* GSI interrupts */ 97/* GSI interrupts */
100static int nr_irqs_gsi = NR_IRQS_LEGACY; 98static int nr_irqs_gsi = NR_IRQS_LEGACY;
101 99
@@ -140,33 +138,10 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
140 138
141/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 139/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
142#ifdef CONFIG_SPARSE_IRQ 140#ifdef CONFIG_SPARSE_IRQ
143static struct irq_cfg irq_cfgx[] = { 141static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
144#else 142#else
145static struct irq_cfg irq_cfgx[NR_IRQS] = { 143static struct irq_cfg irq_cfgx[NR_IRQS];
146#endif 144#endif
147 [0] = { .vector = IRQ0_VECTOR, },
148 [1] = { .vector = IRQ1_VECTOR, },
149 [2] = { .vector = IRQ2_VECTOR, },
150 [3] = { .vector = IRQ3_VECTOR, },
151 [4] = { .vector = IRQ4_VECTOR, },
152 [5] = { .vector = IRQ5_VECTOR, },
153 [6] = { .vector = IRQ6_VECTOR, },
154 [7] = { .vector = IRQ7_VECTOR, },
155 [8] = { .vector = IRQ8_VECTOR, },
156 [9] = { .vector = IRQ9_VECTOR, },
157 [10] = { .vector = IRQ10_VECTOR, },
158 [11] = { .vector = IRQ11_VECTOR, },
159 [12] = { .vector = IRQ12_VECTOR, },
160 [13] = { .vector = IRQ13_VECTOR, },
161 [14] = { .vector = IRQ14_VECTOR, },
162 [15] = { .vector = IRQ15_VECTOR, },
163};
164
165void __init io_apic_disable_legacy(void)
166{
167 nr_legacy_irqs = 0;
168 nr_irqs_gsi = 0;
169}
170 145
171int __init arch_early_irq_init(void) 146int __init arch_early_irq_init(void)
172{ 147{
@@ -176,6 +151,11 @@ int __init arch_early_irq_init(void)
176 int node; 151 int node;
177 int i; 152 int i;
178 153
154 if (!legacy_pic->nr_legacy_irqs) {
155 nr_irqs_gsi = 0;
156 io_apic_irqs = ~0UL;
157 }
158
179 cfg = irq_cfgx; 159 cfg = irq_cfgx;
180 count = ARRAY_SIZE(irq_cfgx); 160 count = ARRAY_SIZE(irq_cfgx);
181 node= cpu_to_node(boot_cpu_id); 161 node= cpu_to_node(boot_cpu_id);
@@ -185,8 +165,14 @@ int __init arch_early_irq_init(void)
185 desc->chip_data = &cfg[i]; 165 desc->chip_data = &cfg[i];
186 zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); 166 zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
187 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); 167 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
188 if (i < nr_legacy_irqs) 168 /*
189 cpumask_setall(cfg[i].domain); 169 * For legacy IRQ's, start with assigning irq0 to irq15 to
170 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
171 */
172 if (i < legacy_pic->nr_legacy_irqs) {
173 cfg[i].vector = IRQ0_VECTOR + i;
174 cpumask_set_cpu(0, cfg[i].domain);
175 }
190 } 176 }
191 177
192 return 0; 178 return 0;
@@ -406,7 +392,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
406 struct irq_pin_list *entry; 392 struct irq_pin_list *entry;
407 unsigned long flags; 393 unsigned long flags;
408 394
409 spin_lock_irqsave(&ioapic_lock, flags); 395 raw_spin_lock_irqsave(&ioapic_lock, flags);
410 for_each_irq_pin(entry, cfg->irq_2_pin) { 396 for_each_irq_pin(entry, cfg->irq_2_pin) {
411 unsigned int reg; 397 unsigned int reg;
412 int pin; 398 int pin;
@@ -415,11 +401,11 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
415 reg = io_apic_read(entry->apic, 0x10 + pin*2); 401 reg = io_apic_read(entry->apic, 0x10 + pin*2);
416 /* Is the remote IRR bit set? */ 402 /* Is the remote IRR bit set? */
417 if (reg & IO_APIC_REDIR_REMOTE_IRR) { 403 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
418 spin_unlock_irqrestore(&ioapic_lock, flags); 404 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
419 return true; 405 return true;
420 } 406 }
421 } 407 }
422 spin_unlock_irqrestore(&ioapic_lock, flags); 408 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
423 409
424 return false; 410 return false;
425} 411}
@@ -433,10 +419,10 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
433{ 419{
434 union entry_union eu; 420 union entry_union eu;
435 unsigned long flags; 421 unsigned long flags;
436 spin_lock_irqsave(&ioapic_lock, flags); 422 raw_spin_lock_irqsave(&ioapic_lock, flags);
437 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); 423 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
438 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); 424 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
439 spin_unlock_irqrestore(&ioapic_lock, flags); 425 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
440 return eu.entry; 426 return eu.entry;
441} 427}
442 428
@@ -459,9 +445,9 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
459void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 445void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
460{ 446{
461 unsigned long flags; 447 unsigned long flags;
462 spin_lock_irqsave(&ioapic_lock, flags); 448 raw_spin_lock_irqsave(&ioapic_lock, flags);
463 __ioapic_write_entry(apic, pin, e); 449 __ioapic_write_entry(apic, pin, e);
464 spin_unlock_irqrestore(&ioapic_lock, flags); 450 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
465} 451}
466 452
467/* 453/*
@@ -474,10 +460,10 @@ static void ioapic_mask_entry(int apic, int pin)
474 unsigned long flags; 460 unsigned long flags;
475 union entry_union eu = { .entry.mask = 1 }; 461 union entry_union eu = { .entry.mask = 1 };
476 462
477 spin_lock_irqsave(&ioapic_lock, flags); 463 raw_spin_lock_irqsave(&ioapic_lock, flags);
478 io_apic_write(apic, 0x10 + 2*pin, eu.w1); 464 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
479 io_apic_write(apic, 0x11 + 2*pin, eu.w2); 465 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
480 spin_unlock_irqrestore(&ioapic_lock, flags); 466 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
481} 467}
482 468
483/* 469/*
@@ -604,9 +590,9 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
604 590
605 BUG_ON(!cfg); 591 BUG_ON(!cfg);
606 592
607 spin_lock_irqsave(&ioapic_lock, flags); 593 raw_spin_lock_irqsave(&ioapic_lock, flags);
608 __mask_IO_APIC_irq(cfg); 594 __mask_IO_APIC_irq(cfg);
609 spin_unlock_irqrestore(&ioapic_lock, flags); 595 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
610} 596}
611 597
612static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) 598static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
@@ -614,9 +600,9 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
614 struct irq_cfg *cfg = desc->chip_data; 600 struct irq_cfg *cfg = desc->chip_data;
615 unsigned long flags; 601 unsigned long flags;
616 602
617 spin_lock_irqsave(&ioapic_lock, flags); 603 raw_spin_lock_irqsave(&ioapic_lock, flags);
618 __unmask_IO_APIC_irq(cfg); 604 __unmask_IO_APIC_irq(cfg);
619 spin_unlock_irqrestore(&ioapic_lock, flags); 605 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
620} 606}
621 607
622static void mask_IO_APIC_irq(unsigned int irq) 608static void mask_IO_APIC_irq(unsigned int irq)
@@ -865,7 +851,7 @@ static int __init find_isa_irq_apic(int irq, int type)
865 */ 851 */
866static int EISA_ELCR(unsigned int irq) 852static int EISA_ELCR(unsigned int irq)
867{ 853{
868 if (irq < nr_legacy_irqs) { 854 if (irq < legacy_pic->nr_legacy_irqs) {
869 unsigned int port = 0x4d0 + (irq >> 3); 855 unsigned int port = 0x4d0 + (irq >> 3);
870 return (inb(port) >> (irq & 7)) & 1; 856 return (inb(port) >> (irq & 7)) & 1;
871 } 857 }
@@ -1140,12 +1126,12 @@ void lock_vector_lock(void)
1140 /* Used to the online set of cpus does not change 1126 /* Used to the online set of cpus does not change
1141 * during assign_irq_vector. 1127 * during assign_irq_vector.
1142 */ 1128 */
1143 spin_lock(&vector_lock); 1129 raw_spin_lock(&vector_lock);
1144} 1130}
1145 1131
1146void unlock_vector_lock(void) 1132void unlock_vector_lock(void)
1147{ 1133{
1148 spin_unlock(&vector_lock); 1134 raw_spin_unlock(&vector_lock);
1149} 1135}
1150 1136
1151static int 1137static int
@@ -1162,7 +1148,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1162 * Also, we've got to be careful not to trash gate 1148 * Also, we've got to be careful not to trash gate
1163 * 0x80, because int 0x80 is hm, kind of importantish. ;) 1149 * 0x80, because int 0x80 is hm, kind of importantish. ;)
1164 */ 1150 */
1165 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; 1151 static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
1152 static int current_offset = VECTOR_OFFSET_START % 8;
1166 unsigned int old_vector; 1153 unsigned int old_vector;
1167 int cpu, err; 1154 int cpu, err;
1168 cpumask_var_t tmp_mask; 1155 cpumask_var_t tmp_mask;
@@ -1198,7 +1185,7 @@ next:
1198 if (vector >= first_system_vector) { 1185 if (vector >= first_system_vector) {
1199 /* If out of vectors on large boxen, must share them. */ 1186 /* If out of vectors on large boxen, must share them. */
1200 offset = (offset + 1) % 8; 1187 offset = (offset + 1) % 8;
1201 vector = FIRST_DEVICE_VECTOR + offset; 1188 vector = FIRST_EXTERNAL_VECTOR + offset;
1202 } 1189 }
1203 if (unlikely(current_vector == vector)) 1190 if (unlikely(current_vector == vector))
1204 continue; 1191 continue;
@@ -1232,9 +1219,9 @@ int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1232 int err; 1219 int err;
1233 unsigned long flags; 1220 unsigned long flags;
1234 1221
1235 spin_lock_irqsave(&vector_lock, flags); 1222 raw_spin_lock_irqsave(&vector_lock, flags);
1236 err = __assign_irq_vector(irq, cfg, mask); 1223 err = __assign_irq_vector(irq, cfg, mask);
1237 spin_unlock_irqrestore(&vector_lock, flags); 1224 raw_spin_unlock_irqrestore(&vector_lock, flags);
1238 return err; 1225 return err;
1239} 1226}
1240 1227
@@ -1268,11 +1255,16 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1268void __setup_vector_irq(int cpu) 1255void __setup_vector_irq(int cpu)
1269{ 1256{
1270 /* Initialize vector_irq on a new cpu */ 1257 /* Initialize vector_irq on a new cpu */
1271 /* This function must be called with vector_lock held */
1272 int irq, vector; 1258 int irq, vector;
1273 struct irq_cfg *cfg; 1259 struct irq_cfg *cfg;
1274 struct irq_desc *desc; 1260 struct irq_desc *desc;
1275 1261
1262 /*
1263 * vector_lock will make sure that we don't run into irq vector
1264 * assignments that might be happening on another cpu in parallel,
1265 * while we setup our initial vector to irq mappings.
1266 */
1267 raw_spin_lock(&vector_lock);
1276 /* Mark the inuse vectors */ 1268 /* Mark the inuse vectors */
1277 for_each_irq_desc(irq, desc) { 1269 for_each_irq_desc(irq, desc) {
1278 cfg = desc->chip_data; 1270 cfg = desc->chip_data;
@@ -1291,6 +1283,7 @@ void __setup_vector_irq(int cpu)
1291 if (!cpumask_test_cpu(cpu, cfg->domain)) 1283 if (!cpumask_test_cpu(cpu, cfg->domain))
1292 per_cpu(vector_irq, cpu)[vector] = -1; 1284 per_cpu(vector_irq, cpu)[vector] = -1;
1293 } 1285 }
1286 raw_spin_unlock(&vector_lock);
1294} 1287}
1295 1288
1296static struct irq_chip ioapic_chip; 1289static struct irq_chip ioapic_chip;
@@ -1440,6 +1433,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1440 1433
1441 cfg = desc->chip_data; 1434 cfg = desc->chip_data;
1442 1435
1436 /*
1437 * For legacy irqs, cfg->domain starts with cpu 0 for legacy
1438 * controllers like 8259. Now that IO-APIC can handle this irq, update
1439 * the cfg->domain.
1440 */
1441 if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
1442 apic->vector_allocation_domain(0, cfg->domain);
1443
1443 if (assign_irq_vector(irq, cfg, apic->target_cpus())) 1444 if (assign_irq_vector(irq, cfg, apic->target_cpus()))
1444 return; 1445 return;
1445 1446
@@ -1461,8 +1462,8 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1461 } 1462 }
1462 1463
1463 ioapic_register_intr(irq, desc, trigger); 1464 ioapic_register_intr(irq, desc, trigger);
1464 if (irq < nr_legacy_irqs) 1465 if (irq < legacy_pic->nr_legacy_irqs)
1465 disable_8259A_irq(irq); 1466 legacy_pic->chip->mask(irq);
1466 1467
1467 ioapic_write_entry(apic_id, pin, entry); 1468 ioapic_write_entry(apic_id, pin, entry);
1468} 1469}
@@ -1473,7 +1474,7 @@ static struct {
1473 1474
1474static void __init setup_IO_APIC_irqs(void) 1475static void __init setup_IO_APIC_irqs(void)
1475{ 1476{
1476 int apic_id = 0, pin, idx, irq; 1477 int apic_id, pin, idx, irq;
1477 int notcon = 0; 1478 int notcon = 0;
1478 struct irq_desc *desc; 1479 struct irq_desc *desc;
1479 struct irq_cfg *cfg; 1480 struct irq_cfg *cfg;
@@ -1481,14 +1482,7 @@ static void __init setup_IO_APIC_irqs(void)
1481 1482
1482 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1483 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1483 1484
1484#ifdef CONFIG_ACPI 1485 for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
1485 if (!acpi_disabled && acpi_ioapic) {
1486 apic_id = mp_find_ioapic(0);
1487 if (apic_id < 0)
1488 apic_id = 0;
1489 }
1490#endif
1491
1492 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { 1486 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
1493 idx = find_irq_entry(apic_id, pin, mp_INT); 1487 idx = find_irq_entry(apic_id, pin, mp_INT);
1494 if (idx == -1) { 1488 if (idx == -1) {
@@ -1510,6 +1504,9 @@ static void __init setup_IO_APIC_irqs(void)
1510 1504
1511 irq = pin_2_irq(idx, apic_id, pin); 1505 irq = pin_2_irq(idx, apic_id, pin);
1512 1506
1507 if ((apic_id > 0) && (irq > 16))
1508 continue;
1509
1513 /* 1510 /*
1514 * Skip the timer IRQ if there's a quirk handler 1511 * Skip the timer IRQ if there's a quirk handler
1515 * installed and if it returns 1: 1512 * installed and if it returns 1:
@@ -1539,6 +1536,56 @@ static void __init setup_IO_APIC_irqs(void)
1539} 1536}
1540 1537
1541/* 1538/*
1539 * for the gsi that is not in the first ioapic
1540 * but could not use acpi_register_gsi()
1541 * like some special sci in IBM x3330
1542 */
1543void setup_IO_APIC_irq_extra(u32 gsi)
1544{
1545 int apic_id = 0, pin, idx, irq;
1546 int node = cpu_to_node(boot_cpu_id);
1547 struct irq_desc *desc;
1548 struct irq_cfg *cfg;
1549
1550 /*
1551 * Convert 'gsi' to 'ioapic.pin'.
1552 */
1553 apic_id = mp_find_ioapic(gsi);
1554 if (apic_id < 0)
1555 return;
1556
1557 pin = mp_find_ioapic_pin(apic_id, gsi);
1558 idx = find_irq_entry(apic_id, pin, mp_INT);
1559 if (idx == -1)
1560 return;
1561
1562 irq = pin_2_irq(idx, apic_id, pin);
1563#ifdef CONFIG_SPARSE_IRQ
1564 desc = irq_to_desc(irq);
1565 if (desc)
1566 return;
1567#endif
1568 desc = irq_to_desc_alloc_node(irq, node);
1569 if (!desc) {
1570 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1571 return;
1572 }
1573
1574 cfg = desc->chip_data;
1575 add_pin_to_irq_node(cfg, node, apic_id, pin);
1576
1577 if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
1578 pr_debug("Pin %d-%d already programmed\n",
1579 mp_ioapics[apic_id].apicid, pin);
1580 return;
1581 }
1582 set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
1583
1584 setup_IO_APIC_irq(apic_id, pin, irq, desc,
1585 irq_trigger(idx), irq_polarity(idx));
1586}
1587
1588/*
1542 * Set up the timer pin, possibly with the 8259A-master behind. 1589 * Set up the timer pin, possibly with the 8259A-master behind.
1543 */ 1590 */
1544static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, 1591static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
@@ -1601,14 +1648,14 @@ __apicdebuginit(void) print_IO_APIC(void)
1601 1648
1602 for (apic = 0; apic < nr_ioapics; apic++) { 1649 for (apic = 0; apic < nr_ioapics; apic++) {
1603 1650
1604 spin_lock_irqsave(&ioapic_lock, flags); 1651 raw_spin_lock_irqsave(&ioapic_lock, flags);
1605 reg_00.raw = io_apic_read(apic, 0); 1652 reg_00.raw = io_apic_read(apic, 0);
1606 reg_01.raw = io_apic_read(apic, 1); 1653 reg_01.raw = io_apic_read(apic, 1);
1607 if (reg_01.bits.version >= 0x10) 1654 if (reg_01.bits.version >= 0x10)
1608 reg_02.raw = io_apic_read(apic, 2); 1655 reg_02.raw = io_apic_read(apic, 2);
1609 if (reg_01.bits.version >= 0x20) 1656 if (reg_01.bits.version >= 0x20)
1610 reg_03.raw = io_apic_read(apic, 3); 1657 reg_03.raw = io_apic_read(apic, 3);
1611 spin_unlock_irqrestore(&ioapic_lock, flags); 1658 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1612 1659
1613 printk("\n"); 1660 printk("\n");
1614 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); 1661 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
@@ -1647,7 +1694,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1647 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1694 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1648 1695
1649 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" 1696 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
1650 " Stat Dmod Deli Vect: \n"); 1697 " Stat Dmod Deli Vect:\n");
1651 1698
1652 for (i = 0; i <= reg_01.bits.entries; i++) { 1699 for (i = 0; i <= reg_01.bits.entries; i++) {
1653 struct IO_APIC_route_entry entry; 1700 struct IO_APIC_route_entry entry;
@@ -1825,12 +1872,12 @@ __apicdebuginit(void) print_PIC(void)
1825 unsigned int v; 1872 unsigned int v;
1826 unsigned long flags; 1873 unsigned long flags;
1827 1874
1828 if (!nr_legacy_irqs) 1875 if (!legacy_pic->nr_legacy_irqs)
1829 return; 1876 return;
1830 1877
1831 printk(KERN_DEBUG "\nprinting PIC contents\n"); 1878 printk(KERN_DEBUG "\nprinting PIC contents\n");
1832 1879
1833 spin_lock_irqsave(&i8259A_lock, flags); 1880 raw_spin_lock_irqsave(&i8259A_lock, flags);
1834 1881
1835 v = inb(0xa1) << 8 | inb(0x21); 1882 v = inb(0xa1) << 8 | inb(0x21);
1836 printk(KERN_DEBUG "... PIC IMR: %04x\n", v); 1883 printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
@@ -1844,7 +1891,7 @@ __apicdebuginit(void) print_PIC(void)
1844 outb(0x0a,0xa0); 1891 outb(0x0a,0xa0);
1845 outb(0x0a,0x20); 1892 outb(0x0a,0x20);
1846 1893
1847 spin_unlock_irqrestore(&i8259A_lock, flags); 1894 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
1848 1895
1849 printk(KERN_DEBUG "... PIC ISR: %04x\n", v); 1896 printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
1850 1897
@@ -1903,13 +1950,13 @@ void __init enable_IO_APIC(void)
1903 * The number of IO-APIC IRQ registers (== #pins): 1950 * The number of IO-APIC IRQ registers (== #pins):
1904 */ 1951 */
1905 for (apic = 0; apic < nr_ioapics; apic++) { 1952 for (apic = 0; apic < nr_ioapics; apic++) {
1906 spin_lock_irqsave(&ioapic_lock, flags); 1953 raw_spin_lock_irqsave(&ioapic_lock, flags);
1907 reg_01.raw = io_apic_read(apic, 1); 1954 reg_01.raw = io_apic_read(apic, 1);
1908 spin_unlock_irqrestore(&ioapic_lock, flags); 1955 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1909 nr_ioapic_registers[apic] = reg_01.bits.entries+1; 1956 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
1910 } 1957 }
1911 1958
1912 if (!nr_legacy_irqs) 1959 if (!legacy_pic->nr_legacy_irqs)
1913 return; 1960 return;
1914 1961
1915 for(apic = 0; apic < nr_ioapics; apic++) { 1962 for(apic = 0; apic < nr_ioapics; apic++) {
@@ -1966,7 +2013,7 @@ void disable_IO_APIC(void)
1966 */ 2013 */
1967 clear_IO_APIC(); 2014 clear_IO_APIC();
1968 2015
1969 if (!nr_legacy_irqs) 2016 if (!legacy_pic->nr_legacy_irqs)
1970 return; 2017 return;
1971 2018
1972 /* 2019 /*
@@ -2045,9 +2092,9 @@ void __init setup_ioapic_ids_from_mpc(void)
2045 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { 2092 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
2046 2093
2047 /* Read the register 0 value */ 2094 /* Read the register 0 value */
2048 spin_lock_irqsave(&ioapic_lock, flags); 2095 raw_spin_lock_irqsave(&ioapic_lock, flags);
2049 reg_00.raw = io_apic_read(apic_id, 0); 2096 reg_00.raw = io_apic_read(apic_id, 0);
2050 spin_unlock_irqrestore(&ioapic_lock, flags); 2097 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2051 2098
2052 old_id = mp_ioapics[apic_id].apicid; 2099 old_id = mp_ioapics[apic_id].apicid;
2053 2100
@@ -2106,16 +2153,16 @@ void __init setup_ioapic_ids_from_mpc(void)
2106 mp_ioapics[apic_id].apicid); 2153 mp_ioapics[apic_id].apicid);
2107 2154
2108 reg_00.bits.ID = mp_ioapics[apic_id].apicid; 2155 reg_00.bits.ID = mp_ioapics[apic_id].apicid;
2109 spin_lock_irqsave(&ioapic_lock, flags); 2156 raw_spin_lock_irqsave(&ioapic_lock, flags);
2110 io_apic_write(apic_id, 0, reg_00.raw); 2157 io_apic_write(apic_id, 0, reg_00.raw);
2111 spin_unlock_irqrestore(&ioapic_lock, flags); 2158 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2112 2159
2113 /* 2160 /*
2114 * Sanity check 2161 * Sanity check
2115 */ 2162 */
2116 spin_lock_irqsave(&ioapic_lock, flags); 2163 raw_spin_lock_irqsave(&ioapic_lock, flags);
2117 reg_00.raw = io_apic_read(apic_id, 0); 2164 reg_00.raw = io_apic_read(apic_id, 0);
2118 spin_unlock_irqrestore(&ioapic_lock, flags); 2165 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2119 if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) 2166 if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
2120 printk("could not set ID!\n"); 2167 printk("could not set ID!\n");
2121 else 2168 else
@@ -2198,15 +2245,15 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
2198 unsigned long flags; 2245 unsigned long flags;
2199 struct irq_cfg *cfg; 2246 struct irq_cfg *cfg;
2200 2247
2201 spin_lock_irqsave(&ioapic_lock, flags); 2248 raw_spin_lock_irqsave(&ioapic_lock, flags);
2202 if (irq < nr_legacy_irqs) { 2249 if (irq < legacy_pic->nr_legacy_irqs) {
2203 disable_8259A_irq(irq); 2250 legacy_pic->chip->mask(irq);
2204 if (i8259A_irq_pending(irq)) 2251 if (legacy_pic->irq_pending(irq))
2205 was_pending = 1; 2252 was_pending = 1;
2206 } 2253 }
2207 cfg = irq_cfg(irq); 2254 cfg = irq_cfg(irq);
2208 __unmask_IO_APIC_irq(cfg); 2255 __unmask_IO_APIC_irq(cfg);
2209 spin_unlock_irqrestore(&ioapic_lock, flags); 2256 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2210 2257
2211 return was_pending; 2258 return was_pending;
2212} 2259}
@@ -2217,9 +2264,9 @@ static int ioapic_retrigger_irq(unsigned int irq)
2217 struct irq_cfg *cfg = irq_cfg(irq); 2264 struct irq_cfg *cfg = irq_cfg(irq);
2218 unsigned long flags; 2265 unsigned long flags;
2219 2266
2220 spin_lock_irqsave(&vector_lock, flags); 2267 raw_spin_lock_irqsave(&vector_lock, flags);
2221 apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); 2268 apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2222 spin_unlock_irqrestore(&vector_lock, flags); 2269 raw_spin_unlock_irqrestore(&vector_lock, flags);
2223 2270
2224 return 1; 2271 return 1;
2225} 2272}
@@ -2312,14 +2359,14 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2312 irq = desc->irq; 2359 irq = desc->irq;
2313 cfg = desc->chip_data; 2360 cfg = desc->chip_data;
2314 2361
2315 spin_lock_irqsave(&ioapic_lock, flags); 2362 raw_spin_lock_irqsave(&ioapic_lock, flags);
2316 ret = set_desc_affinity(desc, mask, &dest); 2363 ret = set_desc_affinity(desc, mask, &dest);
2317 if (!ret) { 2364 if (!ret) {
2318 /* Only the high 8 bits are valid. */ 2365 /* Only the high 8 bits are valid. */
2319 dest = SET_APIC_LOGICAL_ID(dest); 2366 dest = SET_APIC_LOGICAL_ID(dest);
2320 __target_IO_APIC_irq(irq, dest, cfg); 2367 __target_IO_APIC_irq(irq, dest, cfg);
2321 } 2368 }
2322 spin_unlock_irqrestore(&ioapic_lock, flags); 2369 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2323 2370
2324 return ret; 2371 return ret;
2325} 2372}
@@ -2554,9 +2601,9 @@ static void eoi_ioapic_irq(struct irq_desc *desc)
2554 irq = desc->irq; 2601 irq = desc->irq;
2555 cfg = desc->chip_data; 2602 cfg = desc->chip_data;
2556 2603
2557 spin_lock_irqsave(&ioapic_lock, flags); 2604 raw_spin_lock_irqsave(&ioapic_lock, flags);
2558 __eoi_ioapic_irq(irq, cfg); 2605 __eoi_ioapic_irq(irq, cfg);
2559 spin_unlock_irqrestore(&ioapic_lock, flags); 2606 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2560} 2607}
2561 2608
2562static void ack_apic_level(unsigned int irq) 2609static void ack_apic_level(unsigned int irq)
@@ -2734,8 +2781,8 @@ static inline void init_IO_APIC_traps(void)
2734 * so default to an old-fashioned 8259 2781 * so default to an old-fashioned 8259
2735 * interrupt if we can.. 2782 * interrupt if we can..
2736 */ 2783 */
2737 if (irq < nr_legacy_irqs) 2784 if (irq < legacy_pic->nr_legacy_irqs)
2738 make_8259A_irq(irq); 2785 legacy_pic->make_irq(irq);
2739 else 2786 else
2740 /* Strange. Oh, well.. */ 2787 /* Strange. Oh, well.. */
2741 desc->chip = &no_irq_chip; 2788 desc->chip = &no_irq_chip;
@@ -2892,7 +2939,7 @@ static inline void __init check_timer(void)
2892 /* 2939 /*
2893 * get/set the timer IRQ vector: 2940 * get/set the timer IRQ vector:
2894 */ 2941 */
2895 disable_8259A_irq(0); 2942 legacy_pic->chip->mask(0);
2896 assign_irq_vector(0, cfg, apic->target_cpus()); 2943 assign_irq_vector(0, cfg, apic->target_cpus());
2897 2944
2898 /* 2945 /*
@@ -2905,7 +2952,7 @@ static inline void __init check_timer(void)
2905 * automatically. 2952 * automatically.
2906 */ 2953 */
2907 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2954 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2908 init_8259A(1); 2955 legacy_pic->init(1);
2909#ifdef CONFIG_X86_32 2956#ifdef CONFIG_X86_32
2910 { 2957 {
2911 unsigned int ver; 2958 unsigned int ver;
@@ -2964,7 +3011,7 @@ static inline void __init check_timer(void)
2964 if (timer_irq_works()) { 3011 if (timer_irq_works()) {
2965 if (nmi_watchdog == NMI_IO_APIC) { 3012 if (nmi_watchdog == NMI_IO_APIC) {
2966 setup_nmi(); 3013 setup_nmi();
2967 enable_8259A_irq(0); 3014 legacy_pic->chip->unmask(0);
2968 } 3015 }
2969 if (disable_timer_pin_1 > 0) 3016 if (disable_timer_pin_1 > 0)
2970 clear_IO_APIC_pin(0, pin1); 3017 clear_IO_APIC_pin(0, pin1);
@@ -2987,14 +3034,14 @@ static inline void __init check_timer(void)
2987 */ 3034 */
2988 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); 3035 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
2989 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 3036 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2990 enable_8259A_irq(0); 3037 legacy_pic->chip->unmask(0);
2991 if (timer_irq_works()) { 3038 if (timer_irq_works()) {
2992 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 3039 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2993 timer_through_8259 = 1; 3040 timer_through_8259 = 1;
2994 if (nmi_watchdog == NMI_IO_APIC) { 3041 if (nmi_watchdog == NMI_IO_APIC) {
2995 disable_8259A_irq(0); 3042 legacy_pic->chip->mask(0);
2996 setup_nmi(); 3043 setup_nmi();
2997 enable_8259A_irq(0); 3044 legacy_pic->chip->unmask(0);
2998 } 3045 }
2999 goto out; 3046 goto out;
3000 } 3047 }
@@ -3002,7 +3049,7 @@ static inline void __init check_timer(void)
3002 * Cleanup, just in case ... 3049 * Cleanup, just in case ...
3003 */ 3050 */
3004 local_irq_disable(); 3051 local_irq_disable();
3005 disable_8259A_irq(0); 3052 legacy_pic->chip->mask(0);
3006 clear_IO_APIC_pin(apic2, pin2); 3053 clear_IO_APIC_pin(apic2, pin2);
3007 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 3054 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
3008 } 3055 }
@@ -3021,22 +3068,22 @@ static inline void __init check_timer(void)
3021 3068
3022 lapic_register_intr(0, desc); 3069 lapic_register_intr(0, desc);
3023 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 3070 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
3024 enable_8259A_irq(0); 3071 legacy_pic->chip->unmask(0);
3025 3072
3026 if (timer_irq_works()) { 3073 if (timer_irq_works()) {
3027 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 3074 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3028 goto out; 3075 goto out;
3029 } 3076 }
3030 local_irq_disable(); 3077 local_irq_disable();
3031 disable_8259A_irq(0); 3078 legacy_pic->chip->mask(0);
3032 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); 3079 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
3033 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); 3080 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
3034 3081
3035 apic_printk(APIC_QUIET, KERN_INFO 3082 apic_printk(APIC_QUIET, KERN_INFO
3036 "...trying to set up timer as ExtINT IRQ...\n"); 3083 "...trying to set up timer as ExtINT IRQ...\n");
3037 3084
3038 init_8259A(0); 3085 legacy_pic->init(0);
3039 make_8259A_irq(0); 3086 legacy_pic->make_irq(0);
3040 apic_write(APIC_LVT0, APIC_DM_EXTINT); 3087 apic_write(APIC_LVT0, APIC_DM_EXTINT);
3041 3088
3042 unlock_ExtINT_logic(); 3089 unlock_ExtINT_logic();
@@ -3078,7 +3125,7 @@ void __init setup_IO_APIC(void)
3078 /* 3125 /*
3079 * calling enable_IO_APIC() is moved to setup_local_APIC for BP 3126 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
3080 */ 3127 */
3081 io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL; 3128 io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
3082 3129
3083 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); 3130 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
3084 /* 3131 /*
@@ -3089,7 +3136,7 @@ void __init setup_IO_APIC(void)
3089 sync_Arb_IDs(); 3136 sync_Arb_IDs();
3090 setup_IO_APIC_irqs(); 3137 setup_IO_APIC_irqs();
3091 init_IO_APIC_traps(); 3138 init_IO_APIC_traps();
3092 if (nr_legacy_irqs) 3139 if (legacy_pic->nr_legacy_irqs)
3093 check_timer(); 3140 check_timer();
3094} 3141}
3095 3142
@@ -3138,13 +3185,13 @@ static int ioapic_resume(struct sys_device *dev)
3138 data = container_of(dev, struct sysfs_ioapic_data, dev); 3185 data = container_of(dev, struct sysfs_ioapic_data, dev);
3139 entry = data->entry; 3186 entry = data->entry;
3140 3187
3141 spin_lock_irqsave(&ioapic_lock, flags); 3188 raw_spin_lock_irqsave(&ioapic_lock, flags);
3142 reg_00.raw = io_apic_read(dev->id, 0); 3189 reg_00.raw = io_apic_read(dev->id, 0);
3143 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { 3190 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
3144 reg_00.bits.ID = mp_ioapics[dev->id].apicid; 3191 reg_00.bits.ID = mp_ioapics[dev->id].apicid;
3145 io_apic_write(dev->id, 0, reg_00.raw); 3192 io_apic_write(dev->id, 0, reg_00.raw);
3146 } 3193 }
3147 spin_unlock_irqrestore(&ioapic_lock, flags); 3194 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3148 for (i = 0; i < nr_ioapic_registers[dev->id]; i++) 3195 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
3149 ioapic_write_entry(dev->id, i, entry[i]); 3196 ioapic_write_entry(dev->id, i, entry[i]);
3150 3197
@@ -3207,7 +3254,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
3207 if (irq_want < nr_irqs_gsi) 3254 if (irq_want < nr_irqs_gsi)
3208 irq_want = nr_irqs_gsi; 3255 irq_want = nr_irqs_gsi;
3209 3256
3210 spin_lock_irqsave(&vector_lock, flags); 3257 raw_spin_lock_irqsave(&vector_lock, flags);
3211 for (new = irq_want; new < nr_irqs; new++) { 3258 for (new = irq_want; new < nr_irqs; new++) {
3212 desc_new = irq_to_desc_alloc_node(new, node); 3259 desc_new = irq_to_desc_alloc_node(new, node);
3213 if (!desc_new) { 3260 if (!desc_new) {
@@ -3226,14 +3273,11 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
3226 irq = new; 3273 irq = new;
3227 break; 3274 break;
3228 } 3275 }
3229 spin_unlock_irqrestore(&vector_lock, flags); 3276 raw_spin_unlock_irqrestore(&vector_lock, flags);
3277
3278 if (irq > 0)
3279 dynamic_irq_init_keep_chip_data(irq);
3230 3280
3231 if (irq > 0) {
3232 dynamic_irq_init(irq);
3233 /* restore it, in case dynamic_irq_init clear it */
3234 if (desc_new)
3235 desc_new->chip_data = cfg_new;
3236 }
3237 return irq; 3281 return irq;
3238} 3282}
3239 3283
@@ -3255,20 +3299,13 @@ int create_irq(void)
3255void destroy_irq(unsigned int irq) 3299void destroy_irq(unsigned int irq)
3256{ 3300{
3257 unsigned long flags; 3301 unsigned long flags;
3258 struct irq_cfg *cfg;
3259 struct irq_desc *desc;
3260 3302
3261 /* store it, in case dynamic_irq_cleanup clear it */ 3303 dynamic_irq_cleanup_keep_chip_data(irq);
3262 desc = irq_to_desc(irq);
3263 cfg = desc->chip_data;
3264 dynamic_irq_cleanup(irq);
3265 /* connect back irq_cfg */
3266 desc->chip_data = cfg;
3267 3304
3268 free_irte(irq); 3305 free_irte(irq);
3269 spin_lock_irqsave(&vector_lock, flags); 3306 raw_spin_lock_irqsave(&vector_lock, flags);
3270 __clear_irq_vector(irq, cfg); 3307 __clear_irq_vector(irq, get_irq_chip_data(irq));
3271 spin_unlock_irqrestore(&vector_lock, flags); 3308 raw_spin_unlock_irqrestore(&vector_lock, flags);
3272} 3309}
3273 3310
3274/* 3311/*
@@ -3805,9 +3842,9 @@ int __init io_apic_get_redir_entries (int ioapic)
3805 union IO_APIC_reg_01 reg_01; 3842 union IO_APIC_reg_01 reg_01;
3806 unsigned long flags; 3843 unsigned long flags;
3807 3844
3808 spin_lock_irqsave(&ioapic_lock, flags); 3845 raw_spin_lock_irqsave(&ioapic_lock, flags);
3809 reg_01.raw = io_apic_read(ioapic, 1); 3846 reg_01.raw = io_apic_read(ioapic, 1);
3810 spin_unlock_irqrestore(&ioapic_lock, flags); 3847 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3811 3848
3812 return reg_01.bits.entries; 3849 return reg_01.bits.entries;
3813} 3850}
@@ -3890,7 +3927,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
3890 /* 3927 /*
3891 * IRQs < 16 are already in the irq_2_pin[] map 3928 * IRQs < 16 are already in the irq_2_pin[] map
3892 */ 3929 */
3893 if (irq >= nr_legacy_irqs) { 3930 if (irq >= legacy_pic->nr_legacy_irqs) {
3894 cfg = desc->chip_data; 3931 cfg = desc->chip_data;
3895 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { 3932 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
3896 printk(KERN_INFO "can not add pin %d for irq %d\n", 3933 printk(KERN_INFO "can not add pin %d for irq %d\n",
@@ -3969,9 +4006,9 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3969 if (physids_empty(apic_id_map)) 4006 if (physids_empty(apic_id_map))
3970 apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); 4007 apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
3971 4008
3972 spin_lock_irqsave(&ioapic_lock, flags); 4009 raw_spin_lock_irqsave(&ioapic_lock, flags);
3973 reg_00.raw = io_apic_read(ioapic, 0); 4010 reg_00.raw = io_apic_read(ioapic, 0);
3974 spin_unlock_irqrestore(&ioapic_lock, flags); 4011 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3975 4012
3976 if (apic_id >= get_physical_broadcast()) { 4013 if (apic_id >= get_physical_broadcast()) {
3977 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " 4014 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
@@ -4005,10 +4042,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
4005 if (reg_00.bits.ID != apic_id) { 4042 if (reg_00.bits.ID != apic_id) {
4006 reg_00.bits.ID = apic_id; 4043 reg_00.bits.ID = apic_id;
4007 4044
4008 spin_lock_irqsave(&ioapic_lock, flags); 4045 raw_spin_lock_irqsave(&ioapic_lock, flags);
4009 io_apic_write(ioapic, 0, reg_00.raw); 4046 io_apic_write(ioapic, 0, reg_00.raw);
4010 reg_00.raw = io_apic_read(ioapic, 0); 4047 reg_00.raw = io_apic_read(ioapic, 0);
4011 spin_unlock_irqrestore(&ioapic_lock, flags); 4048 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
4012 4049
4013 /* Sanity check */ 4050 /* Sanity check */
4014 if (reg_00.bits.ID != apic_id) { 4051 if (reg_00.bits.ID != apic_id) {
@@ -4029,9 +4066,9 @@ int __init io_apic_get_version(int ioapic)
4029 union IO_APIC_reg_01 reg_01; 4066 union IO_APIC_reg_01 reg_01;
4030 unsigned long flags; 4067 unsigned long flags;
4031 4068
4032 spin_lock_irqsave(&ioapic_lock, flags); 4069 raw_spin_lock_irqsave(&ioapic_lock, flags);
4033 reg_01.raw = io_apic_read(ioapic, 1); 4070 reg_01.raw = io_apic_read(ioapic, 1);
4034 spin_unlock_irqrestore(&ioapic_lock, flags); 4071 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
4035 4072
4036 return reg_01.bits.version; 4073 return reg_01.bits.version;
4037} 4074}
@@ -4063,27 +4100,23 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
4063#ifdef CONFIG_SMP 4100#ifdef CONFIG_SMP
4064void __init setup_ioapic_dest(void) 4101void __init setup_ioapic_dest(void)
4065{ 4102{
4066 int pin, ioapic = 0, irq, irq_entry; 4103 int pin, ioapic, irq, irq_entry;
4067 struct irq_desc *desc; 4104 struct irq_desc *desc;
4068 const struct cpumask *mask; 4105 const struct cpumask *mask;
4069 4106
4070 if (skip_ioapic_setup == 1) 4107 if (skip_ioapic_setup == 1)
4071 return; 4108 return;
4072 4109
4073#ifdef CONFIG_ACPI 4110 for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
4074 if (!acpi_disabled && acpi_ioapic) {
4075 ioapic = mp_find_ioapic(0);
4076 if (ioapic < 0)
4077 ioapic = 0;
4078 }
4079#endif
4080
4081 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { 4111 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
4082 irq_entry = find_irq_entry(ioapic, pin, mp_INT); 4112 irq_entry = find_irq_entry(ioapic, pin, mp_INT);
4083 if (irq_entry == -1) 4113 if (irq_entry == -1)
4084 continue; 4114 continue;
4085 irq = pin_2_irq(irq_entry, ioapic, pin); 4115 irq = pin_2_irq(irq_entry, ioapic, pin);
4086 4116
4117 if ((ioapic > 0) && (irq > 16))
4118 continue;
4119
4087 desc = irq_to_desc(irq); 4120 desc = irq_to_desc(irq);
4088 4121
4089 /* 4122 /*
@@ -4268,3 +4301,24 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4268 4301
4269 nr_ioapics++; 4302 nr_ioapics++;
4270} 4303}
4304
4305/* Enable IOAPIC early just for system timer */
4306void __init pre_init_apic_IRQ0(void)
4307{
4308 struct irq_cfg *cfg;
4309 struct irq_desc *desc;
4310
4311 printk(KERN_INFO "Early APIC setup for system timer0\n");
4312#ifndef CONFIG_SMP
4313 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
4314#endif
4315 desc = irq_to_desc_alloc_node(0, 0);
4316
4317 setup_local_APIC();
4318
4319 cfg = irq_cfg(0);
4320 add_pin_to_irq_node(cfg, 0, 0, 0);
4321 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
4322
4323 setup_IO_APIC_irq(0, 0, 0, desc, 0, 0);
4324}
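
The io_apic.c hunks above replace every direct 8259A call (disable_8259A_irq(), enable_8259A_irq(), init_8259A(), make_8259A_irq(), i8259A_irq_pending()) and the bare nr_legacy_irqs counter with indirect calls through a legacy_pic descriptor, so a platform without a legacy PIC (e.g. Moorestown) can plug in a null implementation; in the same pass, ioapic_lock, vector_lock and i8259A_lock become raw spinlocks so they stay true spinning locks even on kernels where ordinary spinlocks may be turned into sleeping locks. Below is a minimal sketch of the interface this file now programs against; only the members actually used in the hunks are listed, and the real definition in the i8259 header may carry additional fields.

struct legacy_pic {                               /* sketch, not the full definition        */
        int nr_legacy_irqs;                       /* 16 with an 8259A present, 0 otherwise  */
        struct irq_chip *chip;                    /* ->mask(irq) / ->unmask(irq)            */
        void (*init)(int auto_eoi);               /* was init_8259A()                       */
        int (*irq_pending)(unsigned int irq);     /* was i8259A_irq_pending()               */
        void (*make_irq)(unsigned int irq);       /* was make_8259A_irq()                   */
};

extern struct legacy_pic *legacy_pic;             /* points at the 8259A ops or a null PIC  */

/* Typical before/after pattern from the hunks above:                          */
/*   disable_8259A_irq(0);          ->  legacy_pic->chip->mask(0);             */
/*   enable_8259A_irq(0);           ->  legacy_pic->chip->unmask(0);           */
/*   if (irq < nr_legacy_irqs)      ->  if (irq < legacy_pic->nr_legacy_irqs)  */
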
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 0159a69396cb..8aa65adbd25d 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -177,7 +177,7 @@ int __init check_nmi_watchdog(void)
177error: 177error:
178 if (nmi_watchdog == NMI_IO_APIC) { 178 if (nmi_watchdog == NMI_IO_APIC) {
179 if (!timer_through_8259) 179 if (!timer_through_8259)
180 disable_8259A_irq(0); 180 legacy_pic->chip->mask(0);
181 on_each_cpu(__acpi_nmi_disable, NULL, 1); 181 on_each_cpu(__acpi_nmi_disable, NULL, 1);
182 } 182 }
183 183
@@ -416,13 +416,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
416 416
417 /* We can be called before check_nmi_watchdog, hence NULL check. */ 417 /* We can be called before check_nmi_watchdog, hence NULL check. */
418 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { 418 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
419 static DEFINE_SPINLOCK(lock); /* Serialise the printks */ 419 static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
420 420
421 spin_lock(&lock); 421 raw_spin_lock(&lock);
422 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); 422 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
423 show_regs(regs); 423 show_regs(regs);
424 dump_stack(); 424 dump_stack();
425 spin_unlock(&lock); 425 raw_spin_unlock(&lock);
426 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); 426 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
427 427
428 rc = 1; 428 rc = 1;
@@ -438,8 +438,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
438 * Ayiee, looks like this CPU is stuck ... 438 * Ayiee, looks like this CPU is stuck ...
439 * wait a few IRQs (5 seconds) before doing the oops ... 439 * wait a few IRQs (5 seconds) before doing the oops ...
440 */ 440 */
441 __this_cpu_inc(per_cpu_var(alert_counter)); 441 __this_cpu_inc(alert_counter);
442 if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz) 442 if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
443 /* 443 /*
444 * die_nmi will return ONLY if NOTIFY_STOP happens.. 444 * die_nmi will return ONLY if NOTIFY_STOP happens..
445 */ 445 */
@@ -447,7 +447,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
447 regs, panic_on_timeout); 447 regs, panic_on_timeout);
448 } else { 448 } else {
449 __get_cpu_var(last_irq_sum) = sum; 449 __get_cpu_var(last_irq_sum) = sum;
450 __this_cpu_write(per_cpu_var(alert_counter), 0); 450 __this_cpu_write(alert_counter, 0);
451 } 451 }
452 452
453 /* see if the nmi watchdog went off */ 453 /* see if the nmi watchdog went off */
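
Two independent cleanups land in nmi.c: the printk-serialising lock becomes a raw spinlock (same motivation as the ioapic_lock change above), and the __this_cpu_*() accessors now take the per-CPU variable by its plain name, since the per_cpu_var() name-mangling wrapper was removed from the percpu API. An illustrative (not verbatim) use of the updated accessors, assuming a DEFINE_PER_CPU counter like the alert_counter used here:

static DEFINE_PER_CPU(unsigned int, alert_counter);

static void nmi_tick_example(unsigned int nmi_hz)
{
        __this_cpu_inc(alert_counter);                   /* no per_cpu_var() wrapper       */
        if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
                __this_cpu_write(alert_counter, 0);      /* reset after ~5s of stuck ticks */
}
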
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 98c4665f251c..3e28401f161c 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -225,7 +225,7 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc)
225 225
226 mpc_record = 0; 226 mpc_record = 0;
227 printk(KERN_INFO 227 printk(KERN_INFO
228 "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); 228 "Found an OEM MPC table at %8p - parsing it...\n", oemtable);
229 229
230 if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { 230 if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
231 printk(KERN_WARNING 231 printk(KERN_WARNING
@@ -277,6 +277,7 @@ static __init void early_check_numaq(void)
277 x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; 277 x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus;
278 x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; 278 x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info;
279 x86_init.timers.tsc_pre_init = numaq_tsc_init; 279 x86_init.timers.tsc_pre_init = numaq_tsc_init;
280 x86_init.pci.init = pci_numaq_init;
280 } 281 }
281} 282}
282 283
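
The NUMAQ change adds one more x86_init override: the sub-architecture installs its own PCI initialisation hook alongside the MP-parse and TSC hooks it already overrides. A hedged sketch of the pattern, assuming the hook keeps the int (*)(void) signature that pci_numaq_init uses; the platform names below are hypothetical:

static int __init example_platform_pci_init(void)       /* hypothetical platform hook */
{
        /* platform-specific root-bus scan would go here */
        return 0;
}

static void __init example_platform_setup(void)
{
        /* installed early, before the generic PCI init runs the hook */
        x86_init.pci.init = example_platform_pci_init;
}
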
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 21db3cbea7dc..3740c8a4eae7 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV APIC functions (note: not an Intel compatible APIC) 6 * SGI UV APIC functions (note: not an Intel compatible APIC)
7 * 7 *
8 * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2009 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10#include <linux/cpumask.h> 10#include <linux/cpumask.h>
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
@@ -20,6 +20,8 @@
20#include <linux/cpu.h> 20#include <linux/cpu.h>
21#include <linux/init.h> 21#include <linux/init.h>
22#include <linux/io.h> 22#include <linux/io.h>
23#include <linux/pci.h>
24#include <linux/kdebug.h>
23 25
24#include <asm/uv/uv_mmrs.h> 26#include <asm/uv/uv_mmrs.h>
25#include <asm/uv/uv_hub.h> 27#include <asm/uv/uv_hub.h>
@@ -34,10 +36,13 @@
34 36
35DEFINE_PER_CPU(int, x2apic_extra_bits); 37DEFINE_PER_CPU(int, x2apic_extra_bits);
36 38
39#define PR_DEVEL(fmt, args...) pr_devel("%s: " fmt, __func__, args)
40
37static enum uv_system_type uv_system_type; 41static enum uv_system_type uv_system_type;
38static u64 gru_start_paddr, gru_end_paddr; 42static u64 gru_start_paddr, gru_end_paddr;
39int uv_min_hub_revision_id; 43int uv_min_hub_revision_id;
40EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); 44EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
45static DEFINE_SPINLOCK(uv_nmi_lock);
41 46
42static inline bool is_GRU_range(u64 start, u64 end) 47static inline bool is_GRU_range(u64 start, u64 end)
43{ 48{
@@ -71,6 +76,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
71 if (!strcmp(oem_id, "SGI")) { 76 if (!strcmp(oem_id, "SGI")) {
72 nodeid = early_get_nodeid(); 77 nodeid = early_get_nodeid();
73 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; 78 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
79 x86_platform.nmi_init = uv_nmi_init;
74 if (!strcmp(oem_table_id, "UVL")) 80 if (!strcmp(oem_table_id, "UVL"))
75 uv_system_type = UV_LEGACY_APIC; 81 uv_system_type = UV_LEGACY_APIC;
76 else if (!strcmp(oem_table_id, "UVX")) 82 else if (!strcmp(oem_table_id, "UVX"))
@@ -482,7 +488,7 @@ static void uv_heartbeat(unsigned long ignored)
482 488
483static void __cpuinit uv_heartbeat_enable(int cpu) 489static void __cpuinit uv_heartbeat_enable(int cpu)
484{ 490{
485 if (!uv_cpu_hub_info(cpu)->scir.enabled) { 491 while (!uv_cpu_hub_info(cpu)->scir.enabled) {
486 struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; 492 struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
487 493
488 uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); 494 uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
@@ -490,11 +496,10 @@ static void __cpuinit uv_heartbeat_enable(int cpu)
490 timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; 496 timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
491 add_timer_on(timer, cpu); 497 add_timer_on(timer, cpu);
492 uv_cpu_hub_info(cpu)->scir.enabled = 1; 498 uv_cpu_hub_info(cpu)->scir.enabled = 1;
493 }
494 499
495 /* check boot cpu */ 500 /* also ensure that boot cpu is enabled */
496 if (!uv_cpu_hub_info(0)->scir.enabled) 501 cpu = 0;
497 uv_heartbeat_enable(0); 502 }
498} 503}
499 504
500#ifdef CONFIG_HOTPLUG_CPU 505#ifdef CONFIG_HOTPLUG_CPU
@@ -553,6 +558,30 @@ late_initcall(uv_init_heartbeat);
553 558
554#endif /* !CONFIG_HOTPLUG_CPU */ 559#endif /* !CONFIG_HOTPLUG_CPU */
555 560
561/* Direct Legacy VGA I/O traffic to designated IOH */
562int uv_set_vga_state(struct pci_dev *pdev, bool decode,
563 unsigned int command_bits, bool change_bridge)
564{
565 int domain, bus, rc;
566
567 PR_DEVEL("devfn %x decode %d cmd %x chg_brdg %d\n",
568 pdev->devfn, decode, command_bits, change_bridge);
569
570 if (!change_bridge)
571 return 0;
572
573 if ((command_bits & PCI_COMMAND_IO) == 0)
574 return 0;
575
576 domain = pci_domain_nr(pdev->bus);
577 bus = pdev->bus->number;
578
579 rc = uv_bios_set_legacy_vga_target(decode, domain, bus);
580 PR_DEVEL("vga decode %d %x:%x, rc: %d\n", decode, domain, bus, rc);
581
582 return rc;
583}
584
556/* 585/*
557 * Called on each cpu to initialize the per_cpu UV data area. 586 * Called on each cpu to initialize the per_cpu UV data area.
558 * FIXME: hotplug not supported yet 587 * FIXME: hotplug not supported yet
@@ -569,6 +598,46 @@ void __cpuinit uv_cpu_init(void)
569 set_x2apic_extra_bits(uv_hub_info->pnode); 598 set_x2apic_extra_bits(uv_hub_info->pnode);
570} 599}
571 600
601/*
602 * When NMI is received, print a stack trace.
603 */
604int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
605{
606 if (reason != DIE_NMI_IPI)
607 return NOTIFY_OK;
608 /*
609 * Use a lock so only one cpu prints at a time
610 * to prevent intermixed output.
611 */
612 spin_lock(&uv_nmi_lock);
613 pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
614 dump_stack();
615 spin_unlock(&uv_nmi_lock);
616
617 return NOTIFY_STOP;
618}
619
620static struct notifier_block uv_dump_stack_nmi_nb = {
621 .notifier_call = uv_handle_nmi
622};
623
624void uv_register_nmi_notifier(void)
625{
626 if (register_die_notifier(&uv_dump_stack_nmi_nb))
627 printk(KERN_WARNING "UV NMI handler failed to register\n");
628}
629
630void uv_nmi_init(void)
631{
632 unsigned int value;
633
634 /*
635 * Unmask NMI on all cpus
636 */
637 value = apic_read(APIC_LVT1) | APIC_DM_NMI;
638 value &= ~APIC_LVT_MASKED;
639 apic_write(APIC_LVT1, value);
640}
572 641
573void __init uv_system_init(void) 642void __init uv_system_init(void)
574{ 643{
@@ -634,8 +703,8 @@ void __init uv_system_init(void)
634 } 703 }
635 704
636 uv_bios_init(); 705 uv_bios_init();
637 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, 706 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id,
638 &sn_coherency_id, &sn_region_size); 707 &sn_region_size, &system_serial_number);
639 uv_rtc_init(); 708 uv_rtc_init();
640 709
641 for_each_present_cpu(cpu) { 710 for_each_present_cpu(cpu) {
@@ -690,5 +759,9 @@ void __init uv_system_init(void)
690 759
691 uv_cpu_init(); 760 uv_cpu_init();
692 uv_scir_register_cpu_notifier(); 761 uv_scir_register_cpu_notifier();
762 uv_register_nmi_notifier();
693 proc_mkdir("sgi_uv", NULL); 763 proc_mkdir("sgi_uv", NULL);
764
765 /* register Legacy VGA I/O redirection handler */
766 pci_register_set_vga_state(uv_set_vga_state);
694} 767}
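
The UV changes wire two platform services into generic frameworks: an NMI die-notifier that dumps the stack of each CPU receiving an NMI IPI, and a VGA-routing callback registered through pci_register_set_vga_state() so legacy VGA I/O can be retargeted via UV BIOS. Roughly how the notifier added above gets invoked from the x86 NMI path (a paraphrase; the real call site differs in detail):

static void example_nmi_path(struct pt_regs *regs, unsigned char reason)
{
        /* NOTIFY_STOP from any chained handler (here: uv_handle_nmi) ends processing */
        if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
                        == NOTIFY_STOP)
                return;

        /* otherwise fall through to the default unknown-NMI handling */
}
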
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index b5b6b23bce53..031aa887b0eb 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1992,8 +1992,8 @@ static int __init apm_is_horked_d850md(const struct dmi_system_id *d)
1992 apm_info.disabled = 1; 1992 apm_info.disabled = 1;
1993 printk(KERN_INFO "%s machine detected. " 1993 printk(KERN_INFO "%s machine detected. "
1994 "Disabling APM.\n", d->ident); 1994 "Disabling APM.\n", d->ident);
1995 printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n"); 1995 printk(KERN_INFO "This bug is fixed in bios P15 which is available for\n");
1996 printk(KERN_INFO "download from support.intel.com \n"); 1996 printk(KERN_INFO "download from support.intel.com\n");
1997 } 1997 }
1998 return 0; 1998 return 0;
1999} 1999}
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index b0206a211b09..8bc57baaa9ad 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -15,8 +15,8 @@
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 * 17 *
18 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 18 * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson 19 * Copyright (c) Russ Anderson <rja@sgi.com>
20 */ 20 */
21 21
22#include <linux/efi.h> 22#include <linux/efi.h>
@@ -30,6 +30,7 @@ static struct uv_systab uv_systab;
30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) 30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
31{ 31{
32 struct uv_systab *tab = &uv_systab; 32 struct uv_systab *tab = &uv_systab;
33 s64 ret;
33 34
34 if (!tab->function) 35 if (!tab->function)
35 /* 36 /*
@@ -37,9 +38,11 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
37 */ 38 */
38 return BIOS_STATUS_UNIMPLEMENTED; 39 return BIOS_STATUS_UNIMPLEMENTED;
39 40
40 return efi_call6((void *)__va(tab->function), 41 ret = efi_call6((void *)__va(tab->function), (u64)which,
41 (u64)which, a1, a2, a3, a4, a5); 42 a1, a2, a3, a4, a5);
43 return ret;
42} 44}
45EXPORT_SYMBOL_GPL(uv_bios_call);
43 46
44s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, 47s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
45 u64 a4, u64 a5) 48 u64 a4, u64 a5)
@@ -73,11 +76,14 @@ long sn_coherency_id;
73EXPORT_SYMBOL_GPL(sn_coherency_id); 76EXPORT_SYMBOL_GPL(sn_coherency_id);
74long sn_region_size; 77long sn_region_size;
75EXPORT_SYMBOL_GPL(sn_region_size); 78EXPORT_SYMBOL_GPL(sn_region_size);
79long system_serial_number;
80EXPORT_SYMBOL_GPL(system_serial_number);
76int uv_type; 81int uv_type;
82EXPORT_SYMBOL_GPL(uv_type);
77 83
78 84
79s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, 85s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
80 long *region) 86 long *region, long *ssn)
81{ 87{
82 s64 ret; 88 s64 ret;
83 u64 v0, v1; 89 u64 v0, v1;
@@ -97,8 +103,11 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
97 *coher = part.coherence_id; 103 *coher = part.coherence_id;
98 if (region) 104 if (region)
99 *region = part.region_size; 105 *region = part.region_size;
106 if (ssn)
107 *ssn = v1;
100 return ret; 108 return ret;
101} 109}
110EXPORT_SYMBOL_GPL(uv_bios_get_sn_info);
102 111
103int 112int
104uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, 113uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size,
@@ -154,6 +163,25 @@ s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
154} 163}
155EXPORT_SYMBOL_GPL(uv_bios_freq_base); 164EXPORT_SYMBOL_GPL(uv_bios_freq_base);
156 165
166/*
167 * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target
168 * @decode: true to enable target, false to disable target
169 * @domain: PCI domain number
170 * @bus: PCI bus number
171 *
172 * Returns:
173 * 0: Success
174 * -EINVAL: Invalid domain or bus number
175 * -ENOSYS: Capability not available
176 * -EBUSY: Legacy VGA I/O cannot be retargeted at this time
177 */
178int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
179{
180 return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET,
181 (u64)decode, (u64)domain, (u64)bus, 0, 0);
182}
183EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
184
157 185
158#ifdef CONFIG_EFI 186#ifdef CONFIG_EFI
159void uv_bios_init(void) 187void uv_bios_init(void)
@@ -185,4 +213,3 @@ void uv_bios_init(void)
185 213
186void uv_bios_init(void) { } 214void uv_bios_init(void) { }
187#endif 215#endif
188
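
bios_uv.c grows two things: the SN-info BIOS call now also returns the system serial number (hence the extra ssn out-parameter and the widened uv_bios_get_sn_info() prototype consumed by uv_system_init() above), and a new uv_bios_set_legacy_vga_target() wrapper exposes the UV_BIOS_SET_LEGACY_VGA_TARGET call used by uv_set_vga_state(). A caller-side sketch of the new prototype; variable names are illustrative and the return value is the s64 BIOS status, negative on failure:

int uvtype;
long partid, coherency, region, serial;
s64 status;

status = uv_bios_get_sn_info(0, &uvtype, &partid, &coherency,
                             &region, &serial);
if (status < 0)                                  /* e.g. BIOS_STATUS_UNIMPLEMENTED */
        pr_warning("UV BIOS sn_info call failed: %lld\n", status);
else
        pr_info("UV partition %ld, system serial %ld\n", partid, serial);
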
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 468489b57aae..97ad79cdf688 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -32,6 +32,10 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
32 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { 32 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
33 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, 33 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
34 { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, 34 { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
35 { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a },
36 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a },
37 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a },
38 { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a },
35 { 0, 0, 0, 0 } 39 { 0, 0, 0, 0 }
36 }; 40 };
37 41
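
The four new rows describe AMD SVM sub-features reported in CPUID leaf 0x8000000A, register EDX, bits 0-3; init_scattered_cpuid_features() walks this table, issues the CPUID, and sets the matching capability bit when the hardware bit is present. A simplified paraphrase of what one pass over the new rows amounts to (the real loop also checks that the leaf is supported first; c is the struct cpuinfo_x86 being filled in):

u32 eax, ebx, ecx, edx;

cpuid(0x8000000a, &eax, &ebx, &ecx, &edx);       /* SVM feature identification leaf */

if (edx & (1 << 0))
        set_cpu_cap(c, X86_FEATURE_NPT);         /* nested page tables               */
if (edx & (1 << 1))
        set_cpu_cap(c, X86_FEATURE_LBRV);        /* LBR virtualization               */
if (edx & (1 << 2))
        set_cpu_cap(c, X86_FEATURE_SVML);        /* SVM lock                         */
if (edx & (1 << 3))
        set_cpu_cap(c, X86_FEATURE_NRIPS);       /* next-RIP save on #VMEXIT         */
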
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index f138c6c389b9..870e6cc6ad28 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -10,6 +10,20 @@ if CPU_FREQ
10 10
11comment "CPUFreq processor drivers" 11comment "CPUFreq processor drivers"
12 12
13config X86_PCC_CPUFREQ
14 tristate "Processor Clocking Control interface driver"
15 depends on ACPI && ACPI_PROCESSOR
16 help
17 This driver adds support for the PCC interface.
18
19 For details, take a look at:
20 <file:Documentation/cpu-freq/pcc-cpufreq.txt>.
21
22 To compile this driver as a module, choose M here: the
23 module will be called pcc-cpufreq.
24
25 If in doubt, say N.
26
13config X86_ACPI_CPUFREQ 27config X86_ACPI_CPUFREQ
14 tristate "ACPI Processor P-States driver" 28 tristate "ACPI Processor P-States driver"
15 select CPU_FREQ_TABLE 29 select CPU_FREQ_TABLE
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 509296df294d..1840c0a5170b 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -4,6 +4,7 @@
4 4
5obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o 5obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
6obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o 6obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
7obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o
7obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o 8obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
8obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o 9obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
9obj-$(CONFIG_X86_LONGHAUL) += longhaul.o 10obj-$(CONFIG_X86_LONGHAUL) += longhaul.o
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
new file mode 100644
index 000000000000..ff36d2979a90
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -0,0 +1,620 @@
1/*
2 * pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface
3 *
4 * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
5 * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
6 * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
7 *
8 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2 of the License.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
17 * INFRINGEMENT. See the GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 */
25
26#include <linux/kernel.h>
27#include <linux/module.h>
28#include <linux/init.h>
29#include <linux/smp.h>
30#include <linux/sched.h>
31#include <linux/cpufreq.h>
32#include <linux/compiler.h>
33
34#include <linux/acpi.h>
35#include <linux/io.h>
36#include <linux/spinlock.h>
37#include <linux/uaccess.h>
38
39#include <acpi/processor.h>
40
41#define PCC_VERSION "1.00.00"
42#define POLL_LOOPS 300
43
44#define CMD_COMPLETE 0x1
45#define CMD_GET_FREQ 0x0
46#define CMD_SET_FREQ 0x1
47
48#define BUF_SZ 4
49
50#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
51 "pcc-cpufreq", msg)
52
53struct pcc_register_resource {
54 u8 descriptor;
55 u16 length;
56 u8 space_id;
57 u8 bit_width;
58 u8 bit_offset;
59 u8 access_size;
60 u64 address;
61} __attribute__ ((packed));
62
63struct pcc_memory_resource {
64 u8 descriptor;
65 u16 length;
66 u8 space_id;
67 u8 resource_usage;
68 u8 type_specific;
69 u64 granularity;
70 u64 minimum;
71 u64 maximum;
72 u64 translation_offset;
73 u64 address_length;
74} __attribute__ ((packed));
75
76static struct cpufreq_driver pcc_cpufreq_driver;
77
78struct pcc_header {
79 u32 signature;
80 u16 length;
81 u8 major;
82 u8 minor;
83 u32 features;
84 u16 command;
85 u16 status;
86 u32 latency;
87 u32 minimum_time;
88 u32 maximum_time;
89 u32 nominal;
90 u32 throttled_frequency;
91 u32 minimum_frequency;
92};
93
94static void __iomem *pcch_virt_addr;
95static struct pcc_header __iomem *pcch_hdr;
96
97static DEFINE_SPINLOCK(pcc_lock);
98
99static struct acpi_generic_address doorbell;
100
101static u64 doorbell_preserve;
102static u64 doorbell_write;
103
104static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f,
105 0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46};
106
107struct pcc_cpu {
108 u32 input_offset;
109 u32 output_offset;
110};
111
112static struct pcc_cpu *pcc_cpu_info;
113
114static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
115{
116 cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
117 policy->cpuinfo.max_freq);
118 return 0;
119}
120
121static inline void pcc_cmd(void)
122{
123 u64 doorbell_value;
124 int i;
125
126 acpi_read(&doorbell_value, &doorbell);
127 acpi_write((doorbell_value & doorbell_preserve) | doorbell_write,
128 &doorbell);
129
130 for (i = 0; i < POLL_LOOPS; i++) {
131 if (ioread16(&pcch_hdr->status) & CMD_COMPLETE)
132 break;
133 }
134}
135
136static inline void pcc_clear_mapping(void)
137{
138 if (pcch_virt_addr)
139 iounmap(pcch_virt_addr);
140 pcch_virt_addr = NULL;
141}
142
143static unsigned int pcc_get_freq(unsigned int cpu)
144{
145 struct pcc_cpu *pcc_cpu_data;
146 unsigned int curr_freq;
147 unsigned int freq_limit;
148 u16 status;
149 u32 input_buffer;
150 u32 output_buffer;
151
152 spin_lock(&pcc_lock);
153
154 dprintk("get: get_freq for CPU %d\n", cpu);
155 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
156
157 input_buffer = 0x1;
158 iowrite32(input_buffer,
159 (pcch_virt_addr + pcc_cpu_data->input_offset));
160 iowrite16(CMD_GET_FREQ, &pcch_hdr->command);
161
162 pcc_cmd();
163
164 output_buffer =
165 ioread32(pcch_virt_addr + pcc_cpu_data->output_offset);
166
167 /* Clear the input buffer - we are done with the current command */
168 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
169
170 status = ioread16(&pcch_hdr->status);
171 if (status != CMD_COMPLETE) {
172 dprintk("get: FAILED: for CPU %d, status is %d\n",
173 cpu, status);
174 goto cmd_incomplete;
175 }
176 iowrite16(0, &pcch_hdr->status);
177 curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff))
178 / 100) * 1000);
179
180 dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is "
181 "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n",
182 cpu, (pcch_virt_addr + pcc_cpu_data->output_offset),
183 output_buffer, curr_freq);
184
185 freq_limit = (output_buffer >> 8) & 0xff;
186 if (freq_limit != 0xff) {
187 dprintk("get: frequency for cpu %d is being temporarily"
188 " capped at %d\n", cpu, curr_freq);
189 }
190
191 spin_unlock(&pcc_lock);
192 return curr_freq;
193
194cmd_incomplete:
195 iowrite16(0, &pcch_hdr->status);
196 spin_unlock(&pcc_lock);
197 return -EINVAL;
198}
199
200static int pcc_cpufreq_target(struct cpufreq_policy *policy,
201 unsigned int target_freq,
202 unsigned int relation)
203{
204 struct pcc_cpu *pcc_cpu_data;
205 struct cpufreq_freqs freqs;
206 u16 status;
207 u32 input_buffer;
208 int cpu;
209
210 spin_lock(&pcc_lock);
211 cpu = policy->cpu;
212 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
213
214 dprintk("target: CPU %d should go to target freq: %d "
215 "(virtual) input_offset is 0x%x\n",
216 cpu, target_freq,
217 (pcch_virt_addr + pcc_cpu_data->input_offset));
218
219 freqs.new = target_freq;
220 freqs.cpu = cpu;
221 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
222
223 input_buffer = 0x1 | (((target_freq * 100)
224 / (ioread32(&pcch_hdr->nominal) * 1000)) << 8);
225 iowrite32(input_buffer,
226 (pcch_virt_addr + pcc_cpu_data->input_offset));
227 iowrite16(CMD_SET_FREQ, &pcch_hdr->command);
228
229 pcc_cmd();
230
231 /* Clear the input buffer - we are done with the current command */
232 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
233
234 status = ioread16(&pcch_hdr->status);
235 if (status != CMD_COMPLETE) {
236 dprintk("target: FAILED for cpu %d, with status: 0x%x\n",
237 cpu, status);
238 goto cmd_incomplete;
239 }
240 iowrite16(0, &pcch_hdr->status);
241
242 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
243 dprintk("target: was SUCCESSFUL for cpu %d\n", cpu);
244 spin_unlock(&pcc_lock);
245
246 return 0;
247
248cmd_incomplete:
249 iowrite16(0, &pcch_hdr->status);
250 spin_unlock(&pcc_lock);
251 return -EINVAL;
252}
253
254static int pcc_get_offset(int cpu)
255{
256 acpi_status status;
257 struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
258 union acpi_object *pccp, *offset;
259 struct pcc_cpu *pcc_cpu_data;
260 struct acpi_processor *pr;
261 int ret = 0;
262
263 pr = per_cpu(processors, cpu);
264 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
265
266 status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer);
267 if (ACPI_FAILURE(status))
268 return -ENODEV;
269
270 pccp = buffer.pointer;
271 if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) {
272 ret = -ENODEV;
273 goto out_free;
274 };
275
276 offset = &(pccp->package.elements[0]);
277 if (!offset || offset->type != ACPI_TYPE_INTEGER) {
278 ret = -ENODEV;
279 goto out_free;
280 }
281
282 pcc_cpu_data->input_offset = offset->integer.value;
283
284 offset = &(pccp->package.elements[1]);
285 if (!offset || offset->type != ACPI_TYPE_INTEGER) {
286 ret = -ENODEV;
287 goto out_free;
288 }
289
290 pcc_cpu_data->output_offset = offset->integer.value;
291
292 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
293 memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ);
294
295 dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data "
296 "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n",
297 cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset);
298out_free:
299 kfree(buffer.pointer);
300 return ret;
301}
302
303static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
304{
305 acpi_status status;
306 struct acpi_object_list input;
307 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
308 union acpi_object in_params[4];
309 union acpi_object *out_obj;
310 u32 capabilities[2];
311 u32 errors;
312 u32 supported;
313 int ret = 0;
314
315 input.count = 4;
316 input.pointer = in_params;
317 input.count = 4;
318 input.pointer = in_params;
319 in_params[0].type = ACPI_TYPE_BUFFER;
320 in_params[0].buffer.length = 16;
321 in_params[0].buffer.pointer = OSC_UUID;
322 in_params[1].type = ACPI_TYPE_INTEGER;
323 in_params[1].integer.value = 1;
324 in_params[2].type = ACPI_TYPE_INTEGER;
325 in_params[2].integer.value = 2;
326 in_params[3].type = ACPI_TYPE_BUFFER;
327 in_params[3].buffer.length = 8;
328 in_params[3].buffer.pointer = (u8 *)&capabilities;
329
330 capabilities[0] = OSC_QUERY_ENABLE;
331 capabilities[1] = 0x1;
332
333 status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
334 if (ACPI_FAILURE(status))
335 return -ENODEV;
336
337 if (!output.length)
338 return -ENODEV;
339
340 out_obj = output.pointer;
341 if (out_obj->type != ACPI_TYPE_BUFFER) {
342 ret = -ENODEV;
343 goto out_free;
344 }
345
346 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
347 if (errors) {
348 ret = -ENODEV;
349 goto out_free;
350 }
351
352 supported = *((u32 *)(out_obj->buffer.pointer + 4));
353 if (!(supported & 0x1)) {
354 ret = -ENODEV;
355 goto out_free;
356 }
357
358 kfree(output.pointer);
359 capabilities[0] = 0x0;
360 capabilities[1] = 0x1;
361
362 status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
363 if (ACPI_FAILURE(status))
364 return -ENODEV;
365
366 if (!output.length)
367 return -ENODEV;
368
369 out_obj = output.pointer;
370 if (out_obj->type != ACPI_TYPE_BUFFER) {
371 ret = -ENODEV;
372 goto out_free;
373 }
374
375 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
376 if (errors) {
377 ret = -ENODEV;
378 goto out_free;
379 }
380
381 supported = *((u32 *)(out_obj->buffer.pointer + 4));
382 if (!(supported & 0x1)) {
383 ret = -ENODEV;
384 goto out_free;
385 }
386
387out_free:
388 kfree(output.pointer);
389 return ret;
390}
391
392static int __init pcc_cpufreq_probe(void)
393{
394 acpi_status status;
395 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
396 struct pcc_memory_resource *mem_resource;
397 struct pcc_register_resource *reg_resource;
398 union acpi_object *out_obj, *member;
399 acpi_handle handle, osc_handle;
400 int ret = 0;
401
402 status = acpi_get_handle(NULL, "\\_SB", &handle);
403 if (ACPI_FAILURE(status))
404 return -ENODEV;
405
406 status = acpi_get_handle(handle, "_OSC", &osc_handle);
407 if (ACPI_SUCCESS(status)) {
408 ret = pcc_cpufreq_do_osc(&osc_handle);
409 if (ret)
410 dprintk("probe: _OSC evaluation did not succeed\n");
411 /* Firmware's use of _OSC is optional */
412 ret = 0;
413 }
414
415 status = acpi_evaluate_object(handle, "PCCH", NULL, &output);
416 if (ACPI_FAILURE(status))
417 return -ENODEV;
418
419 out_obj = output.pointer;
420 if (out_obj->type != ACPI_TYPE_PACKAGE) {
421 ret = -ENODEV;
422 goto out_free;
423 }
424
425 member = &out_obj->package.elements[0];
426 if (member->type != ACPI_TYPE_BUFFER) {
427 ret = -ENODEV;
428 goto out_free;
429 }
430
431 mem_resource = (struct pcc_memory_resource *)member->buffer.pointer;
432
433 dprintk("probe: mem_resource descriptor: 0x%x,"
434 " length: %d, space_id: %d, resource_usage: %d,"
435 " type_specific: %d, granularity: 0x%llx,"
436 " minimum: 0x%llx, maximum: 0x%llx,"
437 " translation_offset: 0x%llx, address_length: 0x%llx\n",
438 mem_resource->descriptor, mem_resource->length,
439 mem_resource->space_id, mem_resource->resource_usage,
440 mem_resource->type_specific, mem_resource->granularity,
441 mem_resource->minimum, mem_resource->maximum,
442 mem_resource->translation_offset,
443 mem_resource->address_length);
444
445 if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) {
446 ret = -ENODEV;
447 goto out_free;
448 }
449
450 pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
451 mem_resource->address_length);
452 if (pcch_virt_addr == NULL) {
453 dprintk("probe: could not map shared mem region\n");
454 goto out_free;
455 }
456 pcch_hdr = pcch_virt_addr;
457
458 dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
459 dprintk("probe: PCCH header is at physical address: 0x%llx,"
460 " signature: 0x%x, length: %d bytes, major: %d, minor: %d,"
461 " supported features: 0x%x, command field: 0x%x,"
462 " status field: 0x%x, nominal latency: %d us\n",
463 mem_resource->minimum, ioread32(&pcch_hdr->signature),
464 ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major),
465 ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features),
466 ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status),
467 ioread32(&pcch_hdr->latency));
468
469 dprintk("probe: min time between commands: %d us,"
470 " max time between commands: %d us,"
471 " nominal CPU frequency: %d MHz,"
472 " minimum CPU frequency: %d MHz,"
473 " minimum CPU frequency without throttling: %d MHz\n",
474 ioread32(&pcch_hdr->minimum_time),
475 ioread32(&pcch_hdr->maximum_time),
476 ioread32(&pcch_hdr->nominal),
477 ioread32(&pcch_hdr->throttled_frequency),
478 ioread32(&pcch_hdr->minimum_frequency));
479
480 member = &out_obj->package.elements[1];
481 if (member->type != ACPI_TYPE_BUFFER) {
482 ret = -ENODEV;
483 goto pcch_free;
484 }
485
486 reg_resource = (struct pcc_register_resource *)member->buffer.pointer;
487
488 doorbell.space_id = reg_resource->space_id;
489 doorbell.bit_width = reg_resource->bit_width;
490 doorbell.bit_offset = reg_resource->bit_offset;
491 doorbell.access_width = 64;
492 doorbell.address = reg_resource->address;
493
494 dprintk("probe: doorbell: space_id is %d, bit_width is %d, "
495 "bit_offset is %d, access_width is %d, address is 0x%llx\n",
496 doorbell.space_id, doorbell.bit_width, doorbell.bit_offset,
497 doorbell.access_width, reg_resource->address);
498
499 member = &out_obj->package.elements[2];
500 if (member->type != ACPI_TYPE_INTEGER) {
501 ret = -ENODEV;
502 goto pcch_free;
503 }
504
505 doorbell_preserve = member->integer.value;
506
507 member = &out_obj->package.elements[3];
508 if (member->type != ACPI_TYPE_INTEGER) {
509 ret = -ENODEV;
510 goto pcch_free;
511 }
512
513 doorbell_write = member->integer.value;
514
515 dprintk("probe: doorbell_preserve: 0x%llx,"
516 " doorbell_write: 0x%llx\n",
517 doorbell_preserve, doorbell_write);
518
519 pcc_cpu_info = alloc_percpu(struct pcc_cpu);
520 if (!pcc_cpu_info) {
521 ret = -ENOMEM;
522 goto pcch_free;
523 }
524
525 printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency"
526 " limits: %d MHz, %d MHz\n", PCC_VERSION,
527 ioread32(&pcch_hdr->minimum_frequency),
528 ioread32(&pcch_hdr->nominal));
529 kfree(output.pointer);
530 return ret;
531pcch_free:
532 pcc_clear_mapping();
533out_free:
534 kfree(output.pointer);
535 return ret;
536}
537
538static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
539{
540 unsigned int cpu = policy->cpu;
541 unsigned int result = 0;
542
543 if (!pcch_virt_addr) {
544 result = -1;
545 goto pcch_null;
546 }
547
548 result = pcc_get_offset(cpu);
549 if (result) {
550 dprintk("init: PCCP evaluation failed\n");
551 goto free;
552 }
553
554 policy->max = policy->cpuinfo.max_freq =
555 ioread32(&pcch_hdr->nominal) * 1000;
556 policy->min = policy->cpuinfo.min_freq =
557 ioread32(&pcch_hdr->minimum_frequency) * 1000;
558 policy->cur = pcc_get_freq(cpu);
559
560 dprintk("init: policy->max is %d, policy->min is %d\n",
561 policy->max, policy->min);
562
563 return 0;
564free:
565 pcc_clear_mapping();
566 free_percpu(pcc_cpu_info);
567pcch_null:
568 return result;
569}
570
571static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
572{
573 return 0;
574}
575
576static struct cpufreq_driver pcc_cpufreq_driver = {
577 .flags = CPUFREQ_CONST_LOOPS,
578 .get = pcc_get_freq,
579 .verify = pcc_cpufreq_verify,
580 .target = pcc_cpufreq_target,
581 .init = pcc_cpufreq_cpu_init,
582 .exit = pcc_cpufreq_cpu_exit,
583 .name = "pcc-cpufreq",
584 .owner = THIS_MODULE,
585};
586
587static int __init pcc_cpufreq_init(void)
588{
589 int ret;
590
591 if (acpi_disabled)
592 return 0;
593
594 ret = pcc_cpufreq_probe();
595 if (ret) {
596 dprintk("pcc_cpufreq_init: PCCH evaluation failed\n");
597 return ret;
598 }
599
600 ret = cpufreq_register_driver(&pcc_cpufreq_driver);
601
602 return ret;
603}
604
605static void __exit pcc_cpufreq_exit(void)
606{
607 cpufreq_unregister_driver(&pcc_cpufreq_driver);
608
609 pcc_clear_mapping();
610
611 free_percpu(pcc_cpu_info);
612}
613
614MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar");
615MODULE_VERSION(PCC_VERSION);
616MODULE_DESCRIPTION("Processor Clocking Control interface driver");
617MODULE_LICENSE("GPL");
618
619late_initcall(pcc_cpufreq_init);
620module_exit(pcc_cpufreq_exit);
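
The new pcc-cpufreq driver talks to firmware through a shared-memory command region plus a doorbell register: the per-CPU input/output buffers sit at the PCCP-supplied offsets inside the PCCH region, a command word is written into the header, and pcc_cmd() rings the doorbell and polls the status word for CMD_COMPLETE. Frequencies cross the interface as a percentage of the nominal frequency advertised in the header; a worked example of that encoding, with illustrative values:

u32 nominal = 2933;                              /* MHz, as read from pcch_hdr->nominal  */
unsigned int target = 2000000;                   /* requested frequency in kHz           */

/* SET_FREQ direction: bit 0 marks the buffer valid, bits 8-15 carry the percentage      */
u32 pct   = (target * 100) / (nominal * 1000);   /* -> 68                                */
u32 input = 0x1 | (pct << 8);

/* GET_FREQ direction: the low output byte is the same percentage, converted back to kHz */
unsigned int curr = ((nominal * pct) / 100) * 1000;   /* -> 1994000 kHz (~1.99 GHz)      */
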
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 6e44519960c8..d360b56e9825 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -806,7 +806,7 @@ static int find_psb_table(struct powernow_k8_data *data)
806static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, 806static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
807 unsigned int index) 807 unsigned int index)
808{ 808{
809 acpi_integer control; 809 u64 control;
810 810
811 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) 811 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
812 return; 812 return;
@@ -824,7 +824,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
824{ 824{
825 struct cpufreq_frequency_table *powernow_table; 825 struct cpufreq_frequency_table *powernow_table;
826 int ret_val = -ENODEV; 826 int ret_val = -ENODEV;
827 acpi_integer control, status; 827 u64 control, status;
828 828
829 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { 829 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
830 dprintk("register performance failed: bad ACPI data\n"); 830 dprintk("register performance failed: bad ACPI data\n");
@@ -948,7 +948,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
948 u32 fid; 948 u32 fid;
949 u32 vid; 949 u32 vid;
950 u32 freq, index; 950 u32 freq, index;
951 acpi_integer status, control; 951 u64 status, control;
952 952
953 if (data->exttype) { 953 if (data->exttype) {
954 status = data->acpi_data.states[i].status; 954 status = data->acpi_data.states[i].status;
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index fc6c8ef92dcc..b3eeb66c0a51 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -18,6 +18,7 @@
18#include <asm/processor.h> 18#include <asm/processor.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <asm/k8.h> 20#include <asm/k8.h>
21#include <asm/smp.h>
21 22
22#define LVL_1_INST 1 23#define LVL_1_INST 1
23#define LVL_1_DATA 2 24#define LVL_1_DATA 2
@@ -31,6 +32,8 @@ struct _cache_table {
31 short size; 32 short size;
32}; 33};
33 34
35#define MB(x) ((x) * 1024)
36
34/* All the cache descriptor types we care about (no TLB or 37/* All the cache descriptor types we care about (no TLB or
35 trace cache entries) */ 38 trace cache entries) */
36 39
@@ -44,9 +47,9 @@ static const struct _cache_table __cpuinitconst cache_table[] =
44 { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ 47 { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */
45 { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ 48 { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */
46 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 49 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
47 { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 50 { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
48 { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 51 { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */
49 { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 52 { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */
50 { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ 53 { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
51 { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ 54 { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
52 { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 55 { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
@@ -59,16 +62,16 @@ static const struct _cache_table __cpuinitconst cache_table[] =
59 { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ 62 { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
60 { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ 63 { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
61 { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ 64 { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
62 { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ 65 { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */
63 { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ 66 { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */
64 { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */ 67 { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */
65 { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ 68 { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */
66 { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ 69 { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
67 { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ 70 { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */
68 { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ 71 { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
69 { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ 72 { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */
70 { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ 73 { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */
71 { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */ 74 { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */
72 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 75 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
73 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 76 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
74 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 77 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
@@ -77,34 +80,34 @@ static const struct _cache_table __cpuinitconst cache_table[] =
77 { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ 80 { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
78 { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ 81 { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
79 { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ 82 { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
80 { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ 83 { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */
81 { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 84 { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
82 { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 85 { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
83 { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 86 { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
84 { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 87 { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
85 { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ 88 { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */
86 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ 89 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
87 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ 90 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
88 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ 91 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
89 { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ 92 { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */
90 { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ 93 { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */
91 { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ 94 { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
92 { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ 95 { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */
93 { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ 96 { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */
94 { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ 97 { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */
95 { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ 98 { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */
96 { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ 99 { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */
97 { 0xd7, LVL_3, 2048 }, /* 8-way set assoc, 64 byte line size */ 100 { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */
98 { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ 101 { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
99 { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ 102 { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */
100 { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ 103 { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
101 { 0xde, LVL_3, 8192 }, /* 12-way set assoc, 64 byte line size */ 104 { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */
102 { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ 105 { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */
103 { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ 106 { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
104 { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ 107 { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
105 { 0xea, LVL_3, 12288 }, /* 24-way set assoc, 64 byte line size */ 108 { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */
106 { 0xeb, LVL_3, 18432 }, /* 24-way set assoc, 64 byte line size */ 109 { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */
107 { 0xec, LVL_3, 24576 }, /* 24-way set assoc, 64 byte line size */ 110 { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */
108 { 0x00, 0, 0} 111 { 0x00, 0, 0}
109}; 112};
110 113
@@ -150,7 +153,8 @@ struct _cpuid4_info {
150 union _cpuid4_leaf_ebx ebx; 153 union _cpuid4_leaf_ebx ebx;
151 union _cpuid4_leaf_ecx ecx; 154 union _cpuid4_leaf_ecx ecx;
152 unsigned long size; 155 unsigned long size;
153 unsigned long can_disable; 156 bool can_disable;
157 unsigned int l3_indices;
154 DECLARE_BITMAP(shared_cpu_map, NR_CPUS); 158 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
155}; 159};
156 160
@@ -160,7 +164,8 @@ struct _cpuid4_info_regs {
160 union _cpuid4_leaf_ebx ebx; 164 union _cpuid4_leaf_ebx ebx;
161 union _cpuid4_leaf_ecx ecx; 165 union _cpuid4_leaf_ecx ecx;
162 unsigned long size; 166 unsigned long size;
163 unsigned long can_disable; 167 bool can_disable;
168 unsigned int l3_indices;
164}; 169};
165 170
166unsigned short num_cache_leaves; 171unsigned short num_cache_leaves;
@@ -290,6 +295,36 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
290 (ebx->split.ways_of_associativity + 1) - 1; 295 (ebx->split.ways_of_associativity + 1) - 1;
291} 296}
292 297
298struct _cache_attr {
299 struct attribute attr;
300 ssize_t (*show)(struct _cpuid4_info *, char *);
301 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
302};
303
304#ifdef CONFIG_CPU_SUP_AMD
305static unsigned int __cpuinit amd_calc_l3_indices(void)
306{
307 /*
308 * We're called over smp_call_function_single() and therefore
309 * are on the correct cpu.
310 */
311 int cpu = smp_processor_id();
312 int node = cpu_to_node(cpu);
313 struct pci_dev *dev = node_to_k8_nb_misc(node);
314 unsigned int sc0, sc1, sc2, sc3;
315 u32 val = 0;
316
317 pci_read_config_dword(dev, 0x1C4, &val);
318
319 /* calculate subcache sizes */
320 sc0 = !(val & BIT(0));
321 sc1 = !(val & BIT(4));
322 sc2 = !(val & BIT(8)) + !(val & BIT(9));
323 sc3 = !(val & BIT(12)) + !(val & BIT(13));
324
325 return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
326}
327
293static void __cpuinit 328static void __cpuinit
294amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) 329amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
295{ 330{
@@ -299,12 +334,103 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
299 if (boot_cpu_data.x86 == 0x11) 334 if (boot_cpu_data.x86 == 0x11)
300 return; 335 return;
301 336
302 /* see erratum #382 */ 337 /* see errata #382 and #388 */
303 if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8)) 338 if ((boot_cpu_data.x86 == 0x10) &&
339 ((boot_cpu_data.x86_model < 0x8) ||
340 (boot_cpu_data.x86_mask < 0x1)))
304 return; 341 return;
305 342
306 this_leaf->can_disable = 1; 343 this_leaf->can_disable = true;
344 this_leaf->l3_indices = amd_calc_l3_indices();
345}
346
347static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
348 unsigned int index)
349{
350 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
351 int node = amd_get_nb_id(cpu);
352 struct pci_dev *dev = node_to_k8_nb_misc(node);
353 unsigned int reg = 0;
354
355 if (!this_leaf->can_disable)
356 return -EINVAL;
357
358 if (!dev)
359 return -EINVAL;
360
361 pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
362 return sprintf(buf, "0x%08x\n", reg);
363}
364
365#define SHOW_CACHE_DISABLE(index) \
366static ssize_t \
367show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
368{ \
369 return show_cache_disable(this_leaf, buf, index); \
307} 370}
371SHOW_CACHE_DISABLE(0)
372SHOW_CACHE_DISABLE(1)
373
374static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
375 const char *buf, size_t count, unsigned int index)
376{
377 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
378 int node = amd_get_nb_id(cpu);
379 struct pci_dev *dev = node_to_k8_nb_misc(node);
380 unsigned long val = 0;
381
382#define SUBCACHE_MASK (3UL << 20)
383#define SUBCACHE_INDEX 0xfff
384
385 if (!this_leaf->can_disable)
386 return -EINVAL;
387
388 if (!capable(CAP_SYS_ADMIN))
389 return -EPERM;
390
391 if (!dev)
392 return -EINVAL;
393
394 if (strict_strtoul(buf, 10, &val) < 0)
395 return -EINVAL;
396
397 /* do not allow writes outside of allowed bits */
398 if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
399 ((val & SUBCACHE_INDEX) > this_leaf->l3_indices))
400 return -EINVAL;
401
402 val |= BIT(30);
403 pci_write_config_dword(dev, 0x1BC + index * 4, val);
404 /*
 405	 * We need to WBINVD on a core on the node containing the L3 cache whose
 406	 * indices we disable; a simple wbinvd() is therefore not sufficient.
407 */
408 wbinvd_on_cpu(cpu);
409 pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31));
410 return count;
411}
412
413#define STORE_CACHE_DISABLE(index) \
414static ssize_t \
415store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
416 const char *buf, size_t count) \
417{ \
418 return store_cache_disable(this_leaf, buf, count, index); \
419}
420STORE_CACHE_DISABLE(0)
421STORE_CACHE_DISABLE(1)
422
423static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
424 show_cache_disable_0, store_cache_disable_0);
425static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
426 show_cache_disable_1, store_cache_disable_1);
427
428#else /* CONFIG_CPU_SUP_AMD */
429static void __cpuinit
430amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
431{
432};
433#endif /* CONFIG_CPU_SUP_AMD */
308 434
309static int 435static int
310__cpuinit cpuid4_cache_lookup_regs(int index, 436__cpuinit cpuid4_cache_lookup_regs(int index,
@@ -711,82 +837,6 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
711#define to_object(k) container_of(k, struct _index_kobject, kobj) 837#define to_object(k) container_of(k, struct _index_kobject, kobj)
712#define to_attr(a) container_of(a, struct _cache_attr, attr) 838#define to_attr(a) container_of(a, struct _cache_attr, attr)
713 839
714static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
715 unsigned int index)
716{
717 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
718 int node = cpu_to_node(cpu);
719 struct pci_dev *dev = node_to_k8_nb_misc(node);
720 unsigned int reg = 0;
721
722 if (!this_leaf->can_disable)
723 return -EINVAL;
724
725 if (!dev)
726 return -EINVAL;
727
728 pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
729 return sprintf(buf, "%x\n", reg);
730}
731
732#define SHOW_CACHE_DISABLE(index) \
733static ssize_t \
734show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
735{ \
736 return show_cache_disable(this_leaf, buf, index); \
737}
738SHOW_CACHE_DISABLE(0)
739SHOW_CACHE_DISABLE(1)
740
741static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
742 const char *buf, size_t count, unsigned int index)
743{
744 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
745 int node = cpu_to_node(cpu);
746 struct pci_dev *dev = node_to_k8_nb_misc(node);
747 unsigned long val = 0;
748 unsigned int scrubber = 0;
749
750 if (!this_leaf->can_disable)
751 return -EINVAL;
752
753 if (!capable(CAP_SYS_ADMIN))
754 return -EPERM;
755
756 if (!dev)
757 return -EINVAL;
758
759 if (strict_strtoul(buf, 10, &val) < 0)
760 return -EINVAL;
761
762 val |= 0xc0000000;
763
764 pci_read_config_dword(dev, 0x58, &scrubber);
765 scrubber &= ~0x1f000000;
766 pci_write_config_dword(dev, 0x58, scrubber);
767
768 pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
769 wbinvd();
770 pci_write_config_dword(dev, 0x1BC + index * 4, val);
771 return count;
772}
773
774#define STORE_CACHE_DISABLE(index) \
775static ssize_t \
776store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
777 const char *buf, size_t count) \
778{ \
779 return store_cache_disable(this_leaf, buf, count, index); \
780}
781STORE_CACHE_DISABLE(0)
782STORE_CACHE_DISABLE(1)
783
784struct _cache_attr {
785 struct attribute attr;
786 ssize_t (*show)(struct _cpuid4_info *, char *);
787 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
788};
789
790#define define_one_ro(_name) \ 840#define define_one_ro(_name) \
791static struct _cache_attr _name = \ 841static struct _cache_attr _name = \
792 __ATTR(_name, 0444, show_##_name, NULL) 842 __ATTR(_name, 0444, show_##_name, NULL)
@@ -801,23 +851,28 @@ define_one_ro(size);
801define_one_ro(shared_cpu_map); 851define_one_ro(shared_cpu_map);
802define_one_ro(shared_cpu_list); 852define_one_ro(shared_cpu_list);
803 853
804static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, 854#define DEFAULT_SYSFS_CACHE_ATTRS \
805 show_cache_disable_0, store_cache_disable_0); 855 &type.attr, \
806static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, 856 &level.attr, \
807 show_cache_disable_1, store_cache_disable_1); 857 &coherency_line_size.attr, \
858 &physical_line_partition.attr, \
859 &ways_of_associativity.attr, \
860 &number_of_sets.attr, \
861 &size.attr, \
862 &shared_cpu_map.attr, \
863 &shared_cpu_list.attr
808 864
809static struct attribute *default_attrs[] = { 865static struct attribute *default_attrs[] = {
810 &type.attr, 866 DEFAULT_SYSFS_CACHE_ATTRS,
811 &level.attr, 867 NULL
812 &coherency_line_size.attr, 868};
813 &physical_line_partition.attr, 869
814 &ways_of_associativity.attr, 870static struct attribute *default_l3_attrs[] = {
815 &number_of_sets.attr, 871 DEFAULT_SYSFS_CACHE_ATTRS,
816 &size.attr, 872#ifdef CONFIG_CPU_SUP_AMD
817 &shared_cpu_map.attr,
818 &shared_cpu_list.attr,
819 &cache_disable_0.attr, 873 &cache_disable_0.attr,
820 &cache_disable_1.attr, 874 &cache_disable_1.attr,
875#endif
821 NULL 876 NULL
822}; 877};
823 878
@@ -848,7 +903,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
848 return ret; 903 return ret;
849} 904}
850 905
851static struct sysfs_ops sysfs_ops = { 906static const struct sysfs_ops sysfs_ops = {
852 .show = show, 907 .show = show,
853 .store = store, 908 .store = store,
854}; 909};
@@ -908,6 +963,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
908 unsigned int cpu = sys_dev->id; 963 unsigned int cpu = sys_dev->id;
909 unsigned long i, j; 964 unsigned long i, j;
910 struct _index_kobject *this_object; 965 struct _index_kobject *this_object;
966 struct _cpuid4_info *this_leaf;
911 int retval; 967 int retval;
912 968
913 retval = cpuid4_cache_sysfs_init(cpu); 969 retval = cpuid4_cache_sysfs_init(cpu);
@@ -926,6 +982,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
926 this_object = INDEX_KOBJECT_PTR(cpu, i); 982 this_object = INDEX_KOBJECT_PTR(cpu, i);
927 this_object->cpu = cpu; 983 this_object->cpu = cpu;
928 this_object->index = i; 984 this_object->index = i;
985
986 this_leaf = CPUID4_INFO_IDX(cpu, i);
987
988 if (this_leaf->can_disable)
989 ktype_cache.default_attrs = default_l3_attrs;
990 else
991 ktype_cache.default_attrs = default_attrs;
992
929 retval = kobject_init_and_add(&(this_object->kobj), 993 retval = kobject_init_and_add(&(this_object->kobj),
930 &ktype_cache, 994 &ktype_cache,
931 per_cpu(ici_cache_kobject, cpu), 995 per_cpu(ici_cache_kobject, cpu),
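
For reference, the intel_cacheinfo.c changes above introduce amd_calc_l3_indices(), which derives the highest usable L3 index from the four subcache-presence bit fields in northbridge config register 0x1C4, and they expose the cache_disable_0/1 files only on leaves whose can_disable flag is set. The bit arithmetic can be tried on its own; the user-space sketch below merely mirrors the patch's calculation, with a made-up register value standing in for the PCI config read.

    #include <stdio.h>

    #define BIT(n) (1U << (n))

    /* Mirrors the subcache-size calculation added in amd_calc_l3_indices(). */
    static unsigned int calc_l3_indices(unsigned int val)
    {
            unsigned int sc0, sc1, sc2, sc3, max;

            /* a clear bit means the corresponding subcache counts toward the size */
            sc0 = !(val & BIT(0));
            sc1 = !(val & BIT(4));
            sc2 = !(val & BIT(8))  + !(val & BIT(9));
            sc3 = !(val & BIT(12)) + !(val & BIT(13));

            max = sc0 > sc1 ? sc0 : sc1;
            if (sc2 > max)
                    max = sc2;
            if (sc3 > max)
                    max = sc3;

            return (max << 10) - 1;         /* highest usable L3 index */
    }

    int main(void)
    {
            unsigned int sample = 0x0;      /* hypothetical: all four subcaches present */

            printf("l3_indices = %u\n", calc_l3_indices(sample));
            return 0;
    }

With all subcaches present the sketch reports 2047, the same bound that store_cache_disable() later compares against the SUBCACHE_INDEX field of a written value.
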
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a8aacd4b513c..28cba46bf32c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2044,6 +2044,7 @@ static __init void mce_init_banks(void)
2044 struct mce_bank *b = &mce_banks[i]; 2044 struct mce_bank *b = &mce_banks[i];
2045 struct sysdev_attribute *a = &b->attr; 2045 struct sysdev_attribute *a = &b->attr;
2046 2046
2047 sysfs_attr_init(&a->attr);
2047 a->attr.name = b->attrname; 2048 a->attr.name = b->attrname;
2048 snprintf(b->attrname, ATTR_LEN, "bank%d", i); 2049 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
2049 2050
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 83a3d1f4efca..cda932ca3ade 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -388,7 +388,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
388 return ret; 388 return ret;
389} 389}
390 390
391static struct sysfs_ops threshold_ops = { 391static const struct sysfs_ops threshold_ops = {
392 .show = show, 392 .show = show,
393 .store = store, 393 .store = store,
394}; 394};
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index f4361b56f8e9..ad9e5ed81181 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,3 @@
1obj-y := main.o if.o generic.o state.o cleanup.o 1obj-y := main.o if.o generic.o cleanup.o
2obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o 2obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
3 3
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
index 33af14110dfd..92ba9cd31c9a 100644
--- a/arch/x86/kernel/cpu/mtrr/amd.c
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -108,7 +108,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
108 return 0; 108 return 0;
109} 109}
110 110
111static struct mtrr_ops amd_mtrr_ops = { 111static const struct mtrr_ops amd_mtrr_ops = {
112 .vendor = X86_VENDOR_AMD, 112 .vendor = X86_VENDOR_AMD,
113 .set = amd_set_mtrr, 113 .set = amd_set_mtrr,
114 .get = amd_get_mtrr, 114 .get = amd_get_mtrr,
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index de89f14eff3a..316fe3e60a97 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -110,7 +110,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t
110 return 0; 110 return 0;
111} 111}
112 112
113static struct mtrr_ops centaur_mtrr_ops = { 113static const struct mtrr_ops centaur_mtrr_ops = {
114 .vendor = X86_VENDOR_CENTAUR, 114 .vendor = X86_VENDOR_CENTAUR,
115 .set = centaur_set_mcr, 115 .set = centaur_set_mcr,
116 .get = centaur_get_mcr, 116 .get = centaur_get_mcr,
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 09b1698e0466..06130b52f012 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -22,10 +22,10 @@
22#include <linux/pci.h> 22#include <linux/pci.h>
23#include <linux/smp.h> 23#include <linux/smp.h>
24#include <linux/cpu.h> 24#include <linux/cpu.h>
25#include <linux/sort.h>
26#include <linux/mutex.h> 25#include <linux/mutex.h>
27#include <linux/uaccess.h> 26#include <linux/uaccess.h>
28#include <linux/kvm_para.h> 27#include <linux/kvm_para.h>
28#include <linux/range.h>
29 29
30#include <asm/processor.h> 30#include <asm/processor.h>
31#include <asm/e820.h> 31#include <asm/e820.h>
@@ -34,11 +34,6 @@
34 34
35#include "mtrr.h" 35#include "mtrr.h"
36 36
37struct res_range {
38 unsigned long start;
39 unsigned long end;
40};
41
42struct var_mtrr_range_state { 37struct var_mtrr_range_state {
43 unsigned long base_pfn; 38 unsigned long base_pfn;
44 unsigned long size_pfn; 39 unsigned long size_pfn;
@@ -56,7 +51,7 @@ struct var_mtrr_state {
56/* Should be related to MTRR_VAR_RANGES nums */ 51/* Should be related to MTRR_VAR_RANGES nums */
57#define RANGE_NUM 256 52#define RANGE_NUM 256
58 53
59static struct res_range __initdata range[RANGE_NUM]; 54static struct range __initdata range[RANGE_NUM];
60static int __initdata nr_range; 55static int __initdata nr_range;
61 56
62static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; 57static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
@@ -64,152 +59,11 @@ static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
64static int __initdata debug_print; 59static int __initdata debug_print;
65#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) 60#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
66 61
67
68static int __init
69add_range(struct res_range *range, int nr_range,
70 unsigned long start, unsigned long end)
71{
72 /* Out of slots: */
73 if (nr_range >= RANGE_NUM)
74 return nr_range;
75
76 range[nr_range].start = start;
77 range[nr_range].end = end;
78
79 nr_range++;
80
81 return nr_range;
82}
83
84static int __init
85add_range_with_merge(struct res_range *range, int nr_range,
86 unsigned long start, unsigned long end)
87{
88 int i;
89
90 /* Try to merge it with old one: */
91 for (i = 0; i < nr_range; i++) {
92 unsigned long final_start, final_end;
93 unsigned long common_start, common_end;
94
95 if (!range[i].end)
96 continue;
97
98 common_start = max(range[i].start, start);
99 common_end = min(range[i].end, end);
100 if (common_start > common_end + 1)
101 continue;
102
103 final_start = min(range[i].start, start);
104 final_end = max(range[i].end, end);
105
106 range[i].start = final_start;
107 range[i].end = final_end;
108 return nr_range;
109 }
110
111 /* Need to add it: */
112 return add_range(range, nr_range, start, end);
113}
114
115static void __init
116subtract_range(struct res_range *range, unsigned long start, unsigned long end)
117{
118 int i, j;
119
120 for (j = 0; j < RANGE_NUM; j++) {
121 if (!range[j].end)
122 continue;
123
124 if (start <= range[j].start && end >= range[j].end) {
125 range[j].start = 0;
126 range[j].end = 0;
127 continue;
128 }
129
130 if (start <= range[j].start && end < range[j].end &&
131 range[j].start < end + 1) {
132 range[j].start = end + 1;
133 continue;
134 }
135
136
137 if (start > range[j].start && end >= range[j].end &&
138 range[j].end > start - 1) {
139 range[j].end = start - 1;
140 continue;
141 }
142
143 if (start > range[j].start && end < range[j].end) {
144 /* Find the new spare: */
145 for (i = 0; i < RANGE_NUM; i++) {
146 if (range[i].end == 0)
147 break;
148 }
149 if (i < RANGE_NUM) {
150 range[i].end = range[j].end;
151 range[i].start = end + 1;
152 } else {
153 printk(KERN_ERR "run of slot in ranges\n");
154 }
155 range[j].end = start - 1;
156 continue;
157 }
158 }
159}
160
161static int __init cmp_range(const void *x1, const void *x2)
162{
163 const struct res_range *r1 = x1;
164 const struct res_range *r2 = x2;
165 long start1, start2;
166
167 start1 = r1->start;
168 start2 = r2->start;
169
170 return start1 - start2;
171}
172
173static int __init clean_sort_range(struct res_range *range, int az)
174{
175 int i, j, k = az - 1, nr_range = 0;
176
177 for (i = 0; i < k; i++) {
178 if (range[i].end)
179 continue;
180 for (j = k; j > i; j--) {
181 if (range[j].end) {
182 k = j;
183 break;
184 }
185 }
186 if (j == i)
187 break;
188 range[i].start = range[k].start;
189 range[i].end = range[k].end;
190 range[k].start = 0;
191 range[k].end = 0;
192 k--;
193 }
194 /* count it */
195 for (i = 0; i < az; i++) {
196 if (!range[i].end) {
197 nr_range = i;
198 break;
199 }
200 }
201
202 /* sort them */
203 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
204
205 return nr_range;
206}
207
208#define BIOS_BUG_MSG KERN_WARNING \ 62#define BIOS_BUG_MSG KERN_WARNING \
209 "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" 63 "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
210 64
211static int __init 65static int __init
212x86_get_mtrr_mem_range(struct res_range *range, int nr_range, 66x86_get_mtrr_mem_range(struct range *range, int nr_range,
213 unsigned long extra_remove_base, 67 unsigned long extra_remove_base,
214 unsigned long extra_remove_size) 68 unsigned long extra_remove_size)
215{ 69{
@@ -223,14 +77,14 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
223 continue; 77 continue;
224 base = range_state[i].base_pfn; 78 base = range_state[i].base_pfn;
225 size = range_state[i].size_pfn; 79 size = range_state[i].size_pfn;
226 nr_range = add_range_with_merge(range, nr_range, base, 80 nr_range = add_range_with_merge(range, RANGE_NUM, nr_range,
227 base + size - 1); 81 base, base + size);
228 } 82 }
229 if (debug_print) { 83 if (debug_print) {
230 printk(KERN_DEBUG "After WB checking\n"); 84 printk(KERN_DEBUG "After WB checking\n");
231 for (i = 0; i < nr_range; i++) 85 for (i = 0; i < nr_range; i++)
232 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 86 printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
233 range[i].start, range[i].end + 1); 87 range[i].start, range[i].end);
234 } 88 }
235 89
236 /* Take out UC ranges: */ 90 /* Take out UC ranges: */
@@ -252,19 +106,19 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
252 size -= (1<<(20-PAGE_SHIFT)) - base; 106 size -= (1<<(20-PAGE_SHIFT)) - base;
253 base = 1<<(20-PAGE_SHIFT); 107 base = 1<<(20-PAGE_SHIFT);
254 } 108 }
255 subtract_range(range, base, base + size - 1); 109 subtract_range(range, RANGE_NUM, base, base + size);
256 } 110 }
257 if (extra_remove_size) 111 if (extra_remove_size)
258 subtract_range(range, extra_remove_base, 112 subtract_range(range, RANGE_NUM, extra_remove_base,
259 extra_remove_base + extra_remove_size - 1); 113 extra_remove_base + extra_remove_size);
260 114
261 if (debug_print) { 115 if (debug_print) {
262 printk(KERN_DEBUG "After UC checking\n"); 116 printk(KERN_DEBUG "After UC checking\n");
263 for (i = 0; i < RANGE_NUM; i++) { 117 for (i = 0; i < RANGE_NUM; i++) {
264 if (!range[i].end) 118 if (!range[i].end)
265 continue; 119 continue;
266 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 120 printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
267 range[i].start, range[i].end + 1); 121 range[i].start, range[i].end);
268 } 122 }
269 } 123 }
270 124
@@ -273,26 +127,22 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
273 if (debug_print) { 127 if (debug_print) {
274 printk(KERN_DEBUG "After sorting\n"); 128 printk(KERN_DEBUG "After sorting\n");
275 for (i = 0; i < nr_range; i++) 129 for (i = 0; i < nr_range; i++)
276 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 130 printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
277 range[i].start, range[i].end + 1); 131 range[i].start, range[i].end);
278 } 132 }
279 133
280 /* clear those is not used */
281 for (i = nr_range; i < RANGE_NUM; i++)
282 memset(&range[i], 0, sizeof(range[i]));
283
284 return nr_range; 134 return nr_range;
285} 135}
286 136
287#ifdef CONFIG_MTRR_SANITIZER 137#ifdef CONFIG_MTRR_SANITIZER
288 138
289static unsigned long __init sum_ranges(struct res_range *range, int nr_range) 139static unsigned long __init sum_ranges(struct range *range, int nr_range)
290{ 140{
291 unsigned long sum = 0; 141 unsigned long sum = 0;
292 int i; 142 int i;
293 143
294 for (i = 0; i < nr_range; i++) 144 for (i = 0; i < nr_range; i++)
295 sum += range[i].end + 1 - range[i].start; 145 sum += range[i].end - range[i].start;
296 146
297 return sum; 147 return sum;
298} 148}
@@ -621,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg)
621early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); 471early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
622 472
623static int __init 473static int __init
624x86_setup_var_mtrrs(struct res_range *range, int nr_range, 474x86_setup_var_mtrrs(struct range *range, int nr_range,
625 u64 chunk_size, u64 gran_size) 475 u64 chunk_size, u64 gran_size)
626{ 476{
627 struct var_mtrr_state var_state; 477 struct var_mtrr_state var_state;
@@ -639,7 +489,7 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
639 /* Write the range: */ 489 /* Write the range: */
640 for (i = 0; i < nr_range; i++) { 490 for (i = 0; i < nr_range; i++) {
641 set_var_mtrr_range(&var_state, range[i].start, 491 set_var_mtrr_range(&var_state, range[i].start,
642 range[i].end - range[i].start + 1); 492 range[i].end - range[i].start);
643 } 493 }
644 494
645 /* Write the last range: */ 495 /* Write the last range: */
@@ -742,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
742 unsigned long x_remove_base, 592 unsigned long x_remove_base,
743 unsigned long x_remove_size, int i) 593 unsigned long x_remove_size, int i)
744{ 594{
745 static struct res_range range_new[RANGE_NUM]; 595 static struct range range_new[RANGE_NUM];
746 unsigned long range_sums_new; 596 unsigned long range_sums_new;
747 static int nr_range_new; 597 static int nr_range_new;
748 int num_reg; 598 int num_reg;
@@ -869,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits)
869 * [0, 1M) should always be covered by var mtrr with WB 719 * [0, 1M) should always be covered by var mtrr with WB
870 * and fixed mtrrs should take effect before var mtrr for it: 720 * and fixed mtrrs should take effect before var mtrr for it:
871 */ 721 */
872 nr_range = add_range_with_merge(range, nr_range, 0, 722 nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0,
873 (1ULL<<(20 - PAGE_SHIFT)) - 1); 723 1ULL<<(20 - PAGE_SHIFT));
874 /* Sort the ranges: */ 724 /* Sort the ranges: */
875 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); 725 sort_range(range, nr_range);
876 726
877 range_sums = sum_ranges(range, nr_range); 727 range_sums = sum_ranges(range, nr_range);
878 printk(KERN_INFO "total RAM covered: %ldM\n", 728 printk(KERN_INFO "total RAM covered: %ldM\n",
@@ -1089,9 +939,9 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1089 nr_range = 0; 939 nr_range = 0;
1090 if (mtrr_tom2) { 940 if (mtrr_tom2) {
1091 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); 941 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
1092 range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; 942 range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT;
1093 if (highest_pfn < range[nr_range].end + 1) 943 if (highest_pfn < range[nr_range].end)
1094 highest_pfn = range[nr_range].end + 1; 944 highest_pfn = range[nr_range].end;
1095 nr_range++; 945 nr_range++;
1096 } 946 }
1097 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); 947 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
@@ -1103,15 +953,15 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1103 953
1104 /* Check the holes: */ 954 /* Check the holes: */
1105 for (i = 0; i < nr_range - 1; i++) { 955 for (i = 0; i < nr_range - 1; i++) {
1106 if (range[i].end + 1 < range[i+1].start) 956 if (range[i].end < range[i+1].start)
1107 total_trim_size += real_trim_memory(range[i].end + 1, 957 total_trim_size += real_trim_memory(range[i].end,
1108 range[i+1].start); 958 range[i+1].start);
1109 } 959 }
1110 960
1111 /* Check the top: */ 961 /* Check the top: */
1112 i = nr_range - 1; 962 i = nr_range - 1;
1113 if (range[i].end + 1 < end_pfn) 963 if (range[i].end < end_pfn)
1114 total_trim_size += real_trim_memory(range[i].end + 1, 964 total_trim_size += real_trim_memory(range[i].end,
1115 end_pfn); 965 end_pfn);
1116 966
1117 if (total_trim_size) { 967 if (total_trim_size) {
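
Most of the "+ 1"/"- 1" adjustments dropped throughout the cleanup.c hunks above follow from a single convention switch: the file-local, end-inclusive struct res_range is replaced by the generic struct range from <linux/range.h>, whose end is exclusive, and the shared helpers (add_range_with_merge(), subtract_range(), sort_range()) now come from the generic range code rather than this file. The minimal sketch below only contrasts the two conventions; the values are arbitrary and the field types are simplified (the real struct range uses u64).

    #include <stdio.h>

    struct res_range { unsigned long start, end; };     /* old: end is inclusive */
    struct range     { unsigned long start, end; };     /* new: end is exclusive */

    int main(void)
    {
            /* both describe PFNs 0x100 through 0x1ff */
            struct res_range before = { .start = 0x100, .end = 0x1ff };
            struct range     after  = { .start = 0x100, .end = 0x200 };

            printf("old size: %lu pages\n", before.end + 1 - before.start); /* +1 needed */
            printf("new size: %lu pages\n", after.end - after.start);       /* plain subtraction */
            return 0;
    }

Both lines print 256, which is why sum_ranges(), set_var_mtrr_range() and the trim checks above lose their off-by-one corrections without changing behaviour.
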
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 228d982ce09c..68a3343e5798 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -265,7 +265,7 @@ static void cyrix_set_all(void)
265 post_set(); 265 post_set();
266} 266}
267 267
268static struct mtrr_ops cyrix_mtrr_ops = { 268static const struct mtrr_ops cyrix_mtrr_ops = {
269 .vendor = X86_VENDOR_CYRIX, 269 .vendor = X86_VENDOR_CYRIX,
270 .set_all = cyrix_set_all, 270 .set_all = cyrix_set_all,
271 .set = cyrix_set_arr, 271 .set = cyrix_set_arr,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 55da0c5f68dd..9aa5dc76ff4a 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -464,7 +464,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
464 tmp |= ~((1<<(hi - 1)) - 1); 464 tmp |= ~((1<<(hi - 1)) - 1);
465 465
466 if (tmp != mask_lo) { 466 if (tmp != mask_lo) {
467 WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); 467 printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
468 mask_lo = tmp; 468 mask_lo = tmp;
469 } 469 }
470 } 470 }
@@ -570,7 +570,7 @@ static unsigned long set_mtrr_state(void)
570 570
571 571
572static unsigned long cr4; 572static unsigned long cr4;
573static DEFINE_SPINLOCK(set_atomicity_lock); 573static DEFINE_RAW_SPINLOCK(set_atomicity_lock);
574 574
575/* 575/*
576 * Since we are disabling the cache don't allow any interrupts, 576 * Since we are disabling the cache don't allow any interrupts,
@@ -590,7 +590,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
590 * changes to the way the kernel boots 590 * changes to the way the kernel boots
591 */ 591 */
592 592
593 spin_lock(&set_atomicity_lock); 593 raw_spin_lock(&set_atomicity_lock);
594 594
595 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ 595 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
596 cr0 = read_cr0() | X86_CR0_CD; 596 cr0 = read_cr0() | X86_CR0_CD;
@@ -627,7 +627,7 @@ static void post_set(void) __releases(set_atomicity_lock)
627 /* Restore value of CR4 */ 627 /* Restore value of CR4 */
628 if (cpu_has_pge) 628 if (cpu_has_pge)
629 write_cr4(cr4); 629 write_cr4(cr4);
630 spin_unlock(&set_atomicity_lock); 630 raw_spin_unlock(&set_atomicity_lock);
631} 631}
632 632
633static void generic_set_all(void) 633static void generic_set_all(void)
@@ -752,7 +752,7 @@ int positive_have_wrcomb(void)
752/* 752/*
753 * Generic structure... 753 * Generic structure...
754 */ 754 */
755struct mtrr_ops generic_mtrr_ops = { 755const struct mtrr_ops generic_mtrr_ops = {
756 .use_intel_if = 1, 756 .use_intel_if = 1,
757 .set_all = generic_set_all, 757 .set_all = generic_set_all,
758 .get = generic_get_mtrr, 758 .get = generic_get_mtrr,
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 84e83de54575..79556bd9b602 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -60,14 +60,14 @@ static DEFINE_MUTEX(mtrr_mutex);
60u64 size_or_mask, size_and_mask; 60u64 size_or_mask, size_and_mask;
61static bool mtrr_aps_delayed_init; 61static bool mtrr_aps_delayed_init;
62 62
63static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; 63static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
64 64
65struct mtrr_ops *mtrr_if; 65const struct mtrr_ops *mtrr_if;
66 66
67static void set_mtrr(unsigned int reg, unsigned long base, 67static void set_mtrr(unsigned int reg, unsigned long base,
68 unsigned long size, mtrr_type type); 68 unsigned long size, mtrr_type type);
69 69
70void set_mtrr_ops(struct mtrr_ops *ops) 70void set_mtrr_ops(const struct mtrr_ops *ops)
71{ 71{
72 if (ops->vendor && ops->vendor < X86_VENDOR_NUM) 72 if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
73 mtrr_ops[ops->vendor] = ops; 73 mtrr_ops[ops->vendor] = ops;
@@ -145,6 +145,7 @@ struct set_mtrr_data {
145 145
146/** 146/**
147 * ipi_handler - Synchronisation handler. Executed by "other" CPUs. 147 * ipi_handler - Synchronisation handler. Executed by "other" CPUs.
148 * @info: pointer to mtrr configuration data
148 * 149 *
149 * Returns nothing. 150 * Returns nothing.
150 */ 151 */
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index a501dee9a87a..df5e41f31a27 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -32,7 +32,7 @@ extern int generic_get_free_region(unsigned long base, unsigned long size,
32extern int generic_validate_add_page(unsigned long base, unsigned long size, 32extern int generic_validate_add_page(unsigned long base, unsigned long size,
33 unsigned int type); 33 unsigned int type);
34 34
35extern struct mtrr_ops generic_mtrr_ops; 35extern const struct mtrr_ops generic_mtrr_ops;
36 36
37extern int positive_have_wrcomb(void); 37extern int positive_have_wrcomb(void);
38 38
@@ -53,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index,
53 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); 53 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
54void get_mtrr_state(void); 54void get_mtrr_state(void);
55 55
56extern void set_mtrr_ops(struct mtrr_ops *ops); 56extern void set_mtrr_ops(const struct mtrr_ops *ops);
57 57
58extern u64 size_or_mask, size_and_mask; 58extern u64 size_or_mask, size_and_mask;
59extern struct mtrr_ops *mtrr_if; 59extern const struct mtrr_ops *mtrr_if;
60 60
61#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) 61#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
62#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) 62#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
deleted file mode 100644
index dfc80b4e6b0d..000000000000
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ /dev/null
@@ -1,94 +0,0 @@
1#include <linux/init.h>
2#include <linux/io.h>
3#include <linux/mm.h>
4
5#include <asm/processor-cyrix.h>
6#include <asm/processor-flags.h>
7#include <asm/mtrr.h>
8#include <asm/msr.h>
9
10#include "mtrr.h"
11
12/* Put the processor into a state where MTRRs can be safely set */
13void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
14{
15 unsigned int cr0;
16
17 /* Disable interrupts locally */
18 local_irq_save(ctxt->flags);
19
20 if (use_intel() || is_cpu(CYRIX)) {
21
22 /* Save value of CR4 and clear Page Global Enable (bit 7) */
23 if (cpu_has_pge) {
24 ctxt->cr4val = read_cr4();
25 write_cr4(ctxt->cr4val & ~X86_CR4_PGE);
26 }
27
28 /*
29 * Disable and flush caches. Note that wbinvd flushes the TLBs
30 * as a side-effect
31 */
32 cr0 = read_cr0() | X86_CR0_CD;
33 wbinvd();
34 write_cr0(cr0);
35 wbinvd();
36
37 if (use_intel()) {
38 /* Save MTRR state */
39 rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
40 } else {
41 /*
42 * Cyrix ARRs -
43 * everything else were excluded at the top
44 */
45 ctxt->ccr3 = getCx86(CX86_CCR3);
46 }
47 }
48}
49
50void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
51{
52 if (use_intel()) {
53 /* Disable MTRRs, and set the default type to uncached */
54 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
55 ctxt->deftype_hi);
56 } else {
57 if (is_cpu(CYRIX)) {
58 /* Cyrix ARRs - everything else were excluded at the top */
59 setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
60 }
61 }
62}
63
64/* Restore the processor after a set_mtrr_prepare */
65void set_mtrr_done(struct set_mtrr_context *ctxt)
66{
67 if (use_intel() || is_cpu(CYRIX)) {
68
69 /* Flush caches and TLBs */
70 wbinvd();
71
72 /* Restore MTRRdefType */
73 if (use_intel()) {
74 /* Intel (P6) standard MTRRs */
75 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo,
76 ctxt->deftype_hi);
77 } else {
78 /*
79 * Cyrix ARRs -
80 * everything else was excluded at the top
81 */
82 setCx86(CX86_CCR3, ctxt->ccr3);
83 }
84
85 /* Enable caches */
86 write_cr0(read_cr0() & 0xbfffffff);
87
88 /* Restore value of CR4 */
89 if (cpu_has_pge)
90 write_cr4(ctxt->cr4val);
91 }
92 /* Re-enable interrupts locally (if enabled previously) */
93 local_irq_restore(ctxt->flags);
94}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bfc43fa208bc..97cddbf32936 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -679,7 +679,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
679 if (c->weight != w) 679 if (c->weight != w)
680 continue; 680 continue;
681 681
682 for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { 682 for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
683 if (!test_bit(j, used_mask)) 683 if (!test_bit(j, used_mask))
684 break; 684 break;
685 } 685 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 4fbdfe5708d9..73102df8bfc1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -767,7 +767,7 @@ again:
767 767
768 inc_irq_stat(apic_perf_irqs); 768 inc_irq_stat(apic_perf_irqs);
769 ack = status; 769 ack = status;
770 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 770 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
771 struct perf_event *event = cpuc->events[bit]; 771 struct perf_event *event = cpuc->events[bit];
772 772
773 clear_bit(bit, (unsigned long *) &status); 773 clear_bit(bit, (unsigned long *) &status);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 2012a4ed2727..d5e2a2ebb627 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -292,6 +292,7 @@ void show_registers(struct pt_regs *regs)
292 292
293 sp = regs->sp; 293 sp = regs->sp;
294 printk("CPU %d ", cpu); 294 printk("CPU %d ", cpu);
295 print_modules();
295 __show_regs(regs, 1); 296 __show_regs(regs, 1);
296 printk("Process %s (pid: %d, threadinfo %p, task %p)\n", 297 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
297 cur->comm, cur->pid, task_thread_info(cur), cur); 298 cur->comm, cur->pid, task_thread_info(cur), cur);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a1a7876cadcb..740b440fbd73 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -12,21 +12,13 @@
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/bootmem.h> 14#include <linux/bootmem.h>
15#include <linux/ioport.h>
16#include <linux/string.h>
17#include <linux/kexec.h>
18#include <linux/module.h>
19#include <linux/mm.h>
20#include <linux/pfn.h> 15#include <linux/pfn.h>
21#include <linux/suspend.h> 16#include <linux/suspend.h>
22#include <linux/firmware-map.h> 17#include <linux/firmware-map.h>
23 18
24#include <asm/pgtable.h>
25#include <asm/page.h>
26#include <asm/e820.h> 19#include <asm/e820.h>
27#include <asm/proto.h> 20#include <asm/proto.h>
28#include <asm/setup.h> 21#include <asm/setup.h>
29#include <asm/trampoline.h>
30 22
31/* 23/*
32 * The e820 map is the map that gets modified e.g. with command line parameters 24 * The e820 map is the map that gets modified e.g. with command line parameters
@@ -517,11 +509,19 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
517 int checktype) 509 int checktype)
518{ 510{
519 int i; 511 int i;
512 u64 end;
520 u64 real_removed_size = 0; 513 u64 real_removed_size = 0;
521 514
522 if (size > (ULLONG_MAX - start)) 515 if (size > (ULLONG_MAX - start))
523 size = ULLONG_MAX - start; 516 size = ULLONG_MAX - start;
524 517
518 end = start + size;
519 printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
520 (unsigned long long) start,
521 (unsigned long long) end);
522 e820_print_type(old_type);
523 printk(KERN_CONT "\n");
524
525 for (i = 0; i < e820.nr_map; i++) { 525 for (i = 0; i < e820.nr_map; i++) {
526 struct e820entry *ei = &e820.map[i]; 526 struct e820entry *ei = &e820.map[i];
527 u64 final_start, final_end; 527 u64 final_start, final_end;
@@ -722,319 +722,44 @@ core_initcall(e820_mark_nvs_memory);
722#endif 722#endif
723 723
724/* 724/*
725 * Early reserved memory areas. 725 * Find a free area with specified alignment in a specific range.
726 */
727#define MAX_EARLY_RES 32
728
729struct early_res {
730 u64 start, end;
731 char name[16];
732 char overlap_ok;
733};
734static struct early_res early_res[MAX_EARLY_RES] __initdata = {
735 { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
736#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE)
737 /*
738 * But first pinch a few for the stack/trampoline stuff
739 * FIXME: Don't need the extra page at 4K, but need to fix
740 * trampoline before removing it. (see the GDT stuff)
741 */
742 { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 },
743#endif
744
745 {}
746};
747
748static int __init find_overlapped_early(u64 start, u64 end)
749{
750 int i;
751 struct early_res *r;
752
753 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
754 r = &early_res[i];
755 if (end > r->start && start < r->end)
756 break;
757 }
758
759 return i;
760}
761
762/*
763 * Drop the i-th range from the early reservation map,
764 * by copying any higher ranges down one over it, and
765 * clearing what had been the last slot.
766 */
767static void __init drop_range(int i)
768{
769 int j;
770
771 for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
772 ;
773
774 memmove(&early_res[i], &early_res[i + 1],
775 (j - 1 - i) * sizeof(struct early_res));
776
777 early_res[j - 1].end = 0;
778}
779
780/*
781 * Split any existing ranges that:
782 * 1) are marked 'overlap_ok', and
783 * 2) overlap with the stated range [start, end)
784 * into whatever portion (if any) of the existing range is entirely
785 * below or entirely above the stated range. Drop the portion
786 * of the existing range that overlaps with the stated range,
787 * which will allow the caller of this routine to then add that
788 * stated range without conflicting with any existing range.
789 */ 726 */
790static void __init drop_overlaps_that_are_ok(u64 start, u64 end) 727u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
791{ 728{
792 int i; 729 int i;
793 struct early_res *r;
794 u64 lower_start, lower_end;
795 u64 upper_start, upper_end;
796 char name[16];
797 730
798 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { 731 for (i = 0; i < e820.nr_map; i++) {
799 r = &early_res[i]; 732 struct e820entry *ei = &e820.map[i];
733 u64 addr;
734 u64 ei_start, ei_last;
800 735
801 /* Continue past non-overlapping ranges */ 736 if (ei->type != E820_RAM)
802 if (end <= r->start || start >= r->end)
803 continue; 737 continue;
804 738
805 /* 739 ei_last = ei->addr + ei->size;
806 * Leave non-ok overlaps as is; let caller 740 ei_start = ei->addr;
807 * panic "Overlapping early reservations" 741 addr = find_early_area(ei_start, ei_last, start, end,
808 * when it hits this overlap. 742 size, align);
809 */
810 if (!r->overlap_ok)
811 return;
812
813 /*
814 * We have an ok overlap. We will drop it from the early
815 * reservation map, and add back in any non-overlapping
816 * portions (lower or upper) as separate, overlap_ok,
817 * non-overlapping ranges.
818 */
819
820 /* 1. Note any non-overlapping (lower or upper) ranges. */
821 strncpy(name, r->name, sizeof(name) - 1);
822
823 lower_start = lower_end = 0;
824 upper_start = upper_end = 0;
825 if (r->start < start) {
826 lower_start = r->start;
827 lower_end = start;
828 }
829 if (r->end > end) {
830 upper_start = end;
831 upper_end = r->end;
832 }
833
834 /* 2. Drop the original ok overlapping range */
835 drop_range(i);
836
837 i--; /* resume for-loop on copied down entry */
838
839 /* 3. Add back in any non-overlapping ranges. */
840 if (lower_end)
841 reserve_early_overlap_ok(lower_start, lower_end, name);
842 if (upper_end)
843 reserve_early_overlap_ok(upper_start, upper_end, name);
844 }
845}
846
847static void __init __reserve_early(u64 start, u64 end, char *name,
848 int overlap_ok)
849{
850 int i;
851 struct early_res *r;
852
853 i = find_overlapped_early(start, end);
854 if (i >= MAX_EARLY_RES)
855 panic("Too many early reservations");
856 r = &early_res[i];
857 if (r->end)
858 panic("Overlapping early reservations "
859 "%llx-%llx %s to %llx-%llx %s\n",
860 start, end - 1, name?name:"", r->start,
861 r->end - 1, r->name);
862 r->start = start;
863 r->end = end;
864 r->overlap_ok = overlap_ok;
865 if (name)
866 strncpy(r->name, name, sizeof(r->name) - 1);
867}
868
869/*
870 * A few early reservtations come here.
871 *
872 * The 'overlap_ok' in the name of this routine does -not- mean it
873 * is ok for these reservations to overlap an earlier reservation.
874 * Rather it means that it is ok for subsequent reservations to
875 * overlap this one.
876 *
877 * Use this entry point to reserve early ranges when you are doing
878 * so out of "Paranoia", reserving perhaps more memory than you need,
879 * just in case, and don't mind a subsequent overlapping reservation
880 * that is known to be needed.
881 *
882 * The drop_overlaps_that_are_ok() call here isn't really needed.
883 * It would be needed if we had two colliding 'overlap_ok'
884 * reservations, so that the second such would not panic on the
885 * overlap with the first. We don't have any such as of this
886 * writing, but might as well tolerate such if it happens in
887 * the future.
888 */
889void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
890{
891 drop_overlaps_that_are_ok(start, end);
892 __reserve_early(start, end, name, 1);
893}
894
895/*
896 * Most early reservations come here.
897 *
898 * We first have drop_overlaps_that_are_ok() drop any pre-existing
899 * 'overlap_ok' ranges, so that we can then reserve this memory
900 * range without risk of panic'ing on an overlapping overlap_ok
901 * early reservation.
902 */
903void __init reserve_early(u64 start, u64 end, char *name)
904{
905 if (start >= end)
906 return;
907
908 drop_overlaps_that_are_ok(start, end);
909 __reserve_early(start, end, name, 0);
910}
911
912void __init free_early(u64 start, u64 end)
913{
914 struct early_res *r;
915 int i;
916
917 i = find_overlapped_early(start, end);
918 r = &early_res[i];
919 if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
920 panic("free_early on not reserved area: %llx-%llx!",
921 start, end - 1);
922
923 drop_range(i);
924}
925
926void __init early_res_to_bootmem(u64 start, u64 end)
927{
928 int i, count;
929 u64 final_start, final_end;
930
931 count = 0;
932 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
933 count++;
934
935 printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
936 count, start, end);
937 for (i = 0; i < count; i++) {
938 struct early_res *r = &early_res[i];
939 printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i,
940 r->start, r->end, r->name);
941 final_start = max(start, r->start);
942 final_end = min(end, r->end);
943 if (final_start >= final_end) {
944 printk(KERN_CONT "\n");
945 continue;
946 }
947 printk(KERN_CONT " ==> [%010llx - %010llx]\n",
948 final_start, final_end);
949 reserve_bootmem_generic(final_start, final_end - final_start,
950 BOOTMEM_DEFAULT);
951 }
952}
953 743
954/* Check for already reserved areas */ 744 if (addr != -1ULL)
955static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) 745 return addr;
956{
957 int i;
958 u64 addr = *addrp;
959 int changed = 0;
960 struct early_res *r;
961again:
962 i = find_overlapped_early(addr, addr + size);
963 r = &early_res[i];
964 if (i < MAX_EARLY_RES && r->end) {
965 *addrp = addr = round_up(r->end, align);
966 changed = 1;
967 goto again;
968 } 746 }
969 return changed; 747 return -1ULL;
970} 748}
971 749
972/* Check for already reserved areas */ 750u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
973static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
974{ 751{
975 int i; 752 return find_e820_area(start, end, size, align);
976 u64 addr = *addrp, last;
977 u64 size = *sizep;
978 int changed = 0;
979again:
980 last = addr + size;
981 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
982 struct early_res *r = &early_res[i];
983 if (last > r->start && addr < r->start) {
984 size = r->start - addr;
985 changed = 1;
986 goto again;
987 }
988 if (last > r->end && addr < r->end) {
989 addr = round_up(r->end, align);
990 size = last - addr;
991 changed = 1;
992 goto again;
993 }
994 if (last <= r->end && addr >= r->start) {
995 (*sizep)++;
996 return 0;
997 }
998 }
999 if (changed) {
1000 *addrp = addr;
1001 *sizep = size;
1002 }
1003 return changed;
1004} 753}
1005 754
1006/* 755u64 __init get_max_mapped(void)
1007 * Find a free area with specified alignment in a specific range.
1008 */
1009u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
1010{ 756{
1011 int i; 757 u64 end = max_pfn_mapped;
1012 758
1013 for (i = 0; i < e820.nr_map; i++) { 759 end <<= PAGE_SHIFT;
1014 struct e820entry *ei = &e820.map[i];
1015 u64 addr, last;
1016 u64 ei_last;
1017 760
1018 if (ei->type != E820_RAM) 761 return end;
1019 continue;
1020 addr = round_up(ei->addr, align);
1021 ei_last = ei->addr + ei->size;
1022 if (addr < start)
1023 addr = round_up(start, align);
1024 if (addr >= ei_last)
1025 continue;
1026 while (bad_addr(&addr, size, align) && addr+size <= ei_last)
1027 ;
1028 last = addr + size;
1029 if (last > ei_last)
1030 continue;
1031 if (last > end)
1032 continue;
1033 return addr;
1034 }
1035 return -1ULL;
1036} 762}
1037
1038/* 763/*
1039 * Find next free range after *start 764 * Find next free range after *start
1040 */ 765 */
@@ -1044,25 +769,19 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
1044 769
1045 for (i = 0; i < e820.nr_map; i++) { 770 for (i = 0; i < e820.nr_map; i++) {
1046 struct e820entry *ei = &e820.map[i]; 771 struct e820entry *ei = &e820.map[i];
1047 u64 addr, last; 772 u64 addr;
1048 u64 ei_last; 773 u64 ei_start, ei_last;
1049 774
1050 if (ei->type != E820_RAM) 775 if (ei->type != E820_RAM)
1051 continue; 776 continue;
1052 addr = round_up(ei->addr, align); 777
1053 ei_last = ei->addr + ei->size; 778 ei_last = ei->addr + ei->size;
1054 if (addr < start) 779 ei_start = ei->addr;
1055 addr = round_up(start, align); 780 addr = find_early_area_size(ei_start, ei_last, start,
1056 if (addr >= ei_last) 781 sizep, align);
1057 continue; 782
1058 *sizep = ei_last - addr; 783 if (addr != -1ULL)
1059 while (bad_addr_size(&addr, sizep, align) && 784 return addr;
1060 addr + *sizep <= ei_last)
1061 ;
1062 last = addr + *sizep;
1063 if (last > ei_last)
1064 continue;
1065 return addr;
1066 } 785 }
1067 786
1068 return -1ULL; 787 return -1ULL;
@@ -1421,6 +1140,8 @@ void __init e820_reserve_resources_late(void)
1421 end = MAX_RESOURCE_SIZE; 1140 end = MAX_RESOURCE_SIZE;
1422 if (start >= end) 1141 if (start >= end)
1423 continue; 1142 continue;
1143 printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
1144 start, end);
1424 reserve_region_with_split(&iomem_resource, start, end, 1145 reserve_region_with_split(&iomem_resource, start, end,
1425 "RAM buffer"); 1146 "RAM buffer");
1426 } 1147 }
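
In the e820.c hunks above, the early-reservation table and its collision checks move out of this file entirely; find_e820_area() is reduced to walking E820_RAM entries and delegating the actual search to find_early_area(), which also steps over already-reserved early ranges. The user-space sketch below is only a rough approximation of the remaining per-entry search (the reserved-range handling of the real helper is omitted): clamp the request window to one RAM entry, align, and test for fit. The entry and request values are hypothetical.

    #include <stdio.h>

    typedef unsigned long long u64;

    /* Simplified stand-in for find_early_area(); the real helper additionally
     * skips already-reserved early ranges, which is omitted here. */
    static u64 find_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
                         u64 size, u64 align)
    {
            u64 addr = ei_start > start ? ei_start : start;
            u64 last = ei_last < end ? ei_last : end;

            addr = (addr + align - 1) & ~(align - 1);   /* round up to alignment */
            if (addr >= last || last - addr < size)
                    return -1ULL;
            return addr;
    }

    int main(void)
    {
            /* hypothetical RAM entry 1MB..64MB, asking for 16KB, 4KB-aligned, above 2MB */
            u64 addr = find_area(0x100000, 0x4000000, 0x200000, 0x4000000,
                                 0x4000, 0x1000);

            printf("found at %#llx\n", addr);
            return 0;
    }
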
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index cdcfb122f256..c2fa9b8b497e 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -362,7 +362,7 @@ void __init efi_init(void)
362 printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); 362 printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
363 early_iounmap(tmp, 2); 363 early_iounmap(tmp, 2);
364 364
365 printk(KERN_INFO "EFI v%u.%.02u by %s \n", 365 printk(KERN_INFO "EFI v%u.%.02u by %s\n",
366 efi.systab->hdr.revision >> 16, 366 efi.systab->hdr.revision >> 16,
367 efi.systab->hdr.revision & 0xffff, vendor); 367 efi.systab->hdr.revision & 0xffff, vendor);
368 368
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 309689245431..cd37469b54ee 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -30,14 +30,32 @@
30 30
31#ifdef CONFIG_DYNAMIC_FTRACE 31#ifdef CONFIG_DYNAMIC_FTRACE
32 32
33/*
34 * modifying_code is set to notify NMIs that they need to use
35 * memory barriers when entering or exiting. But we don't want
36 * to burden NMIs with unnecessary memory barriers when code
37 * modification is not being done (which is most of the time).
38 *
39 * A mutex is already held when ftrace_arch_code_modify_prepare
40 * and post_process are called. No locks need to be taken here.
41 *
42 * Stop machine will make sure currently running NMIs are done
43 * and new NMIs will see the updated variable before we need
44 * to worry about NMIs doing memory barriers.
45 */
46static int modifying_code __read_mostly;
47static DEFINE_PER_CPU(int, save_modifying_code);
48
33int ftrace_arch_code_modify_prepare(void) 49int ftrace_arch_code_modify_prepare(void)
34{ 50{
35 set_kernel_text_rw(); 51 set_kernel_text_rw();
52 modifying_code = 1;
36 return 0; 53 return 0;
37} 54}
38 55
39int ftrace_arch_code_modify_post_process(void) 56int ftrace_arch_code_modify_post_process(void)
40{ 57{
58 modifying_code = 0;
41 set_kernel_text_ro(); 59 set_kernel_text_ro();
42 return 0; 60 return 0;
43} 61}
@@ -149,6 +167,11 @@ static void ftrace_mod_code(void)
149 167
150void ftrace_nmi_enter(void) 168void ftrace_nmi_enter(void)
151{ 169{
170 __get_cpu_var(save_modifying_code) = modifying_code;
171
172 if (!__get_cpu_var(save_modifying_code))
173 return;
174
152 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { 175 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
153 smp_rmb(); 176 smp_rmb();
154 ftrace_mod_code(); 177 ftrace_mod_code();
@@ -160,6 +183,9 @@ void ftrace_nmi_enter(void)
160 183
161void ftrace_nmi_exit(void) 184void ftrace_nmi_exit(void)
162{ 185{
186 if (!__get_cpu_var(save_modifying_code))
187 return;
188
163 /* Finish all executions before clearing nmi_running */ 189 /* Finish all executions before clearing nmi_running */
164 smp_mb(); 190 smp_mb();
165 atomic_dec(&nmi_running); 191 atomic_dec(&nmi_running);
@@ -484,13 +510,3 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
484 } 510 }
485} 511}
486#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 512#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
487
488#ifdef CONFIG_FTRACE_SYSCALLS
489
490extern unsigned long *sys_call_table;
491
492unsigned long __init arch_syscall_addr(int nr)
493{
494 return (unsigned long)(&sys_call_table)[nr];
495}
496#endif
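
The ftrace.c change has every NMI latch modifying_code into the per-CPU save_modifying_code on entry, so ftrace_nmi_enter() and ftrace_nmi_exit() agree on the fast path even if the flag changes while the NMI runs. Below is a rough stand-alone model of that latching idea; C11 atomics and a thread-local variable stand in for the kernel primitives and per-CPU data, and none of the names are kernel API.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Thread-local stand-in for the per-CPU save_modifying_code variable. */
static _Thread_local bool saved_modifying_code;

/* Global flag set by the (pretend) code patcher around text modification. */
static atomic_bool modifying_code;
static atomic_int nmi_running;

static void model_nmi_enter(void)
{
	/* Latch once: the exit path must make the same decision even if
	 * the patcher clears modifying_code while this handler runs. */
	saved_modifying_code = atomic_load(&modifying_code);
	if (!saved_modifying_code)
		return;			/* fast path: no counter, no barriers */
	atomic_fetch_add(&nmi_running, 1);
	/* ...a real handler would check for pending code patches here... */
}

static void model_nmi_exit(void)
{
	if (!saved_modifying_code)
		return;
	atomic_fetch_sub(&nmi_running, 1);
}

int main(void)
{
	atomic_store(&modifying_code, true);	/* patcher starts modifying */
	model_nmi_enter();
	atomic_store(&modifying_code, false);	/* patcher finishes mid-"NMI" */
	model_nmi_exit();			/* still balances the counter */
	printf("nmi_running = %d\n", atomic_load(&nmi_running));
	return 0;
}
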
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 5051b94c9069..adedeef1dedc 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -29,6 +29,16 @@ static void __init i386_default_early_setup(void)
29 29
30void __init i386_start_kernel(void) 30void __init i386_start_kernel(void)
31{ 31{
32#ifdef CONFIG_X86_TRAMPOLINE
33 /*
34 * But first pinch a few for the stack/trampoline stuff
35 * FIXME: Don't need the extra page at 4K, but need to fix
36 * trampoline before removing it. (see the GDT stuff)
37 */
38 reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE,
39 "EX TRAMPOLINE");
40#endif
41
32 reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); 42 reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
33 43
34#ifdef CONFIG_BLK_DEV_INITRD 44#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 7fd318bac59c..37c3d4b17d85 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -442,8 +442,8 @@ is386: movl $2,%ecx # set MP
442 */ 442 */
443 cmpb $0,ready 443 cmpb $0,ready
444 jne 1f 444 jne 1f
445 movl $per_cpu__gdt_page,%eax 445 movl $gdt_page,%eax
446 movl $per_cpu__stack_canary,%ecx 446 movl $stack_canary,%ecx
447 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) 447 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
448 shrl $16, %ecx 448 shrl $16, %ecx
449 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) 449 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
@@ -706,7 +706,7 @@ idt_descr:
706 .word 0 # 32 bit align gdt_desc.address 706 .word 0 # 32 bit align gdt_desc.address
707ENTRY(early_gdt_descr) 707ENTRY(early_gdt_descr)
708 .word GDT_ENTRIES*8-1 708 .word GDT_ENTRIES*8-1
709 .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ 709 .long gdt_page /* Overwritten for secondary CPUs */
710 710
711/* 711/*
712 * The boot_gdt must mirror the equivalent in setup.S and is 712 * The boot_gdt must mirror the equivalent in setup.S and is
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ad80a1c718c6..ee4fa1bfcb33 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -266,7 +266,7 @@ static void hpet_resume_device(void)
266 force_hpet_resume(); 266 force_hpet_resume();
267} 267}
268 268
269static void hpet_resume_counter(void) 269static void hpet_resume_counter(struct clocksource *cs)
270{ 270{
271 hpet_resume_device(); 271 hpet_resume_device();
272 hpet_restart_counter(); 272 hpet_restart_counter();
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index f2f8540a7f3d..c01a2b846d47 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -164,6 +164,11 @@ int init_fpu(struct task_struct *tsk)
164 return 0; 164 return 0;
165} 165}
166 166
167/*
168 * The xstateregs_active() routine is the same as the fpregs_active() routine,
169 * as the "regset->n" for the xstate regset will be updated based on the feature
 170 * capabilities supported by xsave.
171 */
167int fpregs_active(struct task_struct *target, const struct user_regset *regset) 172int fpregs_active(struct task_struct *target, const struct user_regset *regset)
168{ 173{
169 return tsk_used_math(target) ? regset->n : 0; 174 return tsk_used_math(target) ? regset->n : 0;
@@ -204,8 +209,6 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
204 if (ret) 209 if (ret)
205 return ret; 210 return ret;
206 211
207 set_stopped_child_used_math(target);
208
209 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, 212 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
210 &target->thread.xstate->fxsave, 0, -1); 213 &target->thread.xstate->fxsave, 0, -1);
211 214
@@ -224,6 +227,68 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
224 return ret; 227 return ret;
225} 228}
226 229
230int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
231 unsigned int pos, unsigned int count,
232 void *kbuf, void __user *ubuf)
233{
234 int ret;
235
236 if (!cpu_has_xsave)
237 return -ENODEV;
238
239 ret = init_fpu(target);
240 if (ret)
241 return ret;
242
243 /*
 244 * Copy the 48 bytes defined by the software first into the xstate
245 * memory layout in the thread struct, so that we can copy the entire
246 * xstateregs to the user using one user_regset_copyout().
247 */
248 memcpy(&target->thread.xstate->fxsave.sw_reserved,
249 xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
250
251 /*
252 * Copy the xstate memory layout.
253 */
254 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
255 &target->thread.xstate->xsave, 0, -1);
256 return ret;
257}
258
259int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
260 unsigned int pos, unsigned int count,
261 const void *kbuf, const void __user *ubuf)
262{
263 int ret;
264 struct xsave_hdr_struct *xsave_hdr;
265
266 if (!cpu_has_xsave)
267 return -ENODEV;
268
269 ret = init_fpu(target);
270 if (ret)
271 return ret;
272
273 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
274 &target->thread.xstate->xsave, 0, -1);
275
276 /*
277 * mxcsr reserved bits must be masked to zero for security reasons.
278 */
279 target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
280
281 xsave_hdr = &target->thread.xstate->xsave.xsave_hdr;
282
283 xsave_hdr->xstate_bv &= pcntxt_mask;
284 /*
285 * These bits must be zero.
286 */
287 xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
288
289 return ret;
290}
291
227#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 292#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
228 293
229/* 294/*
@@ -404,8 +469,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
404 if (ret) 469 if (ret)
405 return ret; 470 return ret;
406 471
407 set_stopped_child_used_math(target);
408
409 if (!HAVE_HWFP) 472 if (!HAVE_HWFP)
410 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); 473 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
411 474
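
xstateregs_get() and xstateregs_set() expose the whole XSAVE area as a regset, with the software-reserved bytes filled in and mxcsr plus xstate_bv sanitized on the write side. The sketch below shows how a debugger might read that regset from user space; it assumes the regset is reachable through a PTRACE_GETREGSET-style request with the NT_X86_XSTATE note type (value 0x202), neither of which appears in this diff, and the mxcsr offset of 24 comes from the FXSAVE layout.

#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef NT_X86_XSTATE
#define NT_X86_XSTATE	0x202		/* note type assumed for the xstate regset */
#endif
#ifndef PTRACE_GETREGSET
#define PTRACE_GETREGSET 0x4204
#endif

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {				/* tracee: stop and wait */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}
	waitpid(pid, NULL, 0);

	uint8_t xstate[4096];			/* generously sized XSAVE buffer */
	struct iovec iov = { .iov_base = xstate, .iov_len = sizeof(xstate) };
	uint32_t mxcsr;

	if (ptrace(PTRACE_GETREGSET, pid, (void *)(uintptr_t)NT_X86_XSTATE, &iov) == 0) {
		memcpy(&mxcsr, xstate + 24, 4);	/* MXCSR lives at offset 24 of FXSAVE */
		printf("xstate: %zu bytes, mxcsr=%#x\n", iov.iov_len, mxcsr);
	} else {
		perror("PTRACE_GETREGSET");
	}

	kill(pid, SIGKILL);
	return 0;
}
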
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index df89102bef80..fb725ee15f55 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -32,8 +32,14 @@
32 */ 32 */
33 33
34static int i8259A_auto_eoi; 34static int i8259A_auto_eoi;
35DEFINE_SPINLOCK(i8259A_lock); 35DEFINE_RAW_SPINLOCK(i8259A_lock);
36static void mask_and_ack_8259A(unsigned int); 36static void mask_and_ack_8259A(unsigned int);
37static void mask_8259A(void);
38static void unmask_8259A(void);
39static void disable_8259A_irq(unsigned int irq);
40static void enable_8259A_irq(unsigned int irq);
41static void init_8259A(int auto_eoi);
42static int i8259A_irq_pending(unsigned int irq);
37 43
38struct irq_chip i8259A_chip = { 44struct irq_chip i8259A_chip = {
39 .name = "XT-PIC", 45 .name = "XT-PIC",
@@ -63,51 +69,51 @@ unsigned int cached_irq_mask = 0xffff;
63 */ 69 */
64unsigned long io_apic_irqs; 70unsigned long io_apic_irqs;
65 71
66void disable_8259A_irq(unsigned int irq) 72static void disable_8259A_irq(unsigned int irq)
67{ 73{
68 unsigned int mask = 1 << irq; 74 unsigned int mask = 1 << irq;
69 unsigned long flags; 75 unsigned long flags;
70 76
71 spin_lock_irqsave(&i8259A_lock, flags); 77 raw_spin_lock_irqsave(&i8259A_lock, flags);
72 cached_irq_mask |= mask; 78 cached_irq_mask |= mask;
73 if (irq & 8) 79 if (irq & 8)
74 outb(cached_slave_mask, PIC_SLAVE_IMR); 80 outb(cached_slave_mask, PIC_SLAVE_IMR);
75 else 81 else
76 outb(cached_master_mask, PIC_MASTER_IMR); 82 outb(cached_master_mask, PIC_MASTER_IMR);
77 spin_unlock_irqrestore(&i8259A_lock, flags); 83 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
78} 84}
79 85
80void enable_8259A_irq(unsigned int irq) 86static void enable_8259A_irq(unsigned int irq)
81{ 87{
82 unsigned int mask = ~(1 << irq); 88 unsigned int mask = ~(1 << irq);
83 unsigned long flags; 89 unsigned long flags;
84 90
85 spin_lock_irqsave(&i8259A_lock, flags); 91 raw_spin_lock_irqsave(&i8259A_lock, flags);
86 cached_irq_mask &= mask; 92 cached_irq_mask &= mask;
87 if (irq & 8) 93 if (irq & 8)
88 outb(cached_slave_mask, PIC_SLAVE_IMR); 94 outb(cached_slave_mask, PIC_SLAVE_IMR);
89 else 95 else
90 outb(cached_master_mask, PIC_MASTER_IMR); 96 outb(cached_master_mask, PIC_MASTER_IMR);
91 spin_unlock_irqrestore(&i8259A_lock, flags); 97 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
92} 98}
93 99
94int i8259A_irq_pending(unsigned int irq) 100static int i8259A_irq_pending(unsigned int irq)
95{ 101{
96 unsigned int mask = 1<<irq; 102 unsigned int mask = 1<<irq;
97 unsigned long flags; 103 unsigned long flags;
98 int ret; 104 int ret;
99 105
100 spin_lock_irqsave(&i8259A_lock, flags); 106 raw_spin_lock_irqsave(&i8259A_lock, flags);
101 if (irq < 8) 107 if (irq < 8)
102 ret = inb(PIC_MASTER_CMD) & mask; 108 ret = inb(PIC_MASTER_CMD) & mask;
103 else 109 else
104 ret = inb(PIC_SLAVE_CMD) & (mask >> 8); 110 ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
105 spin_unlock_irqrestore(&i8259A_lock, flags); 111 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
106 112
107 return ret; 113 return ret;
108} 114}
109 115
110void make_8259A_irq(unsigned int irq) 116static void make_8259A_irq(unsigned int irq)
111{ 117{
112 disable_irq_nosync(irq); 118 disable_irq_nosync(irq);
113 io_apic_irqs &= ~(1<<irq); 119 io_apic_irqs &= ~(1<<irq);
@@ -150,7 +156,7 @@ static void mask_and_ack_8259A(unsigned int irq)
150 unsigned int irqmask = 1 << irq; 156 unsigned int irqmask = 1 << irq;
151 unsigned long flags; 157 unsigned long flags;
152 158
153 spin_lock_irqsave(&i8259A_lock, flags); 159 raw_spin_lock_irqsave(&i8259A_lock, flags);
154 /* 160 /*
155 * Lightweight spurious IRQ detection. We do not want 161 * Lightweight spurious IRQ detection. We do not want
156 * to overdo spurious IRQ handling - it's usually a sign 162 * to overdo spurious IRQ handling - it's usually a sign
@@ -183,7 +189,7 @@ handle_real_irq:
183 outb(cached_master_mask, PIC_MASTER_IMR); 189 outb(cached_master_mask, PIC_MASTER_IMR);
184 outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ 190 outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */
185 } 191 }
186 spin_unlock_irqrestore(&i8259A_lock, flags); 192 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
187 return; 193 return;
188 194
189spurious_8259A_irq: 195spurious_8259A_irq:
@@ -281,37 +287,37 @@ static int __init i8259A_init_sysfs(void)
281 287
282device_initcall(i8259A_init_sysfs); 288device_initcall(i8259A_init_sysfs);
283 289
284void mask_8259A(void) 290static void mask_8259A(void)
285{ 291{
286 unsigned long flags; 292 unsigned long flags;
287 293
288 spin_lock_irqsave(&i8259A_lock, flags); 294 raw_spin_lock_irqsave(&i8259A_lock, flags);
289 295
290 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 296 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
291 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ 297 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
292 298
293 spin_unlock_irqrestore(&i8259A_lock, flags); 299 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
294} 300}
295 301
296void unmask_8259A(void) 302static void unmask_8259A(void)
297{ 303{
298 unsigned long flags; 304 unsigned long flags;
299 305
300 spin_lock_irqsave(&i8259A_lock, flags); 306 raw_spin_lock_irqsave(&i8259A_lock, flags);
301 307
302 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ 308 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
303 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ 309 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
304 310
305 spin_unlock_irqrestore(&i8259A_lock, flags); 311 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
306} 312}
307 313
308void init_8259A(int auto_eoi) 314static void init_8259A(int auto_eoi)
309{ 315{
310 unsigned long flags; 316 unsigned long flags;
311 317
312 i8259A_auto_eoi = auto_eoi; 318 i8259A_auto_eoi = auto_eoi;
313 319
314 spin_lock_irqsave(&i8259A_lock, flags); 320 raw_spin_lock_irqsave(&i8259A_lock, flags);
315 321
316 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 322 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
317 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ 323 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
@@ -356,5 +362,49 @@ void init_8259A(int auto_eoi)
356 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ 362 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
357 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ 363 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
358 364
359 spin_unlock_irqrestore(&i8259A_lock, flags); 365 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
360} 366}
367
368/*
 369 * Make the i8259 a driver so that PIC functions can be selected at run time.
 370 * The goal is to keep one x86 kernel binary compatible across PC-compatible
 371 * and non-PC-compatible platforms, such as x86 MID.
372 */
373
374static void legacy_pic_noop(void) { };
375static void legacy_pic_uint_noop(unsigned int unused) { };
376static void legacy_pic_int_noop(int unused) { };
377
378static struct irq_chip dummy_pic_chip = {
379 .name = "dummy pic",
380 .mask = legacy_pic_uint_noop,
381 .unmask = legacy_pic_uint_noop,
382 .disable = legacy_pic_uint_noop,
383 .mask_ack = legacy_pic_uint_noop,
384};
385static int legacy_pic_irq_pending_noop(unsigned int irq)
386{
387 return 0;
388}
389
390struct legacy_pic null_legacy_pic = {
391 .nr_legacy_irqs = 0,
392 .chip = &dummy_pic_chip,
393 .mask_all = legacy_pic_noop,
394 .restore_mask = legacy_pic_noop,
395 .init = legacy_pic_int_noop,
396 .irq_pending = legacy_pic_irq_pending_noop,
397 .make_irq = legacy_pic_uint_noop,
398};
399
400struct legacy_pic default_legacy_pic = {
401 .nr_legacy_irqs = NR_IRQS_LEGACY,
402 .chip = &i8259A_chip,
403 .mask_all = mask_8259A,
404 .restore_mask = unmask_8259A,
405 .init = init_8259A,
406 .irq_pending = i8259A_irq_pending,
407 .make_irq = make_8259A_irq,
408};
409
410struct legacy_pic *legacy_pic = &default_legacy_pic;
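
struct legacy_pic turns the 8259 entry points into an ops table: generic code calls through the legacy_pic pointer, PC platforms keep default_legacy_pic, and PIC-less platforms (see the mrst.c change later in this diff) install null_legacy_pic with nr_legacy_irqs = 0. A stand-alone model of that indirection follows; the names and printf calls are invented stand-ins, not the kernel's code.

#include <stdio.h>

/* Platform code picks an implementation once; generic code only calls
 * through the pointer, exactly as init_ISA_irqs() does via legacy_pic. */
struct pic_ops {
	int nr_legacy_irqs;
	void (*init)(int auto_eoi);
	void (*mask_all)(void);
};

static void pic_8259_init(int auto_eoi) { printf("8259 init, auto_eoi=%d\n", auto_eoi); }
static void pic_8259_mask_all(void)     { printf("8259 mask all\n"); }
static void pic_noop_init(int unused)   { (void)unused; }
static void pic_noop(void)              { }

static struct pic_ops default_pic = { 16, pic_8259_init, pic_8259_mask_all };
static struct pic_ops null_pic    = {  0, pic_noop_init, pic_noop };
static struct pic_ops *pic = &default_pic;

int main(void)
{
	int platform_has_no_8259 = 1;	/* pretend early platform setup decided this */

	if (platform_has_no_8259)
		pic = &null_pic;

	pic->init(0);			/* generic caller, as legacy_pic->init(0) */
	for (int i = 0; i < pic->nr_legacy_irqs; i++)
		printf("setting up legacy IRQ %d\n", i);
	printf("done (%d legacy IRQs)\n", pic->nr_legacy_irqs);
	return 0;
}
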
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index d5932226614f..ef257fc2921b 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -84,24 +84,7 @@ static struct irqaction irq2 = {
84}; 84};
85 85
86DEFINE_PER_CPU(vector_irq_t, vector_irq) = { 86DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
87 [0 ... IRQ0_VECTOR - 1] = -1, 87 [0 ... NR_VECTORS - 1] = -1,
88 [IRQ0_VECTOR] = 0,
89 [IRQ1_VECTOR] = 1,
90 [IRQ2_VECTOR] = 2,
91 [IRQ3_VECTOR] = 3,
92 [IRQ4_VECTOR] = 4,
93 [IRQ5_VECTOR] = 5,
94 [IRQ6_VECTOR] = 6,
95 [IRQ7_VECTOR] = 7,
96 [IRQ8_VECTOR] = 8,
97 [IRQ9_VECTOR] = 9,
98 [IRQ10_VECTOR] = 10,
99 [IRQ11_VECTOR] = 11,
100 [IRQ12_VECTOR] = 12,
101 [IRQ13_VECTOR] = 13,
102 [IRQ14_VECTOR] = 14,
103 [IRQ15_VECTOR] = 15,
104 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
105}; 88};
106 89
107int vector_used_by_percpu_irq(unsigned int vector) 90int vector_used_by_percpu_irq(unsigned int vector)
@@ -123,12 +106,12 @@ void __init init_ISA_irqs(void)
123#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) 106#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
124 init_bsp_APIC(); 107 init_bsp_APIC();
125#endif 108#endif
126 init_8259A(0); 109 legacy_pic->init(0);
127 110
128 /* 111 /*
129 * 16 old-style INTA-cycle interrupts: 112 * 16 old-style INTA-cycle interrupts:
130 */ 113 */
131 for (i = 0; i < NR_IRQS_LEGACY; i++) { 114 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) {
132 struct irq_desc *desc = irq_to_desc(i); 115 struct irq_desc *desc = irq_to_desc(i);
133 116
134 desc->status = IRQ_DISABLED; 117 desc->status = IRQ_DISABLED;
@@ -142,6 +125,19 @@ void __init init_ISA_irqs(void)
142 125
143void __init init_IRQ(void) 126void __init init_IRQ(void)
144{ 127{
128 int i;
129
130 /*
 131	 * On cpu 0, assign IRQ0_VECTOR..IRQ15_VECTOR to IRQs 0..15.
 132	 * If these IRQs are handled by legacy interrupt controllers like the PIC,
 133	 * then this configuration will likely be static after boot. If these
 134	 * IRQs are handled by more modern controllers like the IO-APIC, then
 135	 * this vector space can be freed and re-used dynamically as the IRQs
 136	 * migrate, etc.
137 */
138 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
139 per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i;
140
145 x86_init.irqs.intr_init(); 141 x86_init.irqs.intr_init();
146} 142}
147 143
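
With the ops table in place, init_IRQ() seeds CPU 0's vector_irq entries only for the legacy IRQs the installed PIC actually reports, instead of baking all 16 into the static initializer. A small stand-alone sketch of that table follows; the IRQ0_VECTOR value here is illustrative rather than the one from irq_vectors.h.

#include <stdio.h>

#define NR_VECTORS	256
#define IRQ0_VECTOR	0x30	/* illustrative stand-in for the real constant */

int main(void)
{
	int vector_irq[NR_VECTORS];
	int nr_legacy_irqs = 16;	/* 0 on a PIC-less platform like Moorestown */

	for (int v = 0; v < NR_VECTORS; v++)
		vector_irq[v] = -1;	/* the static initializer in irqinit.c */

	/* The loop init_IRQ() now runs on CPU 0 only: */
	for (int i = 0; i < nr_legacy_irqs; i++)
		vector_irq[IRQ0_VECTOR + i] = i;

	printf("vector %#x maps to irq %d\n",
	       IRQ0_VECTOR + 3, vector_irq[IRQ0_VECTOR + 3]);
	return 0;
}
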
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 5de9f4a9c3fd..b43bbaebe2c0 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -49,6 +49,7 @@
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kdebug.h> 50#include <linux/kdebug.h>
51#include <linux/kallsyms.h> 51#include <linux/kallsyms.h>
52#include <linux/ftrace.h>
52 53
53#include <asm/cacheflush.h> 54#include <asm/cacheflush.h>
54#include <asm/desc.h> 55#include <asm/desc.h>
@@ -106,16 +107,22 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
106}; 107};
107const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); 108const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
108 109
109/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ 110static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
110static void __kprobes set_jmp_op(void *from, void *to)
111{ 111{
112 struct __arch_jmp_op { 112 struct __arch_relative_insn {
113 char op; 113 u8 op;
114 s32 raddr; 114 s32 raddr;
115 } __attribute__((packed)) * jop; 115 } __attribute__((packed)) *insn;
116 jop = (struct __arch_jmp_op *)from; 116
117 jop->raddr = (s32)((long)(to) - ((long)(from) + 5)); 117 insn = (struct __arch_relative_insn *)from;
118 jop->op = RELATIVEJUMP_INSTRUCTION; 118 insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
119 insn->op = op;
120}
121
122/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
123static void __kprobes synthesize_reljump(void *from, void *to)
124{
125 __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
119} 126}
120 127
121/* 128/*
@@ -202,7 +209,7 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
202 /* 209 /*
203 * Basically, kp->ainsn.insn has an original instruction. 210 * Basically, kp->ainsn.insn has an original instruction.
204 * However, RIP-relative instruction can not do single-stepping 211 * However, RIP-relative instruction can not do single-stepping
205 * at different place, fix_riprel() tweaks the displacement of 212 * at different place, __copy_instruction() tweaks the displacement of
206 * that instruction. In that case, we can't recover the instruction 213 * that instruction. In that case, we can't recover the instruction
207 * from the kp->ainsn.insn. 214 * from the kp->ainsn.insn.
208 * 215 *
@@ -284,21 +291,37 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
284} 291}
285 292
286/* 293/*
287 * Adjust the displacement if the instruction uses the %rip-relative 294 * Copy an instruction and adjust the displacement if the instruction
288 * addressing mode. 295 * uses the %rip-relative addressing mode.
289 * If it does, return the address of the 32-bit displacement word. 296 * If it does, return the address of the 32-bit displacement word.
290 * If not, return null. 297 * If not, return null.
291 * Only applicable to 64-bit x86. 298 * Only applicable to 64-bit x86.
292 */ 299 */
293static void __kprobes fix_riprel(struct kprobe *p) 300static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
294{ 301{
295#ifdef CONFIG_X86_64
296 struct insn insn; 302 struct insn insn;
297 kernel_insn_init(&insn, p->ainsn.insn); 303 int ret;
304 kprobe_opcode_t buf[MAX_INSN_SIZE];
298 305
306 kernel_insn_init(&insn, src);
307 if (recover) {
308 insn_get_opcode(&insn);
309 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
310 ret = recover_probed_instruction(buf,
311 (unsigned long)src);
312 if (ret)
313 return 0;
314 kernel_insn_init(&insn, buf);
315 }
316 }
317 insn_get_length(&insn);
318 memcpy(dest, insn.kaddr, insn.length);
319
320#ifdef CONFIG_X86_64
299 if (insn_rip_relative(&insn)) { 321 if (insn_rip_relative(&insn)) {
300 s64 newdisp; 322 s64 newdisp;
301 u8 *disp; 323 u8 *disp;
324 kernel_insn_init(&insn, dest);
302 insn_get_displacement(&insn); 325 insn_get_displacement(&insn);
303 /* 326 /*
304 * The copied instruction uses the %rip-relative addressing 327 * The copied instruction uses the %rip-relative addressing
@@ -312,20 +335,23 @@ static void __kprobes fix_riprel(struct kprobe *p)
312 * extension of the original signed 32-bit displacement would 335 * extension of the original signed 32-bit displacement would
313 * have given. 336 * have given.
314 */ 337 */
315 newdisp = (u8 *) p->addr + (s64) insn.displacement.value - 338 newdisp = (u8 *) src + (s64) insn.displacement.value -
316 (u8 *) p->ainsn.insn; 339 (u8 *) dest;
317 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ 340 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
318 disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); 341 disp = (u8 *) dest + insn_offset_displacement(&insn);
319 *(s32 *) disp = (s32) newdisp; 342 *(s32 *) disp = (s32) newdisp;
320 } 343 }
321#endif 344#endif
345 return insn.length;
322} 346}
323 347
324static void __kprobes arch_copy_kprobe(struct kprobe *p) 348static void __kprobes arch_copy_kprobe(struct kprobe *p)
325{ 349{
326 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 350 /*
327 351 * Copy an instruction without recovering int3, because it will be
328 fix_riprel(p); 352 * put by another subsystem.
353 */
354 __copy_instruction(p->ainsn.insn, p->addr, 0);
329 355
330 if (can_boost(p->addr)) 356 if (can_boost(p->addr))
331 p->ainsn.boostable = 0; 357 p->ainsn.boostable = 0;
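
__synthesize_relative_insn() and the RIP-relative fixup in __copy_instruction() rest on the same arithmetic: a rel32 operand encodes the target minus the address of the byte after the 5-byte instruction. A stand-alone sketch of that math follows, using a local buffer in place of kernel text (0xe9 and 0xe8 are the jmp/call rel32 opcodes).

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Emit a 5-byte opcode + rel32 instruction at 'from' targeting 'to', the
 * same displacement formula used by __synthesize_relative_insn(). */
static void synthesize_rel_insn(uint8_t *from, const uint8_t *to, uint8_t op)
{
	int32_t rel = (int32_t)((intptr_t)to - ((intptr_t)from + 5));

	from[0] = op;
	memcpy(from + 1, &rel, sizeof(rel));
}

int main(void)
{
	uint8_t buf[32] = { 0 };
	uint8_t *insn   = buf;		/* where the jump is written */
	uint8_t *target = buf + 21;	/* where it should land */
	int32_t rel;

	synthesize_rel_insn(insn, target, 0xe9);
	memcpy(&rel, insn + 1, sizeof(rel));

	/* The same relation drives the %rip-relative fixup: the copy gets
	 * new_disp = old_target - new_insn_end, so it still reaches the
	 * original target from its new location. */
	printf("rel32 = %d, lands at insn+5+rel = buf+%td\n",
	       rel, (insn + 5 + rel) - buf);
	return 0;
}
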
@@ -406,18 +432,6 @@ static void __kprobes restore_btf(void)
406 update_debugctlmsr(current->thread.debugctlmsr); 432 update_debugctlmsr(current->thread.debugctlmsr);
407} 433}
408 434
409static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
410{
411 clear_btf();
412 regs->flags |= X86_EFLAGS_TF;
413 regs->flags &= ~X86_EFLAGS_IF;
414 /* single step inline if the instruction is an int3 */
415 if (p->opcode == BREAKPOINT_INSTRUCTION)
416 regs->ip = (unsigned long)p->addr;
417 else
418 regs->ip = (unsigned long)p->ainsn.insn;
419}
420
421void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 435void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
422 struct pt_regs *regs) 436 struct pt_regs *regs)
423{ 437{
@@ -429,20 +443,50 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
429 *sara = (unsigned long) &kretprobe_trampoline; 443 *sara = (unsigned long) &kretprobe_trampoline;
430} 444}
431 445
446#ifdef CONFIG_OPTPROBES
447static int __kprobes setup_detour_execution(struct kprobe *p,
448 struct pt_regs *regs,
449 int reenter);
450#else
451#define setup_detour_execution(p, regs, reenter) (0)
452#endif
453
432static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, 454static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
433 struct kprobe_ctlblk *kcb) 455 struct kprobe_ctlblk *kcb, int reenter)
434{ 456{
457 if (setup_detour_execution(p, regs, reenter))
458 return;
459
435#if !defined(CONFIG_PREEMPT) 460#if !defined(CONFIG_PREEMPT)
436 if (p->ainsn.boostable == 1 && !p->post_handler) { 461 if (p->ainsn.boostable == 1 && !p->post_handler) {
437 /* Boost up -- we can execute copied instructions directly */ 462 /* Boost up -- we can execute copied instructions directly */
438 reset_current_kprobe(); 463 if (!reenter)
464 reset_current_kprobe();
465 /*
466 * Reentering boosted probe doesn't reset current_kprobe,
467 * nor set current_kprobe, because it doesn't use single
468 * stepping.
469 */
439 regs->ip = (unsigned long)p->ainsn.insn; 470 regs->ip = (unsigned long)p->ainsn.insn;
440 preempt_enable_no_resched(); 471 preempt_enable_no_resched();
441 return; 472 return;
442 } 473 }
443#endif 474#endif
444 prepare_singlestep(p, regs); 475 if (reenter) {
445 kcb->kprobe_status = KPROBE_HIT_SS; 476 save_previous_kprobe(kcb);
477 set_current_kprobe(p, regs, kcb);
478 kcb->kprobe_status = KPROBE_REENTER;
479 } else
480 kcb->kprobe_status = KPROBE_HIT_SS;
481 /* Prepare real single stepping */
482 clear_btf();
483 regs->flags |= X86_EFLAGS_TF;
484 regs->flags &= ~X86_EFLAGS_IF;
485 /* single step inline if the instruction is an int3 */
486 if (p->opcode == BREAKPOINT_INSTRUCTION)
487 regs->ip = (unsigned long)p->addr;
488 else
489 regs->ip = (unsigned long)p->ainsn.insn;
446} 490}
447 491
448/* 492/*
@@ -456,11 +500,8 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
456 switch (kcb->kprobe_status) { 500 switch (kcb->kprobe_status) {
457 case KPROBE_HIT_SSDONE: 501 case KPROBE_HIT_SSDONE:
458 case KPROBE_HIT_ACTIVE: 502 case KPROBE_HIT_ACTIVE:
459 save_previous_kprobe(kcb);
460 set_current_kprobe(p, regs, kcb);
461 kprobes_inc_nmissed_count(p); 503 kprobes_inc_nmissed_count(p);
462 prepare_singlestep(p, regs); 504 setup_singlestep(p, regs, kcb, 1);
463 kcb->kprobe_status = KPROBE_REENTER;
464 break; 505 break;
465 case KPROBE_HIT_SS: 506 case KPROBE_HIT_SS:
466 /* A probe has been hit in the codepath leading up to, or just 507 /* A probe has been hit in the codepath leading up to, or just
@@ -535,13 +576,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
535 * more here. 576 * more here.
536 */ 577 */
537 if (!p->pre_handler || !p->pre_handler(p, regs)) 578 if (!p->pre_handler || !p->pre_handler(p, regs))
538 setup_singlestep(p, regs, kcb); 579 setup_singlestep(p, regs, kcb, 0);
539 return 1; 580 return 1;
540 } 581 }
541 } else if (kprobe_running()) { 582 } else if (kprobe_running()) {
542 p = __get_cpu_var(current_kprobe); 583 p = __get_cpu_var(current_kprobe);
543 if (p->break_handler && p->break_handler(p, regs)) { 584 if (p->break_handler && p->break_handler(p, regs)) {
544 setup_singlestep(p, regs, kcb); 585 setup_singlestep(p, regs, kcb, 0);
545 return 1; 586 return 1;
546 } 587 }
547 } /* else: not a kprobe fault; let the kernel handle it */ 588 } /* else: not a kprobe fault; let the kernel handle it */
@@ -550,6 +591,69 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
550 return 0; 591 return 0;
551} 592}
552 593
594#ifdef CONFIG_X86_64
595#define SAVE_REGS_STRING \
596 /* Skip cs, ip, orig_ax. */ \
597 " subq $24, %rsp\n" \
598 " pushq %rdi\n" \
599 " pushq %rsi\n" \
600 " pushq %rdx\n" \
601 " pushq %rcx\n" \
602 " pushq %rax\n" \
603 " pushq %r8\n" \
604 " pushq %r9\n" \
605 " pushq %r10\n" \
606 " pushq %r11\n" \
607 " pushq %rbx\n" \
608 " pushq %rbp\n" \
609 " pushq %r12\n" \
610 " pushq %r13\n" \
611 " pushq %r14\n" \
612 " pushq %r15\n"
613#define RESTORE_REGS_STRING \
614 " popq %r15\n" \
615 " popq %r14\n" \
616 " popq %r13\n" \
617 " popq %r12\n" \
618 " popq %rbp\n" \
619 " popq %rbx\n" \
620 " popq %r11\n" \
621 " popq %r10\n" \
622 " popq %r9\n" \
623 " popq %r8\n" \
624 " popq %rax\n" \
625 " popq %rcx\n" \
626 " popq %rdx\n" \
627 " popq %rsi\n" \
628 " popq %rdi\n" \
629 /* Skip orig_ax, ip, cs */ \
630 " addq $24, %rsp\n"
631#else
632#define SAVE_REGS_STRING \
633 /* Skip cs, ip, orig_ax and gs. */ \
634 " subl $16, %esp\n" \
635 " pushl %fs\n" \
636 " pushl %ds\n" \
637 " pushl %es\n" \
638 " pushl %eax\n" \
639 " pushl %ebp\n" \
640 " pushl %edi\n" \
641 " pushl %esi\n" \
642 " pushl %edx\n" \
643 " pushl %ecx\n" \
644 " pushl %ebx\n"
645#define RESTORE_REGS_STRING \
646 " popl %ebx\n" \
647 " popl %ecx\n" \
648 " popl %edx\n" \
649 " popl %esi\n" \
650 " popl %edi\n" \
651 " popl %ebp\n" \
652 " popl %eax\n" \
653 /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
654 " addl $24, %esp\n"
655#endif
656
553/* 657/*
554 * When a retprobed function returns, this code saves registers and 658 * When a retprobed function returns, this code saves registers and
555 * calls trampoline_handler(), which in turn calls the kretprobe's handler. 659
@@ -563,65 +667,16 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
563 /* We don't bother saving the ss register */ 667 /* We don't bother saving the ss register */
564 " pushq %rsp\n" 668 " pushq %rsp\n"
565 " pushfq\n" 669 " pushfq\n"
566 /* 670 SAVE_REGS_STRING
567 * Skip cs, ip, orig_ax.
568 * trampoline_handler() will plug in these values
569 */
570 " subq $24, %rsp\n"
571 " pushq %rdi\n"
572 " pushq %rsi\n"
573 " pushq %rdx\n"
574 " pushq %rcx\n"
575 " pushq %rax\n"
576 " pushq %r8\n"
577 " pushq %r9\n"
578 " pushq %r10\n"
579 " pushq %r11\n"
580 " pushq %rbx\n"
581 " pushq %rbp\n"
582 " pushq %r12\n"
583 " pushq %r13\n"
584 " pushq %r14\n"
585 " pushq %r15\n"
586 " movq %rsp, %rdi\n" 671 " movq %rsp, %rdi\n"
587 " call trampoline_handler\n" 672 " call trampoline_handler\n"
588 /* Replace saved sp with true return address. */ 673 /* Replace saved sp with true return address. */
589 " movq %rax, 152(%rsp)\n" 674 " movq %rax, 152(%rsp)\n"
590 " popq %r15\n" 675 RESTORE_REGS_STRING
591 " popq %r14\n"
592 " popq %r13\n"
593 " popq %r12\n"
594 " popq %rbp\n"
595 " popq %rbx\n"
596 " popq %r11\n"
597 " popq %r10\n"
598 " popq %r9\n"
599 " popq %r8\n"
600 " popq %rax\n"
601 " popq %rcx\n"
602 " popq %rdx\n"
603 " popq %rsi\n"
604 " popq %rdi\n"
605 /* Skip orig_ax, ip, cs */
606 " addq $24, %rsp\n"
607 " popfq\n" 676 " popfq\n"
608#else 677#else
609 " pushf\n" 678 " pushf\n"
610 /* 679 SAVE_REGS_STRING
611 * Skip cs, ip, orig_ax and gs.
612 * trampoline_handler() will plug in these values
613 */
614 " subl $16, %esp\n"
615 " pushl %fs\n"
616 " pushl %es\n"
617 " pushl %ds\n"
618 " pushl %eax\n"
619 " pushl %ebp\n"
620 " pushl %edi\n"
621 " pushl %esi\n"
622 " pushl %edx\n"
623 " pushl %ecx\n"
624 " pushl %ebx\n"
625 " movl %esp, %eax\n" 680 " movl %esp, %eax\n"
626 " call trampoline_handler\n" 681 " call trampoline_handler\n"
627 /* Move flags to cs */ 682 /* Move flags to cs */
@@ -629,15 +684,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
629 " movl %edx, 52(%esp)\n" 684 " movl %edx, 52(%esp)\n"
630 /* Replace saved flags with true return address. */ 685 /* Replace saved flags with true return address. */
631 " movl %eax, 56(%esp)\n" 686 " movl %eax, 56(%esp)\n"
632 " popl %ebx\n" 687 RESTORE_REGS_STRING
633 " popl %ecx\n"
634 " popl %edx\n"
635 " popl %esi\n"
636 " popl %edi\n"
637 " popl %ebp\n"
638 " popl %eax\n"
639 /* Skip ds, es, fs, gs, orig_ax and ip */
640 " addl $24, %esp\n"
641 " popf\n" 688 " popf\n"
642#endif 689#endif
643 " ret\n"); 690 " ret\n");
@@ -805,8 +852,8 @@ static void __kprobes resume_execution(struct kprobe *p,
805 * These instructions can be executed directly if it 852 * These instructions can be executed directly if it
806 * jumps back to the correct address. 853
807 */ 854 */
808 set_jmp_op((void *)regs->ip, 855 synthesize_reljump((void *)regs->ip,
809 (void *)orig_ip + (regs->ip - copy_ip)); 856 (void *)orig_ip + (regs->ip - copy_ip));
810 p->ainsn.boostable = 1; 857 p->ainsn.boostable = 1;
811 } else { 858 } else {
812 p->ainsn.boostable = -1; 859 p->ainsn.boostable = -1;
@@ -1033,6 +1080,358 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1033 return 0; 1080 return 0;
1034} 1081}
1035 1082
1083
1084#ifdef CONFIG_OPTPROBES
1085
1086/* Insert a call instruction at address 'from', which calls address 'to'.*/
1087static void __kprobes synthesize_relcall(void *from, void *to)
1088{
1089 __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
1090}
1091
1092/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
1093static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1094 unsigned long val)
1095{
1096#ifdef CONFIG_X86_64
1097 *addr++ = 0x48;
1098 *addr++ = 0xbf;
1099#else
1100 *addr++ = 0xb8;
1101#endif
1102 *(unsigned long *)addr = val;
1103}
1104
1105void __kprobes kprobes_optinsn_template_holder(void)
1106{
1107 asm volatile (
1108 ".global optprobe_template_entry\n"
1109 "optprobe_template_entry: \n"
1110#ifdef CONFIG_X86_64
1111 /* We don't bother saving the ss register */
1112 " pushq %rsp\n"
1113 " pushfq\n"
1114 SAVE_REGS_STRING
1115 " movq %rsp, %rsi\n"
1116 ".global optprobe_template_val\n"
1117 "optprobe_template_val: \n"
1118 ASM_NOP5
1119 ASM_NOP5
1120 ".global optprobe_template_call\n"
1121 "optprobe_template_call: \n"
1122 ASM_NOP5
1123 /* Move flags to rsp */
1124 " movq 144(%rsp), %rdx\n"
1125 " movq %rdx, 152(%rsp)\n"
1126 RESTORE_REGS_STRING
1127 /* Skip flags entry */
1128 " addq $8, %rsp\n"
1129 " popfq\n"
1130#else /* CONFIG_X86_32 */
1131 " pushf\n"
1132 SAVE_REGS_STRING
1133 " movl %esp, %edx\n"
1134 ".global optprobe_template_val\n"
1135 "optprobe_template_val: \n"
1136 ASM_NOP5
1137 ".global optprobe_template_call\n"
1138 "optprobe_template_call: \n"
1139 ASM_NOP5
1140 RESTORE_REGS_STRING
1141 " addl $4, %esp\n" /* skip cs */
1142 " popf\n"
1143#endif
1144 ".global optprobe_template_end\n"
1145 "optprobe_template_end: \n");
1146}
1147
1148#define TMPL_MOVE_IDX \
1149 ((long)&optprobe_template_val - (long)&optprobe_template_entry)
1150#define TMPL_CALL_IDX \
1151 ((long)&optprobe_template_call - (long)&optprobe_template_entry)
1152#define TMPL_END_IDX \
1153 ((long)&optprobe_template_end - (long)&optprobe_template_entry)
1154
1155#define INT3_SIZE sizeof(kprobe_opcode_t)
1156
1157/* Optimized kprobe call back function: called from optinsn */
1158static void __kprobes optimized_callback(struct optimized_kprobe *op,
1159 struct pt_regs *regs)
1160{
1161 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1162
1163 preempt_disable();
1164 if (kprobe_running()) {
1165 kprobes_inc_nmissed_count(&op->kp);
1166 } else {
1167 /* Save skipped registers */
1168#ifdef CONFIG_X86_64
1169 regs->cs = __KERNEL_CS;
1170#else
1171 regs->cs = __KERNEL_CS | get_kernel_rpl();
1172 regs->gs = 0;
1173#endif
1174 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
1175 regs->orig_ax = ~0UL;
1176
1177 __get_cpu_var(current_kprobe) = &op->kp;
1178 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1179 opt_pre_handler(&op->kp, regs);
1180 __get_cpu_var(current_kprobe) = NULL;
1181 }
1182 preempt_enable_no_resched();
1183}
1184
1185static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
1186{
1187 int len = 0, ret;
1188
1189 while (len < RELATIVEJUMP_SIZE) {
1190 ret = __copy_instruction(dest + len, src + len, 1);
1191 if (!ret || !can_boost(dest + len))
1192 return -EINVAL;
1193 len += ret;
1194 }
1195 /* Check whether the address range is reserved */
1196 if (ftrace_text_reserved(src, src + len - 1) ||
1197 alternatives_text_reserved(src, src + len - 1))
1198 return -EBUSY;
1199
1200 return len;
1201}
1202
1203/* Check whether insn is indirect jump */
1204static int __kprobes insn_is_indirect_jump(struct insn *insn)
1205{
1206 return ((insn->opcode.bytes[0] == 0xff &&
1207 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
1208 insn->opcode.bytes[0] == 0xea); /* Segment based jump */
1209}
1210
1211/* Check whether insn jumps into specified address range */
1212static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
1213{
1214 unsigned long target = 0;
1215
1216 switch (insn->opcode.bytes[0]) {
1217 case 0xe0: /* loopne */
1218 case 0xe1: /* loope */
1219 case 0xe2: /* loop */
1220 case 0xe3: /* jcxz */
1221 case 0xe9: /* near relative jump */
1222 case 0xeb: /* short relative jump */
1223 break;
1224 case 0x0f:
1225 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
1226 break;
1227 return 0;
1228 default:
1229 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
1230 break;
1231 return 0;
1232 }
1233 target = (unsigned long)insn->next_byte + insn->immediate.value;
1234
1235 return (start <= target && target <= start + len);
1236}
1237
 1238/* Decode the whole function to ensure no instruction jumps into the target */
1239static int __kprobes can_optimize(unsigned long paddr)
1240{
1241 int ret;
1242 unsigned long addr, size = 0, offset = 0;
1243 struct insn insn;
1244 kprobe_opcode_t buf[MAX_INSN_SIZE];
1245 /* Dummy buffers for lookup_symbol_attrs */
1246 static char __dummy_buf[KSYM_NAME_LEN];
1247
1248 /* Lookup symbol including addr */
1249 if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf))
1250 return 0;
1251
1252 /* Check there is enough space for a relative jump. */
1253 if (size - offset < RELATIVEJUMP_SIZE)
1254 return 0;
1255
1256 /* Decode instructions */
1257 addr = paddr - offset;
1258 while (addr < paddr - offset + size) { /* Decode until function end */
1259 if (search_exception_tables(addr))
1260 /*
 1261			 * Since some fixup code will jump into this function,
 1262			 * we can't optimize a kprobe in this function.
1263 */
1264 return 0;
1265 kernel_insn_init(&insn, (void *)addr);
1266 insn_get_opcode(&insn);
1267 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
1268 ret = recover_probed_instruction(buf, addr);
1269 if (ret)
1270 return 0;
1271 kernel_insn_init(&insn, buf);
1272 }
1273 insn_get_length(&insn);
1274 /* Recover address */
1275 insn.kaddr = (void *)addr;
1276 insn.next_byte = (void *)(addr + insn.length);
 1277		/* Check that no instruction jumps into the target */
1278 if (insn_is_indirect_jump(&insn) ||
1279 insn_jump_into_range(&insn, paddr + INT3_SIZE,
1280 RELATIVE_ADDR_SIZE))
1281 return 0;
1282 addr += insn.length;
1283 }
1284
1285 return 1;
1286}
1287
1288/* Check optimized_kprobe can actually be optimized. */
1289int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
1290{
1291 int i;
1292 struct kprobe *p;
1293
1294 for (i = 1; i < op->optinsn.size; i++) {
1295 p = get_kprobe(op->kp.addr + i);
1296 if (p && !kprobe_disabled(p))
1297 return -EEXIST;
1298 }
1299
1300 return 0;
1301}
1302
1303/* Check the addr is within the optimized instructions. */
1304int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
1305 unsigned long addr)
1306{
1307 return ((unsigned long)op->kp.addr <= addr &&
1308 (unsigned long)op->kp.addr + op->optinsn.size > addr);
1309}
1310
1311/* Free optimized instruction slot */
1312static __kprobes
1313void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
1314{
1315 if (op->optinsn.insn) {
1316 free_optinsn_slot(op->optinsn.insn, dirty);
1317 op->optinsn.insn = NULL;
1318 op->optinsn.size = 0;
1319 }
1320}
1321
1322void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
1323{
1324 __arch_remove_optimized_kprobe(op, 1);
1325}
1326
1327/*
1328 * Copy replacing target instructions
1329 * Target instructions MUST be relocatable (checked inside)
1330 */
1331int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1332{
1333 u8 *buf;
1334 int ret;
1335 long rel;
1336
1337 if (!can_optimize((unsigned long)op->kp.addr))
1338 return -EILSEQ;
1339
1340 op->optinsn.insn = get_optinsn_slot();
1341 if (!op->optinsn.insn)
1342 return -ENOMEM;
1343
1344 /*
 1345	 * Verify that the address gap is within the 2GB range that a
 1346	 * relative jump can cover.
1347 */
1348 rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
1349 if (abs(rel) > 0x7fffffff)
1350 return -ERANGE;
1351
1352 buf = (u8 *)op->optinsn.insn;
1353
1354 /* Copy instructions into the out-of-line buffer */
1355 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
1356 if (ret < 0) {
1357 __arch_remove_optimized_kprobe(op, 0);
1358 return ret;
1359 }
1360 op->optinsn.size = ret;
1361
1362 /* Copy arch-dep-instance from template */
1363 memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
1364
1365 /* Set probe information */
1366 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
1367
1368 /* Set probe function call */
1369 synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
1370
1371 /* Set returning jmp instruction at the tail of out-of-line buffer */
1372 synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
1373 (u8 *)op->kp.addr + op->optinsn.size);
1374
1375 flush_icache_range((unsigned long) buf,
1376 (unsigned long) buf + TMPL_END_IDX +
1377 op->optinsn.size + RELATIVEJUMP_SIZE);
1378 return 0;
1379}
1380
1381/* Replace a breakpoint (int3) with a relative jump. */
1382int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
1383{
1384 unsigned char jmp_code[RELATIVEJUMP_SIZE];
1385 s32 rel = (s32)((long)op->optinsn.insn -
1386 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1387
1388 /* Backup instructions which will be replaced by jump address */
1389 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1390 RELATIVE_ADDR_SIZE);
1391
1392 jmp_code[0] = RELATIVEJUMP_OPCODE;
1393 *(s32 *)(&jmp_code[1]) = rel;
1394
1395 /*
1396 * text_poke_smp doesn't support NMI/MCE code modifying.
1397 * However, since kprobes itself also doesn't support NMI/MCE
1398 * code probing, it's not a problem.
1399 */
1400 text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
1401 return 0;
1402}
1403
1404/* Replace a relative jump with a breakpoint (int3). */
1405void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
1406{
1407 u8 buf[RELATIVEJUMP_SIZE];
1408
1409 /* Set int3 to first byte for kprobes */
1410 buf[0] = BREAKPOINT_INSTRUCTION;
1411 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1412 text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
1413}
1414
1415static int __kprobes setup_detour_execution(struct kprobe *p,
1416 struct pt_regs *regs,
1417 int reenter)
1418{
1419 struct optimized_kprobe *op;
1420
1421 if (p->flags & KPROBE_FLAG_OPTIMIZED) {
1422 /* This kprobe is really able to run optimized path. */
1423 op = container_of(p, struct optimized_kprobe, kp);
1424 /* Detour through copied instructions */
1425 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
1426 if (!reenter)
1427 reset_current_kprobe();
1428 preempt_enable_no_resched();
1429 return 1;
1430 }
1431 return 0;
1432}
1433#endif
1434
1036int __init arch_init_kprobes(void) 1435int __init arch_init_kprobes(void)
1037{ 1436{
1038 return 0; 1437 return 0;
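
arch_prepare_optimized_kprobe() only replaces the int3 with a jump when the out-of-line slot is reachable by a rel32 and the relocated instructions, followed by a jump back, fit behind the register-saving template. Below is a rough stand-alone sketch of those feasibility checks; the template length and addresses are invented for illustration and do not match the real layout.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define RELATIVEJUMP_SIZE	5
#define TMPL_END_IDX		64	/* pretend length of the optprobe template */

int main(void)
{
	uintptr_t probe_addr = 0x01000000UL;	/* pretend probed text address */
	uintptr_t slot       = 0x0a000000UL;	/* pretend optinsn slot address */
	long rel = (long)slot - (long)(probe_addr + RELATIVEJUMP_SIZE);

	/* Same idea as the 2GB reachability check before preparing the detour. */
	if (labs(rel) > 0x7fffffffL) {
		puts("slot not reachable with a rel32 jump: cannot optimize");
		return 1;
	}

	/* Slot layout after preparation: template, then the instructions
	 * copied from the probe site, then a jump back past them. */
	size_t copied = 7;	/* >= RELATIVEJUMP_SIZE bytes of whole insns */
	printf("slot: [0,%d) template, [%d,%zu) copied insns, then jmp back to probe+%zu\n",
	       TMPL_END_IDX, TMPL_END_IDX, TMPL_END_IDX + copied, copied);
	return 0;
}
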
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index ebd193e476ca..85a343e28937 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -328,7 +328,7 @@ static int apply_microcode(int cpu)
328 cpu_num, mc_intel->hdr.rev); 328 cpu_num, mc_intel->hdr.rev);
329 return -1; 329 return -1;
330 } 330 }
331 pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x \n", 331 pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n",
332 cpu_num, val[1], 332 cpu_num, val[1],
333 mc_intel->hdr.date & 0xffff, 333 mc_intel->hdr.date & 0xffff,
334 mc_intel->hdr.date >> 24, 334 mc_intel->hdr.date >> 24,
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 712d15fdc416..71825806cd44 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -7,6 +7,8 @@
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/pci.h> 8#include <linux/pci.h>
9#include <linux/dmi.h> 9#include <linux/dmi.h>
10#include <linux/range.h>
11
10#include <asm/pci-direct.h> 12#include <asm/pci-direct.h>
11#include <linux/sort.h> 13#include <linux/sort.h>
12#include <asm/io.h> 14#include <asm/io.h>
@@ -30,11 +32,6 @@ static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
30 { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, 32 { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 },
31}; 33};
32 34
33struct range {
34 u64 start;
35 u64 end;
36};
37
38static int __cpuinit cmp_range(const void *x1, const void *x2) 35static int __cpuinit cmp_range(const void *x1, const void *x2)
39{ 36{
40 const struct range *r1 = x1; 37 const struct range *r1 = x1;
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c
index 3b7078abc871..0aad8670858e 100644
--- a/arch/x86/kernel/mrst.c
+++ b/arch/x86/kernel/mrst.c
@@ -10,8 +10,211 @@
10 * of the License. 10 * of the License.
11 */ 11 */
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/kernel.h>
14#include <linux/sfi.h>
15#include <linux/irq.h>
16#include <linux/module.h>
13 17
14#include <asm/setup.h> 18#include <asm/setup.h>
19#include <asm/mpspec_def.h>
20#include <asm/hw_irq.h>
21#include <asm/apic.h>
22#include <asm/io_apic.h>
23#include <asm/mrst.h>
24#include <asm/io.h>
25#include <asm/i8259.h>
26#include <asm/apb_timer.h>
27
28static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
29static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
30int sfi_mtimer_num;
31
32struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
33EXPORT_SYMBOL_GPL(sfi_mrtc_array);
34int sfi_mrtc_num;
35
36static inline void assign_to_mp_irq(struct mpc_intsrc *m,
37 struct mpc_intsrc *mp_irq)
38{
39 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
40}
41
42static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq,
43 struct mpc_intsrc *m)
44{
45 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
46}
47
48static void save_mp_irq(struct mpc_intsrc *m)
49{
50 int i;
51
52 for (i = 0; i < mp_irq_entries; i++) {
53 if (!mp_irq_cmp(&mp_irqs[i], m))
54 return;
55 }
56
57 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
58 if (++mp_irq_entries == MAX_IRQ_SOURCES)
59 panic("Max # of irq sources exceeded!!\n");
60}
61
62/* parse all the mtimer info to a static mtimer array */
63static int __init sfi_parse_mtmr(struct sfi_table_header *table)
64{
65 struct sfi_table_simple *sb;
66 struct sfi_timer_table_entry *pentry;
67 struct mpc_intsrc mp_irq;
68 int totallen;
69
70 sb = (struct sfi_table_simple *)table;
71 if (!sfi_mtimer_num) {
72 sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb,
73 struct sfi_timer_table_entry);
74 pentry = (struct sfi_timer_table_entry *) sb->pentry;
75 totallen = sfi_mtimer_num * sizeof(*pentry);
76 memcpy(sfi_mtimer_array, pentry, totallen);
77 }
78
79 printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num);
80 pentry = sfi_mtimer_array;
81 for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
82 printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz,"
83 " irq = %d\n", totallen, (u32)pentry->phys_addr,
84 pentry->freq_hz, pentry->irq);
85 if (!pentry->irq)
86 continue;
87 mp_irq.type = MP_IOAPIC;
88 mp_irq.irqtype = mp_INT;
89/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
90 mp_irq.irqflag = 5;
91 mp_irq.srcbus = 0;
92 mp_irq.srcbusirq = pentry->irq; /* IRQ */
93 mp_irq.dstapic = MP_APIC_ALL;
94 mp_irq.dstirq = pentry->irq;
95 save_mp_irq(&mp_irq);
96 }
97
98 return 0;
99}
100
101struct sfi_timer_table_entry *sfi_get_mtmr(int hint)
102{
103 int i;
104 if (hint < sfi_mtimer_num) {
105 if (!sfi_mtimer_usage[hint]) {
106 pr_debug("hint taken for timer %d irq %d\n",\
107 hint, sfi_mtimer_array[hint].irq);
108 sfi_mtimer_usage[hint] = 1;
109 return &sfi_mtimer_array[hint];
110 }
111 }
112 /* take the first timer available */
113 for (i = 0; i < sfi_mtimer_num;) {
114 if (!sfi_mtimer_usage[i]) {
115 sfi_mtimer_usage[i] = 1;
116 return &sfi_mtimer_array[i];
117 }
118 i++;
119 }
120 return NULL;
121}
122
123void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr)
124{
125 int i;
126 for (i = 0; i < sfi_mtimer_num;) {
127 if (mtmr->irq == sfi_mtimer_array[i].irq) {
128 sfi_mtimer_usage[i] = 0;
129 return;
130 }
131 i++;
132 }
133}
134
135/* parse all the mrtc info to a global mrtc array */
136int __init sfi_parse_mrtc(struct sfi_table_header *table)
137{
138 struct sfi_table_simple *sb;
139 struct sfi_rtc_table_entry *pentry;
140 struct mpc_intsrc mp_irq;
141
142 int totallen;
143
144 sb = (struct sfi_table_simple *)table;
145 if (!sfi_mrtc_num) {
146 sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb,
147 struct sfi_rtc_table_entry);
148 pentry = (struct sfi_rtc_table_entry *)sb->pentry;
149 totallen = sfi_mrtc_num * sizeof(*pentry);
150 memcpy(sfi_mrtc_array, pentry, totallen);
151 }
152
153 printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num);
154 pentry = sfi_mrtc_array;
155 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
156 printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n",
157 totallen, (u32)pentry->phys_addr, pentry->irq);
158 mp_irq.type = MP_IOAPIC;
159 mp_irq.irqtype = mp_INT;
160 mp_irq.irqflag = 0;
161 mp_irq.srcbus = 0;
162 mp_irq.srcbusirq = pentry->irq; /* IRQ */
163 mp_irq.dstapic = MP_APIC_ALL;
164 mp_irq.dstirq = pentry->irq;
165 save_mp_irq(&mp_irq);
166 }
167 return 0;
168}
169
170/*
 171 * The secondary clock in Moorestown can be the APBT or the LAPIC clock; it
 172 * defaults to APBT, but a cmdline option can override it.
173 */
174static void __cpuinit mrst_setup_secondary_clock(void)
175{
176 /* restore default lapic clock if disabled by cmdline */
177 if (disable_apbt_percpu)
178 return setup_secondary_APIC_clock();
179 apbt_setup_secondary_clock();
180}
181
182static unsigned long __init mrst_calibrate_tsc(void)
183{
184 unsigned long flags, fast_calibrate;
185
186 local_irq_save(flags);
187 fast_calibrate = apbt_quick_calibrate();
188 local_irq_restore(flags);
189
190 if (fast_calibrate)
191 return fast_calibrate;
192
193 return 0;
194}
195
196void __init mrst_time_init(void)
197{
198 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
199 pre_init_apic_IRQ0();
200 apbt_time_init();
201}
202
203void __init mrst_rtc_init(void)
204{
205 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
206}
207
208/*
 209 * If we use the per-cpu APB timer, the boot clock is already set up. If we use the
 210 * LAPIC timer and one APBT timer for broadcast, we need to set up the LAPIC boot clock.
211 */
212static void __init mrst_setup_boot_clock(void)
213{
 214	pr_info("%s: per cpu apbt flag %d\n", __func__, disable_apbt_percpu);
215 if (disable_apbt_percpu)
216 setup_boot_APIC_clock();
217};
15 218
16/* 219/*
17 * Moorestown specific x86_init function overrides and early setup 220 * Moorestown specific x86_init function overrides and early setup
@@ -21,4 +224,17 @@ void __init x86_mrst_early_setup(void)
21{ 224{
22 x86_init.resources.probe_roms = x86_init_noop; 225 x86_init.resources.probe_roms = x86_init_noop;
23 x86_init.resources.reserve_resources = x86_init_noop; 226 x86_init.resources.reserve_resources = x86_init_noop;
227
228 x86_init.timers.timer_init = mrst_time_init;
229 x86_init.timers.setup_percpu_clockev = mrst_setup_boot_clock;
230
231 x86_init.irqs.pre_vector_init = x86_init_noop;
232
233 x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock;
234
235 x86_platform.calibrate_tsc = mrst_calibrate_tsc;
236 x86_init.pci.init = pci_mrst_init;
237 x86_init.pci.fixup_irqs = x86_init_noop;
238
239 legacy_pic = &null_legacy_pic;
24} 240}
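
sfi_parse_mtmr() and sfi_parse_mrtc() both walk an SFI "simple table": a header whose length field determines how many fixed-size entries follow, which is what SFI_GET_NUM_ENTRIES computes. A stand-alone model of that walk follows, with invented structures and sizes rather than the real SFI layout.

#include <stdio.h>
#include <stdint.h>

/* Invented stand-in for one SFI timer entry; only the walk matters here. */
struct timer_entry { uint64_t phys_addr; uint32_t freq_hz; uint32_t irq; };

int main(void)
{
	const struct timer_entry entries[] = {
		{ 0xff000000, 32768,  4 },
		{ 0xff000100, 32768, 10 },
	};
	uint32_t header_len = 24;			/* pretend SFI header size */
	uint32_t table_len  = header_len + sizeof(entries);

	/* Entry count derived from the table length, as SFI_GET_NUM_ENTRIES does. */
	int num = (table_len - header_len) / sizeof(struct timer_entry);

	for (int i = 0; i < num; i++)
		printf("timer[%d]: paddr=%#llx freq=%uHz irq=%u\n", i,
		       (unsigned long long)entries[i].phys_addr,
		       entries[i].freq_hz, entries[i].irq);
	return 0;
}
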
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 9d1d263f786f..8297160c41b3 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -17,7 +17,9 @@
17#include <linux/spinlock.h> 17#include <linux/spinlock.h>
18#include <linux/io.h> 18#include <linux/io.h>
19#include <linux/string.h> 19#include <linux/string.h>
20
20#include <asm/geode.h> 21#include <asm/geode.h>
22#include <asm/setup.h>
21#include <asm/olpc.h> 23#include <asm/olpc.h>
22 24
23#ifdef CONFIG_OPEN_FIRMWARE 25#ifdef CONFIG_OPEN_FIRMWARE
@@ -243,9 +245,11 @@ static int __init olpc_init(void)
243 olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, 245 olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
244 (unsigned char *) &olpc_platform_info.ecver, 1); 246 (unsigned char *) &olpc_platform_info.ecver, 1);
245 247
246 /* check to see if the VSA exists */ 248#ifdef CONFIG_PCI_OLPC
247	if (cs5535_has_vsa2())                                             249	/* If the VSA exists, let it emulate PCI; if not, emulate it in the kernel */
248 olpc_platform_info.flags |= OLPC_F_VSA; 250 if (!cs5535_has_vsa2())
251 x86_init.pci.arch_init = pci_olpc_init;
252#endif
249 253
250 printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", 254 printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
251 ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", 255 ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1b1739d16310..1db183ed7c01 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -428,10 +428,6 @@ struct pv_mmu_ops pv_mmu_ops = {
428 .ptep_modify_prot_start = __ptep_modify_prot_start, 428 .ptep_modify_prot_start = __ptep_modify_prot_start,
429 .ptep_modify_prot_commit = __ptep_modify_prot_commit, 429 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
430 430
431#ifdef CONFIG_HIGHPTE
432 .kmap_atomic_pte = kmap_atomic,
433#endif
434
435#if PAGETABLE_LEVELS >= 3 431#if PAGETABLE_LEVELS >= 3
436#ifdef CONFIG_X86_PAE 432#ifdef CONFIG_X86_PAE
437 .set_pte_atomic = native_set_pte_atomic, 433 .set_pte_atomic = native_set_pte_atomic,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 75e14e21f61a..1aa966c565f9 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -65,7 +65,7 @@ int dma_set_mask(struct device *dev, u64 mask)
65} 65}
66EXPORT_SYMBOL(dma_set_mask); 66EXPORT_SYMBOL(dma_set_mask);
67 67
68#ifdef CONFIG_X86_64 68#if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA)
69static __initdata void *dma32_bootmem_ptr; 69static __initdata void *dma32_bootmem_ptr;
70static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); 70static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
71 71
@@ -116,14 +116,21 @@ static void __init dma32_free_bootmem(void)
116 dma32_bootmem_ptr = NULL; 116 dma32_bootmem_ptr = NULL;
117 dma32_bootmem_size = 0; 117 dma32_bootmem_size = 0;
118} 118}
119#else
120void __init dma32_reserve_bootmem(void)
121{
122}
123static void __init dma32_free_bootmem(void)
124{
125}
126
119#endif 127#endif
120 128
121void __init pci_iommu_alloc(void) 129void __init pci_iommu_alloc(void)
122{ 130{
123#ifdef CONFIG_X86_64
124 /* free the range so iommu could get some range less than 4G */ 131 /* free the range so iommu could get some range less than 4G */
125 dma32_free_bootmem(); 132 dma32_free_bootmem();
126#endif 133
127 if (pci_swiotlb_detect()) 134 if (pci_swiotlb_detect())
128 goto out; 135 goto out;
129 136
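
The pci-dma.c hunk removes the #ifdef at the pci_iommu_alloc() call site by giving configurations other than X86_64-without-NUMA empty dma32_reserve_bootmem()/dma32_free_bootmem() stubs. A tiny sketch of that stub-instead-of-ifdef pattern follows; WANT_DMA32_BOOTMEM is an invented stand-in for the real config test.

#include <stdio.h>

/* Configurations that reserve the dma32 bootmem range get a real helper;
 * everyone else gets an empty stub, so callers stay #ifdef-free. */
#ifdef WANT_DMA32_BOOTMEM
static void dma32_free_bootmem(void)
{
	puts("freeing the 128MB dma32 bootmem range");
}
#else
static void dma32_free_bootmem(void)
{
	/* nothing reserved on this configuration */
}
#endif

int main(void)
{
	/* The caller looks the same either way. */
	dma32_free_bootmem();
	return 0;
}
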
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c9b3522b6b46..02d678065d7d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -92,6 +92,13 @@ void exit_thread(void)
92 } 92 }
93} 93}
94 94
95void show_regs(struct pt_regs *regs)
96{
97 show_registers(regs);
98 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs),
99 regs->bp);
100}
101
95void show_regs_common(void) 102void show_regs_common(void)
96{ 103{
97 const char *board, *product; 104 const char *board, *product;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 37ad1e046aae..f6c62667e30c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -174,12 +174,6 @@ void __show_regs(struct pt_regs *regs, int all)
174 d6, d7); 174 d6, d7);
175} 175}
176 176
177void show_regs(struct pt_regs *regs)
178{
179 show_registers(regs);
180 show_trace(NULL, regs, &regs->sp, regs->bp);
181}
182
183void release_thread(struct task_struct *dead_task) 177void release_thread(struct task_struct *dead_task)
184{ 178{
185 BUG_ON(dead_task->mm); 179 BUG_ON(dead_task->mm);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 126f0b493d04..dc9690b4c4cc 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -211,12 +211,6 @@ void __show_regs(struct pt_regs *regs, int all)
211 printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); 211 printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
212} 212}
213 213
214void show_regs(struct pt_regs *regs)
215{
216 show_registers(regs);
217 show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
218}
219
220void release_thread(struct task_struct *dead_task) 214void release_thread(struct task_struct *dead_task)
221{ 215{
222 if (dead_task->mm) { 216 if (dead_task->mm) {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index d03146f71b2f..2d96aab82a48 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -48,6 +48,7 @@ enum x86_regset {
48 REGSET_FP, 48 REGSET_FP,
49 REGSET_XFP, 49 REGSET_XFP,
50 REGSET_IOPERM64 = REGSET_XFP, 50 REGSET_IOPERM64 = REGSET_XFP,
51 REGSET_XSTATE,
51 REGSET_TLS, 52 REGSET_TLS,
52 REGSET_IOPERM32, 53 REGSET_IOPERM32,
53}; 54};
@@ -1563,7 +1564,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
1563 1564
1564#ifdef CONFIG_X86_64 1565#ifdef CONFIG_X86_64
1565 1566
1566static const struct user_regset x86_64_regsets[] = { 1567static struct user_regset x86_64_regsets[] __read_mostly = {
1567 [REGSET_GENERAL] = { 1568 [REGSET_GENERAL] = {
1568 .core_note_type = NT_PRSTATUS, 1569 .core_note_type = NT_PRSTATUS,
1569 .n = sizeof(struct user_regs_struct) / sizeof(long), 1570 .n = sizeof(struct user_regs_struct) / sizeof(long),
@@ -1576,6 +1577,12 @@ static const struct user_regset x86_64_regsets[] = {
1576 .size = sizeof(long), .align = sizeof(long), 1577 .size = sizeof(long), .align = sizeof(long),
1577 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set 1578 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
1578 }, 1579 },
1580 [REGSET_XSTATE] = {
1581 .core_note_type = NT_X86_XSTATE,
1582 .size = sizeof(u64), .align = sizeof(u64),
1583 .active = xstateregs_active, .get = xstateregs_get,
1584 .set = xstateregs_set
1585 },
1579 [REGSET_IOPERM64] = { 1586 [REGSET_IOPERM64] = {
1580 .core_note_type = NT_386_IOPERM, 1587 .core_note_type = NT_386_IOPERM,
1581 .n = IO_BITMAP_LONGS, 1588 .n = IO_BITMAP_LONGS,
@@ -1601,7 +1608,7 @@ static const struct user_regset_view user_x86_64_view = {
1601#endif /* CONFIG_X86_64 */ 1608#endif /* CONFIG_X86_64 */
1602 1609
1603#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 1610#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1604static const struct user_regset x86_32_regsets[] = { 1611static struct user_regset x86_32_regsets[] __read_mostly = {
1605 [REGSET_GENERAL] = { 1612 [REGSET_GENERAL] = {
1606 .core_note_type = NT_PRSTATUS, 1613 .core_note_type = NT_PRSTATUS,
1607 .n = sizeof(struct user_regs_struct32) / sizeof(u32), 1614 .n = sizeof(struct user_regs_struct32) / sizeof(u32),
@@ -1620,6 +1627,12 @@ static const struct user_regset x86_32_regsets[] = {
1620 .size = sizeof(u32), .align = sizeof(u32), 1627 .size = sizeof(u32), .align = sizeof(u32),
1621 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set 1628 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
1622 }, 1629 },
1630 [REGSET_XSTATE] = {
1631 .core_note_type = NT_X86_XSTATE,
1632 .size = sizeof(u64), .align = sizeof(u64),
1633 .active = xstateregs_active, .get = xstateregs_get,
1634 .set = xstateregs_set
1635 },
1623 [REGSET_TLS] = { 1636 [REGSET_TLS] = {
1624 .core_note_type = NT_386_TLS, 1637 .core_note_type = NT_386_TLS,
1625 .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, 1638 .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
@@ -1642,6 +1655,23 @@ static const struct user_regset_view user_x86_32_view = {
1642}; 1655};
1643#endif 1656#endif
1644 1657
1658/*
1659 * This represents bytes 464..511 in the memory layout exported through
1660 * the REGSET_XSTATE interface.
1661 */
1662u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
1663
1664void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
1665{
1666#ifdef CONFIG_X86_64
1667 x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
1668#endif
1669#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1670 x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64);
1671#endif
1672 xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
1673}
1674
1645const struct user_regset_view *task_user_regset_view(struct task_struct *task) 1675const struct user_regset_view *task_user_regset_view(struct task_struct *task)
1646{ 1676{
1647#ifdef CONFIG_IA32_EMULATION 1677#ifdef CONFIG_IA32_EMULATION
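
REGSET_XSTATE is the one regset whose length is not known at compile time: update_regset_xstate_info() patches the descriptor's .n field once CPUID has reported the xstate buffer size, and records the feature mask in the exported software bytes. A user-space sketch of that runtime sizing follows; the struct layout and the 832-byte example are illustrative, not the kernel's definitions.

#include <stdint.h>
#include <stdio.h>

struct regset {
	unsigned int n;		/* number of entries */
	unsigned int size;	/* bytes per entry */
};

/* One descriptor whose length depends on the CPU's xstate buffer size. */
static struct regset xstate_regset = { .n = 0, .size = sizeof(uint64_t) };
static uint64_t exported_xcr0;

static void update_regset_xstate_info(unsigned int size, uint64_t mask)
{
	xstate_regset.n = size / sizeof(uint64_t);
	exported_xcr0   = mask;	/* ends up in the exported fx_sw bytes */
}

int main(void)
{
	/* Example values only: pretend CPUID said 832 bytes, FP|SSE|YMM. */
	update_regset_xstate_info(832, 0x7);
	printf("xstate regset: %u entries of %u bytes, xcr0=%#llx\n",
	       xstate_regset.n, xstate_regset.size,
	       (unsigned long long)exported_xcr0);
	return 0;
}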
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 704bddcdf64d..8e1aac86b50c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -461,6 +461,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
461 DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), 461 DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"),
462 }, 462 },
463 }, 463 },
464 { /* Handle problems with rebooting on the iMac9,1. */
465 .callback = set_pci_reboot,
466 .ident = "Apple iMac9,1",
467 .matches = {
468 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
469 DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
470 },
471 },
464 { } 472 { }
465}; 473};
466 474
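
The reboot quirk added above is another row in the DMI match table: if both the vendor and product strings match, set_pci_reboot() is selected for that machine. A compact user-space model of such a table walk follows; the helper and entries are examples, not the kernel's dmi_check_system() implementation.

#include <stdio.h>
#include <string.h>

struct dmi_quirk {
	const char *ident;
	const char *vendor;
	const char *product;
	void (*callback)(void);
};

static void set_pci_reboot(void) { puts("using PCI reboot method"); }

static const struct dmi_quirk quirks[] = {
	{ "Apple Macmini3,1", "Apple Inc.", "Macmini3,1", set_pci_reboot },
	{ "Apple iMac9,1",    "Apple Inc.", "iMac9,1",    set_pci_reboot },
	{ NULL, NULL, NULL, NULL },
};

static void apply_quirks(const char *vendor, const char *product)
{
	for (const struct dmi_quirk *q = quirks; q->ident; q++) {
		if (!strcmp(q->vendor, vendor) && !strcmp(q->product, product)) {
			printf("matched %s: ", q->ident);
			q->callback();
			return;
		}
	}
	puts("no quirk matched");
}

int main(void)
{
	apply_quirks("Apple Inc.", "iMac9,1");
	return 0;
}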
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 5d9e40c58628..5d7ba1a449bd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -121,7 +121,9 @@
121unsigned long max_low_pfn_mapped; 121unsigned long max_low_pfn_mapped;
122unsigned long max_pfn_mapped; 122unsigned long max_pfn_mapped;
123 123
124#ifdef CONFIG_DMI
124RESERVE_BRK(dmi_alloc, 65536); 125RESERVE_BRK(dmi_alloc, 65536);
126#endif
125 127
126unsigned int boot_cpu_id __read_mostly; 128unsigned int boot_cpu_id __read_mostly;
127 129
@@ -667,6 +669,23 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
667 {} 669 {}
668}; 670};
669 671
672static void __init trim_bios_range(void)
673{
674 /*
675 * A special case is the first 4 KB of memory: this is a
676 * BIOS-owned area, not kernel RAM, but it is generally not
677 * listed as such in the E820 table.
678 */
679 e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
680 /*
681 * Special case: some BIOSes report the PC BIOS
682 * area (640 KB - 1 MB) as RAM even though it is not;
683 * take those claims out.
684 */
685 e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
686 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
687}
688
670/* 689/*
671 * Determine if we were loaded by an EFI loader. If so, then we have also been 690 * Determine if we were loaded by an EFI loader. If so, then we have also been
672 * passed the efi memmap, systab, etc., so we should use these data structures 691 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -830,7 +849,7 @@ void __init setup_arch(char **cmdline_p)
830 insert_resource(&iomem_resource, &data_resource); 849 insert_resource(&iomem_resource, &data_resource);
831 insert_resource(&iomem_resource, &bss_resource); 850 insert_resource(&iomem_resource, &bss_resource);
832 851
833 852 trim_bios_range();
834#ifdef CONFIG_X86_32 853#ifdef CONFIG_X86_32
835 if (ppro_with_ram_bug()) { 854 if (ppro_with_ram_bug()) {
836 e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, 855 e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
@@ -950,15 +969,11 @@ void __init setup_arch(char **cmdline_p)
950#endif 969#endif
951 970
952 initmem_init(0, max_pfn, acpi, k8); 971 initmem_init(0, max_pfn, acpi, k8);
972#ifndef CONFIG_NO_BOOTMEM
973 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
974#endif
953 975
954#ifdef CONFIG_X86_64
955 /*
956 * dma32_reserve_bootmem() allocates bootmem which may conflict
957 * with the crashkernel command line, so do that after
958 * reserve_crashkernel()
959 */
960 dma32_reserve_bootmem(); 976 dma32_reserve_bootmem();
961#endif
962 977
963 reserve_ibft_region(); 978 reserve_ibft_region();
964 979
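
trim_bios_range() adjusts the firmware-provided memory map before it is used: the first page is reclassified as reserved and any RAM claim over the 640 KB - 1 MB BIOS window is removed, after which the map is re-sanitized. The sketch below models the reclassification step on a tiny range list in user space; it only handles entries wholly inside the window (a simplification of e820_update_range()), and the layout and helpers are illustrative, not the kernel's e820 code.

#include <stdint.h>
#include <stdio.h>

enum { RAM, RESERVED };

struct range { uint64_t start, end; int type; };

/* Toy map: low RAM, the BIOS window wrongly reported as RAM, and high RAM. */
static struct range map[] = {
	{ 0x00000000, 0x000a0000, RAM },
	{ 0x000a0000, 0x00100000, RAM },
	{ 0x00100000, 0x80000000, RAM },
};

/* Reclassify entries of type 'from' that lie entirely inside [start, end). */
static void update_range(uint64_t start, uint64_t end, int from, int to)
{
	for (unsigned i = 0; i < sizeof(map) / sizeof(map[0]); i++)
		if (map[i].type == from && map[i].start >= start && map[i].end <= end)
			map[i].type = to;
}

int main(void)
{
	update_range(0x000a0000, 0x00100000, RAM, RESERVED);	/* BIOS window */
	for (unsigned i = 0; i < sizeof(map) / sizeof(map[0]); i++)
		printf("%#010llx-%#010llx %s\n",
		       (unsigned long long)map[i].start,
		       (unsigned long long)map[i].end,
		       map[i].type == RAM ? "RAM" : "reserved");
	return 0;
}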
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 35abcb8b00e9..ef6370b00e70 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -137,7 +137,13 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
137 137
138static void __init pcpu_fc_free(void *ptr, size_t size) 138static void __init pcpu_fc_free(void *ptr, size_t size)
139{ 139{
140#ifdef CONFIG_NO_BOOTMEM
141 u64 start = __pa(ptr);
142 u64 end = start + size;
143 free_early_partial(start, end);
144#else
140 free_bootmem(__pa(ptr), size); 145 free_bootmem(__pa(ptr), size);
146#endif
141} 147}
142 148
143static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) 149static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index b4e870cbdc60..a02e80c3c54b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -48,6 +48,7 @@
48#include <linux/err.h> 48#include <linux/err.h>
49#include <linux/nmi.h> 49#include <linux/nmi.h>
50#include <linux/tboot.h> 50#include <linux/tboot.h>
51#include <linux/stackprotector.h>
51 52
52#include <asm/acpi.h> 53#include <asm/acpi.h>
53#include <asm/desc.h> 54#include <asm/desc.h>
@@ -67,6 +68,7 @@
67#include <linux/mc146818rtc.h> 68#include <linux/mc146818rtc.h>
68 69
69#include <asm/smpboot_hooks.h> 70#include <asm/smpboot_hooks.h>
71#include <asm/i8259.h>
70 72
71#ifdef CONFIG_X86_32 73#ifdef CONFIG_X86_32
72u8 apicid_2_node[MAX_APICID]; 74u8 apicid_2_node[MAX_APICID];
@@ -241,6 +243,11 @@ static void __cpuinit smp_callin(void)
241 map_cpu_to_logical_apicid(); 243 map_cpu_to_logical_apicid();
242 244
243 notify_cpu_starting(cpuid); 245 notify_cpu_starting(cpuid);
246
247 /*
248 * Need to set up vector mappings before we enable interrupts.
249 */
250 __setup_vector_irq(smp_processor_id());
244 /* 251 /*
245 * Get our bogomips. 252 * Get our bogomips.
246 * 253 *
@@ -286,9 +293,9 @@ notrace static void __cpuinit start_secondary(void *unused)
286 check_tsc_sync_target(); 293 check_tsc_sync_target();
287 294
288 if (nmi_watchdog == NMI_IO_APIC) { 295 if (nmi_watchdog == NMI_IO_APIC) {
289 disable_8259A_irq(0); 296 legacy_pic->chip->mask(0);
290 enable_NMI_through_LVT0(); 297 enable_NMI_through_LVT0();
291 enable_8259A_irq(0); 298 legacy_pic->chip->unmask(0);
292 } 299 }
293 300
294#ifdef CONFIG_X86_32 301#ifdef CONFIG_X86_32
@@ -315,15 +322,18 @@ notrace static void __cpuinit start_secondary(void *unused)
315 */ 322 */
316 ipi_call_lock(); 323 ipi_call_lock();
317 lock_vector_lock(); 324 lock_vector_lock();
318 __setup_vector_irq(smp_processor_id());
319 set_cpu_online(smp_processor_id(), true); 325 set_cpu_online(smp_processor_id(), true);
320 unlock_vector_lock(); 326 unlock_vector_lock();
321 ipi_call_unlock(); 327 ipi_call_unlock();
322 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 328 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
329 x86_platform.nmi_init();
323 330
324 /* enable local interrupts */ 331 /* enable local interrupts */
325 local_irq_enable(); 332 local_irq_enable();
326 333
334 /* to prevent fake stack check failure in clock setup */
335 boot_init_stack_canary();
336
327 x86_cpuinit.setup_percpu_clockev(); 337 x86_cpuinit.setup_percpu_clockev();
328 338
329 wmb(); 339 wmb();
@@ -1211,11 +1221,12 @@ __init void prefill_possible_map(void)
1211 1221
1212 total_cpus = max_t(int, possible, num_processors + disabled_cpus); 1222 total_cpus = max_t(int, possible, num_processors + disabled_cpus);
1213 1223
1214 if (possible > CONFIG_NR_CPUS) { 1224 /* nr_cpu_ids could be reduced via nr_cpus= */
1225 if (possible > nr_cpu_ids) {
1215 printk(KERN_WARNING 1226 printk(KERN_WARNING
1216 "%d Processors exceeds NR_CPUS limit of %d\n", 1227 "%d Processors exceeds NR_CPUS limit of %d\n",
1217 possible, CONFIG_NR_CPUS); 1228 possible, nr_cpu_ids);
1218 possible = CONFIG_NR_CPUS; 1229 possible = nr_cpu_ids;
1219 } 1230 }
1220 1231
1221 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", 1232 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
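
prefill_possible_map() now clamps against nr_cpu_ids rather than the compile-time CONFIG_NR_CPUS, since nr_cpus= on the command line can shrink the usable range below the build-time limit. The clamp itself is just the following; this is a user-space restatement with example numbers, not the kernel function.

#include <stdio.h>

int main(void)
{
	unsigned int nr_cpu_ids = 4;	/* e.g. reduced by nr_cpus=4 */
	unsigned int possible   = 16;	/* CPUs the firmware says could appear */

	if (possible > nr_cpu_ids) {
		printf("%u processors exceeds the %u-CPU limit\n",
		       possible, nr_cpu_ids);
		possible = nr_cpu_ids;
	}
	printf("allowing %u possible CPUs\n", possible);
	return 0;
}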
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index be2573448ed9..fb5cc5e14cfa 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -70,11 +70,11 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
70 * manually to deassert NMI lines for the watchdog if run 70 * manually to deassert NMI lines for the watchdog if run
71 * on an 82489DX-based system. 71 * on an 82489DX-based system.
72 */ 72 */
73 spin_lock(&i8259A_lock); 73 raw_spin_lock(&i8259A_lock);
74 outb(0x0c, PIC_MASTER_OCW3); 74 outb(0x0c, PIC_MASTER_OCW3);
75 /* Ack the IRQ; AEOI will end it automatically. */ 75 /* Ack the IRQ; AEOI will end it automatically. */
76 inb(PIC_MASTER_POLL); 76 inb(PIC_MASTER_POLL);
77 spin_unlock(&i8259A_lock); 77 raw_spin_unlock(&i8259A_lock);
78 } 78 }
79 79
80 global_clock_event->event_handler(global_clock_event); 80 global_clock_event->event_handler(global_clock_event);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 597683aa5ba0..208a857c679f 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -740,7 +740,7 @@ static cycle_t __vsyscall_fn vread_tsc(void)
740} 740}
741#endif 741#endif
742 742
743static void resume_tsc(void) 743static void resume_tsc(struct clocksource *cs)
744{ 744{
745 clocksource_tsc.cycle_last = 0; 745 clocksource_tsc.cycle_last = 0;
746} 746}
@@ -806,7 +806,7 @@ static void __init check_system_tsc_reliable(void)
806 unsigned long res_low, res_high; 806 unsigned long res_low, res_high;
807 807
808 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); 808 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
809 /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */ 809 /* Geode_LX - the OLPC CPU has a very reliable TSC */
810 if (res_low & RTSC_SUSP) 810 if (res_low & RTSC_SUSP)
811 tsc_clocksource_reliable = 1; 811 tsc_clocksource_reliable = 1;
812#endif 812#endif
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
index 36afb98675a4..309c70fb7759 100644
--- a/arch/x86/kernel/uv_sysfs.c
+++ b/arch/x86/kernel/uv_sysfs.c
@@ -54,19 +54,19 @@ static int __init sgi_uv_sysfs_init(void)
54 if (!sgi_uv_kobj) 54 if (!sgi_uv_kobj)
55 sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); 55 sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
56 if (!sgi_uv_kobj) { 56 if (!sgi_uv_kobj) {
57 printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n"); 57 printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n");
58 return -EINVAL; 58 return -EINVAL;
59 } 59 }
60 60
61 ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); 61 ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
62 if (ret) { 62 if (ret) {
63 printk(KERN_WARNING "sysfs_create_file partition_id failed \n"); 63 printk(KERN_WARNING "sysfs_create_file partition_id failed\n");
64 return ret; 64 return ret;
65 } 65 }
66 66
67 ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); 67 ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
68 if (ret) { 68 if (ret) {
69 printk(KERN_WARNING "sysfs_create_file coherence_id failed \n"); 69 printk(KERN_WARNING "sysfs_create_file coherence_id failed\n");
70 return ret; 70 return ret;
71 } 71 }
72 72
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 34a279a7471d..e680ea52db9b 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -49,11 +49,6 @@ extern int no_broadcast;
49char visws_board_type = -1; 49char visws_board_type = -1;
50char visws_board_rev = -1; 50char visws_board_rev = -1;
51 51
52int is_visws_box(void)
53{
54 return visws_board_type >= 0;
55}
56
57static void __init visws_time_init(void) 52static void __init visws_time_init(void)
58{ 53{
59 printk(KERN_INFO "Starting Cobalt Timer system clock\n"); 54 printk(KERN_INFO "Starting Cobalt Timer system clock\n");
@@ -242,6 +237,8 @@ void __init visws_early_detect(void)
242 x86_init.irqs.pre_vector_init = visws_pre_intr_init; 237 x86_init.irqs.pre_vector_init = visws_pre_intr_init;
243 x86_init.irqs.trap_init = visws_trap_init; 238 x86_init.irqs.trap_init = visws_trap_init;
244 x86_init.timers.timer_init = visws_time_init; 239 x86_init.timers.timer_init = visws_time_init;
240 x86_init.pci.init = pci_visws_init;
241 x86_init.pci.init_irq = x86_init_noop;
245 242
246 /* 243 /*
247 * Install reboot quirks: 244 * Install reboot quirks:
@@ -508,7 +505,7 @@ static struct irq_chip cobalt_irq_type = {
508 */ 505 */
509static unsigned int startup_piix4_master_irq(unsigned int irq) 506static unsigned int startup_piix4_master_irq(unsigned int irq)
510{ 507{
511 init_8259A(0); 508 legacy_pic->init(0);
512 509
513 return startup_cobalt_irq(irq); 510 return startup_cobalt_irq(irq);
514} 511}
@@ -532,9 +529,6 @@ static struct irq_chip piix4_master_irq_type = {
532 529
533static struct irq_chip piix4_virtual_irq_type = { 530static struct irq_chip piix4_virtual_irq_type = {
534 .name = "PIIX4-virtual", 531 .name = "PIIX4-virtual",
535 .shutdown = disable_8259A_irq,
536 .enable = enable_8259A_irq,
537 .disable = disable_8259A_irq,
538}; 532};
539 533
540 534
@@ -559,7 +553,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
559 struct irq_desc *desc; 553 struct irq_desc *desc;
560 unsigned long flags; 554 unsigned long flags;
561 555
562 spin_lock_irqsave(&i8259A_lock, flags); 556 raw_spin_lock_irqsave(&i8259A_lock, flags);
563 557
564 /* Find out what's interrupting in the PIIX4 master 8259 */ 558 /* Find out what's interrupting in the PIIX4 master 8259 */
565 outb(0x0c, 0x20); /* OCW3 Poll command */ 559 outb(0x0c, 0x20); /* OCW3 Poll command */
@@ -596,7 +590,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
596 outb(0x60 + realirq, 0x20); 590 outb(0x60 + realirq, 0x20);
597 } 591 }
598 592
599 spin_unlock_irqrestore(&i8259A_lock, flags); 593 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
600 594
601 desc = irq_to_desc(realirq); 595 desc = irq_to_desc(realirq);
602 596
@@ -609,12 +603,12 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
609 handle_IRQ_event(realirq, desc->action); 603 handle_IRQ_event(realirq, desc->action);
610 604
611 if (!(desc->status & IRQ_DISABLED)) 605 if (!(desc->status & IRQ_DISABLED))
612 enable_8259A_irq(realirq); 606 legacy_pic->chip->unmask(realirq);
613 607
614 return IRQ_HANDLED; 608 return IRQ_HANDLED;
615 609
616out_unlock: 610out_unlock:
617 spin_unlock_irqrestore(&i8259A_lock, flags); 611 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
618 return IRQ_NONE; 612 return IRQ_NONE;
619} 613}
620 614
@@ -628,6 +622,12 @@ static struct irqaction cascade_action = {
628 .name = "cascade", 622 .name = "cascade",
629}; 623};
630 624
625static inline void set_piix4_virtual_irq_type(void)
626{
627 piix4_virtual_irq_type.shutdown = i8259A_chip.mask;
628 piix4_virtual_irq_type.enable = i8259A_chip.unmask;
629 piix4_virtual_irq_type.disable = i8259A_chip.mask;
630}
631 631
632void init_VISWS_APIC_irqs(void) 632void init_VISWS_APIC_irqs(void)
633{ 633{
@@ -653,6 +653,7 @@ void init_VISWS_APIC_irqs(void)
653 desc->chip = &piix4_master_irq_type; 653 desc->chip = &piix4_master_irq_type;
654 } 654 }
655 else if (i < CO_IRQ_APIC0) { 655 else if (i < CO_IRQ_APIC0) {
656 set_piix4_virtual_irq_type();
656 desc->chip = &piix4_virtual_irq_type; 657 desc->chip = &piix4_virtual_irq_type;
657 } 658 }
658 else if (IS_CO_APIC(i)) { 659 else if (IS_CO_APIC(i)) {
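
Throughout the visws_quirks.c hunks the direct 8259 calls (init_8259A(), enable_8259A_irq(), disable_8259A_irq()) are replaced with calls through legacy_pic and its chip ops, apparently so a platform without the standard i8259 can plug in a different implementation. A small model of that indirection follows, with illustrative names rather than the kernel's struct legacy_pic definition.

#include <stdio.h>

struct irq_chip_ops {
	void (*mask)(unsigned int irq);
	void (*unmask)(unsigned int irq);
};

struct legacy_pic_model {
	void (*init)(int auto_eoi);
	const struct irq_chip_ops *chip;
};

static void pic_mask(unsigned int irq)   { printf("mask IRQ %u\n", irq); }
static void pic_unmask(unsigned int irq) { printf("unmask IRQ %u\n", irq); }
static void pic_init(int auto_eoi)       { printf("init 8259, AEOI=%d\n", auto_eoi); }

static const struct irq_chip_ops i8259_chip = { pic_mask, pic_unmask };
static const struct legacy_pic_model i8259_pic = { pic_init, &i8259_chip };
static const struct legacy_pic_model *legacy_pic = &i8259_pic;

int main(void)
{
	legacy_pic->init(0);		/* was: init_8259A(0) */
	legacy_pic->chip->mask(0);	/* was: disable_8259A_irq(0) */
	legacy_pic->chip->unmask(0);	/* was: enable_8259A_irq(0) */
	return 0;
}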
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index d430e4c30193..7dd599deca4a 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -33,6 +33,7 @@
33#include <asm/fixmap.h> 33#include <asm/fixmap.h>
34#include <asm/apicdef.h> 34#include <asm/apicdef.h>
35#include <asm/apic.h> 35#include <asm/apic.h>
36#include <asm/pgalloc.h>
36#include <asm/processor.h> 37#include <asm/processor.h>
37#include <asm/timer.h> 38#include <asm/timer.h>
38#include <asm/vmi_time.h> 39#include <asm/vmi_time.h>
@@ -266,30 +267,6 @@ static void vmi_nop(void)
266{ 267{
267} 268}
268 269
269#ifdef CONFIG_HIGHPTE
270static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
271{
272 void *va = kmap_atomic(page, type);
273
274 /*
275 * Internally, the VMI ROM must map virtual addresses to physical
276 * addresses for processing MMU updates. By the time MMU updates
277 * are issued, this information is typically already lost.
278 * Fortunately, the VMI provides a cache of mapping slots for active
279 * page tables.
280 *
281 * We use slot zero for the linear mapping of physical memory, and
282 * in HIGHPTE kernels, slot 1 and 2 for KM_PTE0 and KM_PTE1.
283 *
284 * args: SLOT VA COUNT PFN
285 */
286 BUG_ON(type != KM_PTE0 && type != KM_PTE1);
287 vmi_ops.set_linear_mapping((type - KM_PTE0)+1, va, 1, page_to_pfn(page));
288
289 return va;
290}
291#endif
292
293static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) 270static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
294{ 271{
295 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); 272 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
@@ -640,6 +617,12 @@ static inline int __init activate_vmi(void)
640 u64 reloc; 617 u64 reloc;
641 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; 618 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;
642 619
620 /*
621 * Prevent page tables from being allocated in highmem, even if
622 * CONFIG_HIGHPTE is enabled.
623 */
624 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
625
643 if (call_vrom_func(vmi_rom, vmi_init) != 0) { 626 if (call_vrom_func(vmi_rom, vmi_init) != 0) {
644 printk(KERN_ERR "VMI ROM failed to initialize!"); 627 printk(KERN_ERR "VMI ROM failed to initialize!");
645 return 0; 628 return 0;
@@ -778,10 +761,6 @@ static inline int __init activate_vmi(void)
778 761
779 /* Set linear is needed in all cases */ 762 /* Set linear is needed in all cases */
780 vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); 763 vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
781#ifdef CONFIG_HIGHPTE
782 if (vmi_ops.set_linear_mapping)
783 pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
784#endif
785 764
786 /* 765 /*
787 * These MUST always be patched. Don't support indirect jumps 766 * These MUST always be patched. Don't support indirect jumps
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 74c92bb194df..2f1ca5614292 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -79,11 +79,7 @@ unsigned long vmi_tsc_khz(void)
79 79
80static inline unsigned int vmi_get_timer_vector(void) 80static inline unsigned int vmi_get_timer_vector(void)
81{ 81{
82#ifdef CONFIG_X86_IO_APIC 82 return IRQ0_VECTOR;
83 return FIRST_DEVICE_VECTOR;
84#else
85 return FIRST_EXTERNAL_VECTOR;
86#endif
87} 83}
88 84
89/** vmi clockchip */ 85/** vmi clockchip */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f92a0da608cb..44879df55696 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -341,7 +341,7 @@ SECTIONS
341 * Per-cpu symbols which need to be offset from __per_cpu_load 341 * Per-cpu symbols which need to be offset from __per_cpu_load
342 * for the boot processor. 342 * for the boot processor.
343 */ 343 */
344#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load 344#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
345INIT_PER_CPU(gdt_page); 345INIT_PER_CPU(gdt_page);
346INIT_PER_CPU(irq_stack_union); 346INIT_PER_CPU(irq_stack_union);
347 347
@@ -352,7 +352,7 @@ INIT_PER_CPU(irq_stack_union);
352 "kernel image bigger than KERNEL_IMAGE_SIZE"); 352 "kernel image bigger than KERNEL_IMAGE_SIZE");
353 353
354#ifdef CONFIG_SMP 354#ifdef CONFIG_SMP
355. = ASSERT((per_cpu__irq_stack_union == 0), 355. = ASSERT((irq_stack_union == 0),
356 "irq_stack_union is not at start of per-cpu area"); 356 "irq_stack_union is not at start of per-cpu area");
357#endif 357#endif
358 358
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 9055e5872ff0..1c0c6ab9c60f 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -301,7 +301,8 @@ static int __init vsyscall_init(void)
301 register_sysctl_table(kernel_root_table2); 301 register_sysctl_table(kernel_root_table2);
302#endif 302#endif
303 on_each_cpu(cpu_vsyscall_init, NULL, 1); 303 on_each_cpu(cpu_vsyscall_init, NULL, 1);
304 hotcpu_notifier(cpu_vsyscall_notifier, 0); 304 /* notifier priority > KVM */
305 hotcpu_notifier(cpu_vsyscall_notifier, 30);
305 return 0; 306 return 0;
306} 307}
307 308
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 619f7f88b8cc..693920b22496 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -26,7 +26,8 @@ EXPORT_SYMBOL(__put_user_2);
26EXPORT_SYMBOL(__put_user_4); 26EXPORT_SYMBOL(__put_user_4);
27EXPORT_SYMBOL(__put_user_8); 27EXPORT_SYMBOL(__put_user_8);
28 28
29EXPORT_SYMBOL(copy_user_generic); 29EXPORT_SYMBOL(copy_user_generic_string);
30EXPORT_SYMBOL(copy_user_generic_unrolled);
30EXPORT_SYMBOL(__copy_user_nocache); 31EXPORT_SYMBOL(__copy_user_nocache);
31EXPORT_SYMBOL(_copy_from_user); 32EXPORT_SYMBOL(_copy_from_user);
32EXPORT_SYMBOL(_copy_to_user); 33EXPORT_SYMBOL(_copy_to_user);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ccd179dec36e..61a1e8c7e19f 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -4,9 +4,11 @@
4 * For licencing details see kernel-base/COPYING 4 * For licencing details see kernel-base/COPYING
5 */ 5 */
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/ioport.h>
7 8
8#include <asm/bios_ebda.h> 9#include <asm/bios_ebda.h>
9#include <asm/paravirt.h> 10#include <asm/paravirt.h>
11#include <asm/pci_x86.h>
10#include <asm/mpspec.h> 12#include <asm/mpspec.h>
11#include <asm/setup.h> 13#include <asm/setup.h>
12#include <asm/apic.h> 14#include <asm/apic.h>
@@ -70,16 +72,25 @@ struct x86_init_ops x86_init __initdata = {
70 .iommu = { 72 .iommu = {
71 .iommu_init = iommu_init_noop, 73 .iommu_init = iommu_init_noop,
72 }, 74 },
75
76 .pci = {
77 .init = x86_default_pci_init,
78 .init_irq = x86_default_pci_init_irq,
79 .fixup_irqs = x86_default_pci_fixup_irqs,
80 },
73}; 81};
74 82
75struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { 83struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
76 .setup_percpu_clockev = setup_secondary_APIC_clock, 84 .setup_percpu_clockev = setup_secondary_APIC_clock,
77}; 85};
78 86
87static void default_nmi_init(void) { };
88
79struct x86_platform_ops x86_platform = { 89struct x86_platform_ops x86_platform = {
80 .calibrate_tsc = native_calibrate_tsc, 90 .calibrate_tsc = native_calibrate_tsc,
81 .get_wallclock = mach_get_cmos_time, 91 .get_wallclock = mach_get_cmos_time,
82 .set_wallclock = mach_set_rtc_mmss, 92 .set_wallclock = mach_set_rtc_mmss,
83 .iommu_shutdown = iommu_shutdown_noop, 93 .iommu_shutdown = iommu_shutdown_noop,
84 .is_untracked_pat_range = is_ISA_range, 94 .is_untracked_pat_range = is_ISA_range,
95 .nmi_init = default_nmi_init
85}; 96};
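
x86_init.c grows PCI hooks and an nmi_init platform hook, each pre-filled with a default (or an explicit empty routine like default_nmi_init), so generic code can call every hook unconditionally and a platform overrides only what it needs. A compact illustration of that convention, with invented hook names rather than the kernel's x86_init/x86_platform members:

#include <stdio.h>

struct platform_ops {
	void (*nmi_init)(void);
	void (*pci_init)(void);
};

static void noop(void)             { /* deliberately empty default */ }
static void generic_pci_init(void) { puts("generic PCI init"); }
static void quirky_nmi_init(void)  { puts("platform NMI setup"); }

/* Defaults are always valid function pointers, never NULL. */
static struct platform_ops platform = {
	.nmi_init = noop,
	.pci_init = generic_pci_init,
};

int main(void)
{
	platform.nmi_init = quirky_nmi_init;	/* a platform override */

	platform.pci_init();	/* callers never test for NULL */
	platform.nmi_init();
	return 0;
}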
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index c5ee17e8c6d9..782c3a362ec6 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -337,6 +337,7 @@ void __ref xsave_cntxt_init(void)
337 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); 337 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
338 xstate_size = ebx; 338 xstate_size = ebx;
339 339
340 update_regset_xstate_info(xstate_size, pcntxt_mask);
340 prepare_fx_sw_frame(); 341 prepare_fx_sw_frame();
341 342
342 setup_xstate_init(); 343 setup_xstate_init();