Diffstat (limited to 'arch/x86/kernel')
75 files changed, 1996 insertions, 1037 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e77b22083721..0925676266bd 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -104,6 +104,7 @@ obj-$(CONFIG_SCx200) += scx200.o | |||
104 | scx200-y += scx200_32.o | 104 | scx200-y += scx200_32.o |
105 | 105 | ||
106 | obj-$(CONFIG_OLPC) += olpc.o | 106 | obj-$(CONFIG_OLPC) += olpc.o |
107 | obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o | ||
107 | obj-$(CONFIG_X86_MRST) += mrst.o | 108 | obj-$(CONFIG_X86_MRST) += mrst.o |
108 | 109 | ||
109 | microcode-y := microcode_core.o | 110 | microcode-y := microcode_core.o |
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index 580b4e296010..28595d6df47c 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S | |||
@@ -104,7 +104,7 @@ _start: | |||
104 | movl %eax, %ecx | 104 | movl %eax, %ecx |
105 | orl %edx, %ecx | 105 | orl %edx, %ecx |
106 | jz 1f | 106 | jz 1f |
107 | movl $0xc0000080, %ecx | 107 | movl $MSR_EFER, %ecx |
108 | wrmsr | 108 | wrmsr |
109 | 1: | 109 | 1: |
110 | 110 | ||
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index fcc3c61fdecc..33cec152070d 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * sleep.c - x86-specific ACPI sleep support. | 2 | * sleep.c - x86-specific ACPI sleep support. |
3 | * | 3 | * |
4 | * Copyright (C) 2001-2003 Patrick Mochel | 4 | * Copyright (C) 2001-2003 Patrick Mochel |
5 | * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz> | 5 | * Copyright (C) 2001-2003 Pavel Machek <pavel@ucw.cz> |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/acpi.h> | 8 | #include <linux/acpi.h> |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 70237732a6c7..f65ab8b014c4 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -214,6 +214,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
214 | u8 *instr = a->instr; | 214 | u8 *instr = a->instr; |
215 | BUG_ON(a->replacementlen > a->instrlen); | 215 | BUG_ON(a->replacementlen > a->instrlen); |
216 | BUG_ON(a->instrlen > sizeof(insnbuf)); | 216 | BUG_ON(a->instrlen > sizeof(insnbuf)); |
217 | BUG_ON(a->cpuid >= NCAPINTS*32); | ||
217 | if (!boot_cpu_has(a->cpuid)) | 218 | if (!boot_cpu_has(a->cpuid)) |
218 | continue; | 219 | continue; |
219 | #ifdef CONFIG_X86_64 | 220 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0d20286d78c6..fa044e1e30a2 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -2572,6 +2572,11 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | |||
2572 | static int amd_iommu_domain_has_cap(struct iommu_domain *domain, | 2572 | static int amd_iommu_domain_has_cap(struct iommu_domain *domain, |
2573 | unsigned long cap) | 2573 | unsigned long cap) |
2574 | { | 2574 | { |
2575 | switch (cap) { | ||
2576 | case IOMMU_CAP_CACHE_COHERENCY: | ||
2577 | return 1; | ||
2578 | } | ||
2579 | |||
2575 | return 0; | 2580 | return 0; |
2576 | } | 2581 | } |
2577 | 2582 | ||
@@ -2609,8 +2614,7 @@ int __init amd_iommu_init_passthrough(void) | |||
2609 | 2614 | ||
2610 | pt_domain->mode |= PAGE_MODE_NONE; | 2615 | pt_domain->mode |= PAGE_MODE_NONE; |
2611 | 2616 | ||
2612 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 2617 | for_each_pci_dev(dev) { |
2613 | |||
2614 | if (!check_device(&dev->dev)) | 2618 | if (!check_device(&dev->dev)) |
2615 | continue; | 2619 | continue; |
2616 | 2620 | ||
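The amd_iommu.c hunk above makes the driver report IOMMU_CAP_CACHE_COHERENCY from its domain_has_cap callback. A minimal sketch, not part of the patch, of how a generic-IOMMU consumer of that era might query the capability through the iommu_domain_has_cap() wrapper from include/linux/iommu.h; the function name example_domain_is_coherent is hypothetical:

    /* Illustrative sketch only -- not from the patch above. */
    #include <linux/iommu.h>

    static int example_domain_is_coherent(struct iommu_domain *dom)
    {
            /* routed to amd_iommu_domain_has_cap() on AMD IOMMU systems */
            return iommu_domain_has_cap(dom, IOMMU_CAP_CACHE_COHERENCY);
    }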
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index a35347501d36..8dd77800ff5d 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c | |||
@@ -43,10 +43,11 @@ | |||
43 | 43 | ||
44 | #include <asm/fixmap.h> | 44 | #include <asm/fixmap.h> |
45 | #include <asm/apb_timer.h> | 45 | #include <asm/apb_timer.h> |
46 | #include <asm/mrst.h> | ||
46 | 47 | ||
47 | #define APBT_MASK CLOCKSOURCE_MASK(32) | 48 | #define APBT_MASK CLOCKSOURCE_MASK(32) |
48 | #define APBT_SHIFT 22 | 49 | #define APBT_SHIFT 22 |
49 | #define APBT_CLOCKEVENT_RATING 150 | 50 | #define APBT_CLOCKEVENT_RATING 110 |
50 | #define APBT_CLOCKSOURCE_RATING 250 | 51 | #define APBT_CLOCKSOURCE_RATING 250 |
51 | #define APBT_MIN_DELTA_USEC 200 | 52 | #define APBT_MIN_DELTA_USEC 200 |
52 | 53 | ||
@@ -83,8 +84,6 @@ struct apbt_dev { | |||
83 | char name[10]; | 84 | char name[10]; |
84 | }; | 85 | }; |
85 | 86 | ||
86 | int disable_apbt_percpu __cpuinitdata; | ||
87 | |||
88 | static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev); | 87 | static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev); |
89 | 88 | ||
90 | #ifdef CONFIG_SMP | 89 | #ifdef CONFIG_SMP |
@@ -195,29 +194,6 @@ static struct clock_event_device apbt_clockevent = { | |||
195 | }; | 194 | }; |
196 | 195 | ||
197 | /* | 196 | /* |
198 | * if user does not want to use per CPU apb timer, just give it a lower rating | ||
199 | * than local apic timer and skip the late per cpu timer init. | ||
200 | */ | ||
201 | static inline int __init setup_x86_mrst_timer(char *arg) | ||
202 | { | ||
203 | if (!arg) | ||
204 | return -EINVAL; | ||
205 | |||
206 | if (strcmp("apbt_only", arg) == 0) | ||
207 | disable_apbt_percpu = 0; | ||
208 | else if (strcmp("lapic_and_apbt", arg) == 0) | ||
209 | disable_apbt_percpu = 1; | ||
210 | else { | ||
211 | pr_warning("X86 MRST timer option %s not recognised" | ||
212 | " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", | ||
213 | arg); | ||
214 | return -EINVAL; | ||
215 | } | ||
216 | return 0; | ||
217 | } | ||
218 | __setup("x86_mrst_timer=", setup_x86_mrst_timer); | ||
219 | |||
220 | /* | ||
221 | * start count down from 0xffff_ffff. this is done by toggling the enable bit | 197 | * start count down from 0xffff_ffff. this is done by toggling the enable bit |
222 | * then load initial load count to ~0. | 198 | * then load initial load count to ~0. |
223 | */ | 199 | */ |
@@ -335,7 +311,7 @@ static int __init apbt_clockevent_register(void) | |||
335 | adev->num = smp_processor_id(); | 311 | adev->num = smp_processor_id(); |
336 | memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); | 312 | memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); |
337 | 313 | ||
338 | if (disable_apbt_percpu) { | 314 | if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) { |
339 | apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100; | 315 | apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100; |
340 | global_clock_event = &adev->evt; | 316 | global_clock_event = &adev->evt; |
341 | printk(KERN_DEBUG "%s clockevent registered as global\n", | 317 | printk(KERN_DEBUG "%s clockevent registered as global\n", |
@@ -429,7 +405,8 @@ static int apbt_cpuhp_notify(struct notifier_block *n, | |||
429 | 405 | ||
430 | static __init int apbt_late_init(void) | 406 | static __init int apbt_late_init(void) |
431 | { | 407 | { |
432 | if (disable_apbt_percpu || !apb_timer_block_enabled) | 408 | if (mrst_timer_options == MRST_TIMER_LAPIC_APBT || |
409 | !apb_timer_block_enabled) | ||
433 | return 0; | 410 | return 0; |
434 | /* This notifier should be called after workqueue is ready */ | 411 | /* This notifier should be called after workqueue is ready */ |
435 | hotcpu_notifier(apbt_cpuhp_notify, -20); | 412 | hotcpu_notifier(apbt_cpuhp_notify, -20); |
@@ -450,6 +427,8 @@ static void apbt_set_mode(enum clock_event_mode mode, | |||
450 | int timer_num; | 427 | int timer_num; |
451 | struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); | 428 | struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); |
452 | 429 | ||
430 | BUG_ON(!apbt_virt_address); | ||
431 | |||
453 | timer_num = adev->num; | 432 | timer_num = adev->num; |
454 | pr_debug("%s CPU %d timer %d mode=%d\n", | 433 | pr_debug("%s CPU %d timer %d mode=%d\n", |
455 | __func__, first_cpu(*evt->cpumask), timer_num, mode); | 434 | __func__, first_cpu(*evt->cpumask), timer_num, mode); |
@@ -676,7 +655,7 @@ void __init apbt_time_init(void) | |||
676 | } | 655 | } |
677 | #ifdef CONFIG_SMP | 656 | #ifdef CONFIG_SMP |
678 | /* kernel cmdline disable apb timer, so we will use lapic timers */ | 657 | /* kernel cmdline disable apb timer, so we will use lapic timers */ |
679 | if (disable_apbt_percpu) { | 658 | if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) { |
680 | printk(KERN_INFO "apbt: disabled per cpu timer\n"); | 659 | printk(KERN_INFO "apbt: disabled per cpu timer\n"); |
681 | return; | 660 | return; |
682 | } | 661 | } |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index b5d8b0bcf235..a2e0caf26e17 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -280,7 +280,7 @@ void __init early_gart_iommu_check(void) | |||
280 | * or BIOS forget to put that in reserved. | 280 | * or BIOS forget to put that in reserved. |
281 | * try to update e820 to make that region as reserved. | 281 | * try to update e820 to make that region as reserved. |
282 | */ | 282 | */ |
283 | u32 agp_aper_base = 0, agp_aper_order = 0; | 283 | u32 agp_aper_order = 0; |
284 | int i, fix, slot, valid_agp = 0; | 284 | int i, fix, slot, valid_agp = 0; |
285 | u32 ctl; | 285 | u32 ctl; |
286 | u32 aper_size = 0, aper_order = 0, last_aper_order = 0; | 286 | u32 aper_size = 0, aper_order = 0, last_aper_order = 0; |
@@ -291,7 +291,7 @@ void __init early_gart_iommu_check(void) | |||
291 | return; | 291 | return; |
292 | 292 | ||
293 | /* This is mostly duplicate of iommu_hole_init */ | 293 | /* This is mostly duplicate of iommu_hole_init */ |
294 | agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp); | 294 | search_agp_bridge(&agp_aper_order, &valid_agp); |
295 | 295 | ||
296 | fix = 0; | 296 | fix = 0; |
297 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { | 297 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 565c1bfc507d..910f20b457c4 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile | |||
@@ -2,7 +2,12 @@ | |||
2 | # Makefile for local APIC drivers and for the IO-APIC code | 2 | # Makefile for local APIC drivers and for the IO-APIC code |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o | 5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o |
6 | ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) | ||
7 | obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o | ||
8 | endif | ||
9 | obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o | ||
10 | |||
6 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o | 11 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o |
7 | obj-$(CONFIG_SMP) += ipi.o | 12 | obj-$(CONFIG_SMP) += ipi.o |
8 | 13 | ||
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a96489ee6cab..e3b534cda49a 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -460,7 +460,7 @@ static void lapic_timer_broadcast(const struct cpumask *mask) | |||
460 | } | 460 | } |
461 | 461 | ||
462 | /* | 462 | /* |
463 | * Setup the local APIC timer for this CPU. Copy the initilized values | 463 | * Setup the local APIC timer for this CPU. Copy the initialized values |
464 | * of the boot CPU and register the clock event in the framework. | 464 | * of the boot CPU and register the clock event in the framework. |
465 | */ | 465 | */ |
466 | static void __cpuinit setup_APIC_timer(void) | 466 | static void __cpuinit setup_APIC_timer(void) |
@@ -1606,7 +1606,7 @@ void __init init_apic_mappings(void) | |||
1606 | * acpi lapic path already maps that address in | 1606 | * acpi lapic path already maps that address in |
1607 | * acpi_register_lapic_address() | 1607 | * acpi_register_lapic_address() |
1608 | */ | 1608 | */ |
1609 | if (!acpi_lapic) | 1609 | if (!acpi_lapic && !smp_found_config) |
1610 | set_fixmap_nocache(FIX_APIC_BASE, apic_phys); | 1610 | set_fixmap_nocache(FIX_APIC_BASE, apic_phys); |
1611 | 1611 | ||
1612 | apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", | 1612 | apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 425e53a87feb..8593582d8022 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -129,7 +129,6 @@ int es7000_plat; | |||
129 | * GSI override for ES7000 platforms. | 129 | * GSI override for ES7000 platforms. |
130 | */ | 130 | */ |
131 | 131 | ||
132 | static unsigned int base; | ||
133 | 132 | ||
134 | static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | 133 | static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) |
135 | { | 134 | { |
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c new file mode 100644 index 000000000000..cefd6942f0e9 --- /dev/null +++ b/arch/x86/kernel/apic/hw_nmi.c | |||
@@ -0,0 +1,107 @@ | |||
1 | /* | ||
2 | * HW NMI watchdog support | ||
3 | * | ||
4 | * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. | ||
5 | * | ||
6 | * Arch specific calls to support NMI watchdog | ||
7 | * | ||
8 | * Bits copied from original nmi.c file | ||
9 | * | ||
10 | */ | ||
11 | #include <asm/apic.h> | ||
12 | |||
13 | #include <linux/cpumask.h> | ||
14 | #include <linux/kdebug.h> | ||
15 | #include <linux/notifier.h> | ||
16 | #include <linux/kprobes.h> | ||
17 | #include <linux/nmi.h> | ||
18 | #include <linux/module.h> | ||
19 | |||
20 | /* For reliability, we're prepared to waste bits here. */ | ||
21 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; | ||
22 | |||
23 | u64 hw_nmi_get_sample_period(void) | ||
24 | { | ||
25 | return (u64)(cpu_khz) * 1000 * 60; | ||
26 | } | ||
27 | |||
28 | #ifdef ARCH_HAS_NMI_WATCHDOG | ||
29 | void arch_trigger_all_cpu_backtrace(void) | ||
30 | { | ||
31 | int i; | ||
32 | |||
33 | cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); | ||
34 | |||
35 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | ||
36 | apic->send_IPI_all(NMI_VECTOR); | ||
37 | |||
38 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | ||
39 | for (i = 0; i < 10 * 1000; i++) { | ||
40 | if (cpumask_empty(to_cpumask(backtrace_mask))) | ||
41 | break; | ||
42 | mdelay(1); | ||
43 | } | ||
44 | } | ||
45 | |||
46 | static int __kprobes | ||
47 | arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, | ||
48 | unsigned long cmd, void *__args) | ||
49 | { | ||
50 | struct die_args *args = __args; | ||
51 | struct pt_regs *regs; | ||
52 | int cpu = smp_processor_id(); | ||
53 | |||
54 | switch (cmd) { | ||
55 | case DIE_NMI: | ||
56 | case DIE_NMI_IPI: | ||
57 | break; | ||
58 | |||
59 | default: | ||
60 | return NOTIFY_DONE; | ||
61 | } | ||
62 | |||
63 | regs = args->regs; | ||
64 | |||
65 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { | ||
66 | static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; | ||
67 | |||
68 | arch_spin_lock(&lock); | ||
69 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | ||
70 | show_regs(regs); | ||
71 | dump_stack(); | ||
72 | arch_spin_unlock(&lock); | ||
73 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); | ||
74 | return NOTIFY_STOP; | ||
75 | } | ||
76 | |||
77 | return NOTIFY_DONE; | ||
78 | } | ||
79 | |||
80 | static __read_mostly struct notifier_block backtrace_notifier = { | ||
81 | .notifier_call = arch_trigger_all_cpu_backtrace_handler, | ||
82 | .next = NULL, | ||
83 | .priority = 1 | ||
84 | }; | ||
85 | |||
86 | static int __init register_trigger_all_cpu_backtrace(void) | ||
87 | { | ||
88 | register_die_notifier(&backtrace_notifier); | ||
89 | return 0; | ||
90 | } | ||
91 | early_initcall(register_trigger_all_cpu_backtrace); | ||
92 | #endif | ||
93 | |||
94 | /* STUB calls to mimic old nmi_watchdog behaviour */ | ||
95 | #if defined(CONFIG_X86_LOCAL_APIC) | ||
96 | unsigned int nmi_watchdog = NMI_NONE; | ||
97 | EXPORT_SYMBOL(nmi_watchdog); | ||
98 | void acpi_nmi_enable(void) { return; } | ||
99 | void acpi_nmi_disable(void) { return; } | ||
100 | #endif | ||
101 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ | ||
102 | EXPORT_SYMBOL(nmi_active); | ||
103 | int unknown_nmi_panic; | ||
104 | void cpu_nmi_set_wd_enabled(void) { return; } | ||
105 | void stop_apic_nmi_watchdog(void *unused) { return; } | ||
106 | void setup_apic_nmi_watchdog(void *unused) { return; } | ||
107 | int __init check_nmi_watchdog(void) { return 0; } | ||
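The new hw_nmi.c above wires arch_trigger_all_cpu_backtrace() to an NMI IPI plus a die notifier that prints each CPU's registers and stack. A rough sketch, under the assumption that the declaration is visible from the arch NMI headers (shown here as an explicit extern for self-containment); example_dump_all_cpus() is a hypothetical debugging hook, not anything in the patch:

    /* Illustrative sketch only -- not part of the patch. */
    /* The declaration normally comes from the x86 NMI/APIC headers. */
    extern void arch_trigger_all_cpu_backtrace(void);

    static void example_dump_all_cpus(void)
    {
    #ifdef ARCH_HAS_NMI_WATCHDOG
            /* sends an NMI to every online CPU, waits up to ~10s for the dumps */
            arch_trigger_all_cpu_backtrace();
    #endif
    }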
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e41ed24ab26d..4dc0084ec1b1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -3397,7 +3397,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
3397 | 3397 | ||
3398 | cfg = desc->chip_data; | 3398 | cfg = desc->chip_data; |
3399 | 3399 | ||
3400 | read_msi_msg_desc(desc, &msg); | 3400 | get_cached_msi_msg_desc(desc, &msg); |
3401 | 3401 | ||
3402 | msg.data &= ~MSI_DATA_VECTOR_MASK; | 3402 | msg.data &= ~MSI_DATA_VECTOR_MASK; |
3403 | msg.data |= MSI_DATA_VECTOR(cfg->vector); | 3403 | msg.data |= MSI_DATA_VECTOR(cfg->vector); |
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 1edaf15c0b8e..a43f71cb30f8 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -401,13 +401,6 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
401 | int cpu = smp_processor_id(); | 401 | int cpu = smp_processor_id(); |
402 | int rc = 0; | 402 | int rc = 0; |
403 | 403 | ||
404 | /* check for other users first */ | ||
405 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | ||
406 | == NOTIFY_STOP) { | ||
407 | rc = 1; | ||
408 | touched = 1; | ||
409 | } | ||
410 | |||
411 | sum = get_timer_irqs(cpu); | 404 | sum = get_timer_irqs(cpu); |
412 | 405 | ||
413 | if (__get_cpu_var(nmi_touch)) { | 406 | if (__get_cpu_var(nmi_touch)) { |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index e46f98f36e31..7b598b84c902 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -604,6 +604,10 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | |||
604 | { | 604 | { |
605 | if (reason != DIE_NMI_IPI) | 605 | if (reason != DIE_NMI_IPI) |
606 | return NOTIFY_OK; | 606 | return NOTIFY_OK; |
607 | |||
608 | if (in_crash_kexec) | ||
609 | /* do nothing if entering the crash kernel */ | ||
610 | return NOTIFY_OK; | ||
607 | /* | 611 | /* |
608 | * Use a lock so only one cpu prints at a time | 612 | * Use a lock so only one cpu prints at a time |
609 | * to prevent intermixed output. | 613 | * to prevent intermixed output. |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index c4f9182ca3ac..4c9c67bf09b7 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -140,7 +140,7 @@ | |||
140 | * is now the way life works). | 140 | * is now the way life works). |
141 | * Fix thinko in suspend() (wrong return). | 141 | * Fix thinko in suspend() (wrong return). |
142 | * Notify drivers on critical suspend. | 142 | * Notify drivers on critical suspend. |
143 | * Make kapmd absorb more idle time (Pavel Machek <pavel@suse.cz> | 143 | * Make kapmd absorb more idle time (Pavel Machek <pavel@ucw.cz> |
144 | * modified by sfr). | 144 | * modified by sfr). |
145 | * Disable interrupts while we are suspended (Andy Henroid | 145 | * Disable interrupts while we are suspended (Andy Henroid |
146 | * <andy_henroid@yahoo.com> fixed by sfr). | 146 | * <andy_henroid@yahoo.com> fixed by sfr). |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3a785da34b6f..3f0ebe429a01 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -12,11 +12,11 @@ endif | |||
12 | nostackp := $(call cc-option, -fno-stack-protector) | 12 | nostackp := $(call cc-option, -fno-stack-protector) |
13 | CFLAGS_common.o := $(nostackp) | 13 | CFLAGS_common.o := $(nostackp) |
14 | 14 | ||
15 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 15 | obj-y := intel_cacheinfo.o scattered.o topology.o |
16 | obj-y += proc.o capflags.o powerflags.o common.o | 16 | obj-y += proc.o capflags.o powerflags.o common.o |
17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o | 17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o |
18 | 18 | ||
19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 19 | obj-$(CONFIG_X86_32) += bugs.o |
20 | obj-$(CONFIG_X86_64) += bugs_64.o | 20 | obj-$(CONFIG_X86_64) += bugs_64.o |
21 | 21 | ||
22 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o | 22 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e485825130d2..60a57b13082d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -466,7 +466,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
466 | } | 466 | } |
467 | 467 | ||
468 | } | 468 | } |
469 | if (c->x86 == 0x10 || c->x86 == 0x11) | 469 | if (c->x86 >= 0x10) |
470 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 470 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
471 | 471 | ||
472 | /* get apicid instead of initial apic id from cpuid */ | 472 | /* get apicid instead of initial apic id from cpuid */ |
@@ -529,7 +529,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
529 | num_cache_leaves = 3; | 529 | num_cache_leaves = 3; |
530 | } | 530 | } |
531 | 531 | ||
532 | if (c->x86 >= 0xf && c->x86 <= 0x11) | 532 | if (c->x86 >= 0xf) |
533 | set_cpu_cap(c, X86_FEATURE_K8); | 533 | set_cpu_cap(c, X86_FEATURE_K8); |
534 | 534 | ||
535 | if (cpu_has_xmm2) { | 535 | if (cpu_has_xmm2) { |
@@ -546,7 +546,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
546 | fam10h_check_enable_mmcfg(); | 546 | fam10h_check_enable_mmcfg(); |
547 | } | 547 | } |
548 | 548 | ||
549 | if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { | 549 | if (c == &boot_cpu_data && c->x86 >= 0xf) { |
550 | unsigned long long tseg; | 550 | unsigned long long tseg; |
551 | 551 | ||
552 | /* | 552 | /* |
@@ -609,3 +609,74 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = { | |||
609 | }; | 609 | }; |
610 | 610 | ||
611 | cpu_dev_register(amd_cpu_dev); | 611 | cpu_dev_register(amd_cpu_dev); |
612 | |||
613 | /* | ||
614 | * AMD errata checking | ||
615 | * | ||
616 | * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or | ||
617 | * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that | ||
618 | * have an OSVW id assigned, which it takes as first argument. Both take a | ||
619 | * variable number of family-specific model-stepping ranges created by | ||
620 | * AMD_MODEL_RANGE(). Each erratum also has to be declared as extern const | ||
621 | * int[] in arch/x86/include/asm/processor.h. | ||
622 | * | ||
623 | * Example: | ||
624 | * | ||
625 | * const int amd_erratum_319[] = | ||
626 | * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2), | ||
627 | * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0), | ||
628 | * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0)); | ||
629 | */ | ||
630 | |||
631 | const int amd_erratum_400[] = | ||
632 | AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), | ||
633 | AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); | ||
634 | EXPORT_SYMBOL_GPL(amd_erratum_400); | ||
635 | |||
636 | const int amd_erratum_383[] = | ||
637 | AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); | ||
638 | EXPORT_SYMBOL_GPL(amd_erratum_383); | ||
639 | |||
640 | bool cpu_has_amd_erratum(const int *erratum) | ||
641 | { | ||
642 | struct cpuinfo_x86 *cpu = ¤t_cpu_data; | ||
643 | int osvw_id = *erratum++; | ||
644 | u32 range; | ||
645 | u32 ms; | ||
646 | |||
647 | /* | ||
648 | * If called early enough that current_cpu_data hasn't been initialized | ||
649 | * yet, fall back to boot_cpu_data. | ||
650 | */ | ||
651 | if (cpu->x86 == 0) | ||
652 | cpu = &boot_cpu_data; | ||
653 | |||
654 | if (cpu->x86_vendor != X86_VENDOR_AMD) | ||
655 | return false; | ||
656 | |||
657 | if (osvw_id >= 0 && osvw_id < 65536 && | ||
658 | cpu_has(cpu, X86_FEATURE_OSVW)) { | ||
659 | u64 osvw_len; | ||
660 | |||
661 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len); | ||
662 | if (osvw_id < osvw_len) { | ||
663 | u64 osvw_bits; | ||
664 | |||
665 | rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6), | ||
666 | osvw_bits); | ||
667 | return osvw_bits & (1ULL << (osvw_id & 0x3f)); | ||
668 | } | ||
669 | } | ||
670 | |||
671 | /* OSVW unavailable or ID unknown, match family-model-stepping range */ | ||
672 | ms = (cpu->x86_model << 8) | cpu->x86_mask; | ||
673 | while ((range = *erratum++)) | ||
674 | if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && | ||
675 | (ms >= AMD_MODEL_RANGE_START(range)) && | ||
676 | (ms <= AMD_MODEL_RANGE_END(range))) | ||
677 | return true; | ||
678 | |||
679 | return false; | ||
680 | } | ||
681 | |||
682 | EXPORT_SYMBOL_GPL(cpu_has_amd_erratum); | ||
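The comment block and helpers added to amd.c above define a small API: each erratum is a const int[] table built with AMD_OSVW_ERRATUM() or AMD_LEGACY_ERRATUM(), declared in arch/x86/include/asm/processor.h, and cpu_has_amd_erratum() checks the running CPU against OSVW or the family/model/stepping ranges. A minimal caller sketch using the exported amd_erratum_383 table; the surrounding function is hypothetical:

    /* Illustrative sketch only -- not part of the patch. */
    #include <linux/kernel.h>
    #include <asm/processor.h>      /* erratum tables are declared here, per the comment above */

    static void example_check_erratum_383(void)
    {
            /* amd_erratum_383 and cpu_has_amd_erratum() come from the amd.c hunk above */
            if (cpu_has_amd_erratum(amd_erratum_383))
                    pr_info("CPU affected by AMD erratum 383, enabling workaround\n");
    }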
diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c deleted file mode 100644 index 2056ccf572cc..000000000000 --- a/arch/x86/kernel/cpu/cmpxchg.c +++ /dev/null | |||
@@ -1,72 +0,0 @@ | |||
1 | /* | ||
2 | * cmpxchg*() fallbacks for CPU not supporting these instructions | ||
3 | */ | ||
4 | |||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/smp.h> | ||
7 | #include <linux/module.h> | ||
8 | |||
9 | #ifndef CONFIG_X86_CMPXCHG | ||
10 | unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) | ||
11 | { | ||
12 | u8 prev; | ||
13 | unsigned long flags; | ||
14 | |||
15 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
16 | local_irq_save(flags); | ||
17 | prev = *(u8 *)ptr; | ||
18 | if (prev == old) | ||
19 | *(u8 *)ptr = new; | ||
20 | local_irq_restore(flags); | ||
21 | return prev; | ||
22 | } | ||
23 | EXPORT_SYMBOL(cmpxchg_386_u8); | ||
24 | |||
25 | unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) | ||
26 | { | ||
27 | u16 prev; | ||
28 | unsigned long flags; | ||
29 | |||
30 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
31 | local_irq_save(flags); | ||
32 | prev = *(u16 *)ptr; | ||
33 | if (prev == old) | ||
34 | *(u16 *)ptr = new; | ||
35 | local_irq_restore(flags); | ||
36 | return prev; | ||
37 | } | ||
38 | EXPORT_SYMBOL(cmpxchg_386_u16); | ||
39 | |||
40 | unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) | ||
41 | { | ||
42 | u32 prev; | ||
43 | unsigned long flags; | ||
44 | |||
45 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
46 | local_irq_save(flags); | ||
47 | prev = *(u32 *)ptr; | ||
48 | if (prev == old) | ||
49 | *(u32 *)ptr = new; | ||
50 | local_irq_restore(flags); | ||
51 | return prev; | ||
52 | } | ||
53 | EXPORT_SYMBOL(cmpxchg_386_u32); | ||
54 | #endif | ||
55 | |||
56 | #ifndef CONFIG_X86_CMPXCHG64 | ||
57 | unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) | ||
58 | { | ||
59 | u64 prev; | ||
60 | unsigned long flags; | ||
61 | |||
62 | /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ | ||
63 | local_irq_save(flags); | ||
64 | prev = *(u64 *)ptr; | ||
65 | if (prev == old) | ||
66 | *(u64 *)ptr = new; | ||
67 | local_irq_restore(flags); | ||
68 | return prev; | ||
69 | } | ||
70 | EXPORT_SYMBOL(cmpxchg_486_u64); | ||
71 | #endif | ||
72 | |||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 68e4a6f2211e..490dac63c2d2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -140,10 +140,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | |||
140 | static int __init x86_xsave_setup(char *s) | 140 | static int __init x86_xsave_setup(char *s) |
141 | { | 141 | { |
142 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | 142 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); |
143 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
143 | return 1; | 144 | return 1; |
144 | } | 145 | } |
145 | __setup("noxsave", x86_xsave_setup); | 146 | __setup("noxsave", x86_xsave_setup); |
146 | 147 | ||
148 | static int __init x86_xsaveopt_setup(char *s) | ||
149 | { | ||
150 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
151 | return 1; | ||
152 | } | ||
153 | __setup("noxsaveopt", x86_xsaveopt_setup); | ||
154 | |||
147 | #ifdef CONFIG_X86_32 | 155 | #ifdef CONFIG_X86_32 |
148 | static int cachesize_override __cpuinitdata = -1; | 156 | static int cachesize_override __cpuinitdata = -1; |
149 | static int disable_x86_serial_nr __cpuinitdata = 1; | 157 | static int disable_x86_serial_nr __cpuinitdata = 1; |
@@ -551,6 +559,16 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | |||
551 | c->x86_capability[4] = excap; | 559 | c->x86_capability[4] = excap; |
552 | } | 560 | } |
553 | 561 | ||
562 | /* Additional Intel-defined flags: level 0x00000007 */ | ||
563 | if (c->cpuid_level >= 0x00000007) { | ||
564 | u32 eax, ebx, ecx, edx; | ||
565 | |||
566 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); | ||
567 | |||
568 | if (eax > 0) | ||
569 | c->x86_capability[9] = ebx; | ||
570 | } | ||
571 | |||
554 | /* AMD-defined flags: level 0x80000001 */ | 572 | /* AMD-defined flags: level 0x80000001 */ |
555 | xlvl = cpuid_eax(0x80000000); | 573 | xlvl = cpuid_eax(0x80000000); |
556 | c->extended_cpuid_level = xlvl; | 574 | c->extended_cpuid_level = xlvl; |
@@ -576,6 +594,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | |||
576 | if (c->extended_cpuid_level >= 0x80000007) | 594 | if (c->extended_cpuid_level >= 0x80000007) |
577 | c->x86_power = cpuid_edx(0x80000007); | 595 | c->x86_power = cpuid_edx(0x80000007); |
578 | 596 | ||
597 | init_scattered_cpuid_features(c); | ||
579 | } | 598 | } |
580 | 599 | ||
581 | static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) | 600 | static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) |
@@ -731,7 +750,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |||
731 | 750 | ||
732 | get_model_name(c); /* Default name */ | 751 | get_model_name(c); /* Default name */ |
733 | 752 | ||
734 | init_scattered_cpuid_features(c); | ||
735 | detect_nopl(c); | 753 | detect_nopl(c); |
736 | } | 754 | } |
737 | 755 | ||
@@ -1192,6 +1210,7 @@ void __cpuinit cpu_init(void) | |||
1192 | dbg_restore_debug_regs(); | 1210 | dbg_restore_debug_regs(); |
1193 | 1211 | ||
1194 | fpu_init(); | 1212 | fpu_init(); |
1213 | xsave_init(); | ||
1195 | 1214 | ||
1196 | raw_local_save_flags(kernel_eflags); | 1215 | raw_local_save_flags(kernel_eflags); |
1197 | 1216 | ||
@@ -1252,12 +1271,7 @@ void __cpuinit cpu_init(void) | |||
1252 | clear_used_math(); | 1271 | clear_used_math(); |
1253 | mxcsr_feature_mask_init(); | 1272 | mxcsr_feature_mask_init(); |
1254 | 1273 | ||
1255 | /* | 1274 | fpu_init(); |
1256 | * Boot processor to setup the FP and extended state context info. | ||
1257 | */ | ||
1258 | if (smp_processor_id() == boot_cpu_id) | ||
1259 | init_thread_xstate(); | ||
1260 | |||
1261 | xsave_init(); | 1275 | xsave_init(); |
1262 | } | 1276 | } |
1263 | #endif | 1277 | #endif |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 1d3cddaa40ee..cd8da247dda1 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -34,7 +34,6 @@ | |||
34 | #include <linux/compiler.h> | 34 | #include <linux/compiler.h> |
35 | #include <linux/dmi.h> | 35 | #include <linux/dmi.h> |
36 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
37 | #include <trace/events/power.h> | ||
38 | 37 | ||
39 | #include <linux/acpi.h> | 38 | #include <linux/acpi.h> |
40 | #include <linux/io.h> | 39 | #include <linux/io.h> |
@@ -73,7 +72,7 @@ struct acpi_cpufreq_data { | |||
73 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); | 72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); |
74 | 73 | ||
75 | /* acpi_perf_data is a pointer to percpu data. */ | 74 | /* acpi_perf_data is a pointer to percpu data. */ |
76 | static struct acpi_processor_performance *acpi_perf_data; | 75 | static struct acpi_processor_performance __percpu *acpi_perf_data; |
77 | 76 | ||
78 | static struct cpufreq_driver acpi_cpufreq_driver; | 77 | static struct cpufreq_driver acpi_cpufreq_driver; |
79 | 78 | ||
@@ -324,8 +323,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
324 | } | 323 | } |
325 | } | 324 | } |
326 | 325 | ||
327 | trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency); | ||
328 | |||
329 | switch (data->cpu_feature) { | 326 | switch (data->cpu_feature) { |
330 | case SYSTEM_INTEL_MSR_CAPABLE: | 327 | case SYSTEM_INTEL_MSR_CAPABLE: |
331 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | 328 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; |
@@ -351,7 +348,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
351 | 348 | ||
352 | freqs.old = perf->states[perf->state].core_frequency * 1000; | 349 | freqs.old = perf->states[perf->state].core_frequency * 1000; |
353 | freqs.new = data->freq_table[next_state].frequency; | 350 | freqs.new = data->freq_table[next_state].frequency; |
354 | for_each_cpu(i, cmd.mask) { | 351 | for_each_cpu(i, policy->cpus) { |
355 | freqs.cpu = i; | 352 | freqs.cpu = i; |
356 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 353 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
357 | } | 354 | } |
@@ -367,7 +364,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
367 | } | 364 | } |
368 | } | 365 | } |
369 | 366 | ||
370 | for_each_cpu(i, cmd.mask) { | 367 | for_each_cpu(i, policy->cpus) { |
371 | freqs.cpu = i; | 368 | freqs.cpu = i; |
372 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 369 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
373 | } | 370 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index 16e3483be9e3..32974cf84232 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | |||
@@ -169,12 +169,9 @@ static int gx_freq_mult[16] = { | |||
169 | * Low Level chipset interface * | 169 | * Low Level chipset interface * |
170 | ****************************************************************/ | 170 | ****************************************************************/ |
171 | static struct pci_device_id gx_chipset_tbl[] __initdata = { | 171 | static struct pci_device_id gx_chipset_tbl[] __initdata = { |
172 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, | 172 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), }, |
173 | PCI_ANY_ID, PCI_ANY_ID }, | 173 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), }, |
174 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, | 174 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), }, |
175 | PCI_ANY_ID, PCI_ANY_ID }, | ||
176 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, | ||
177 | PCI_ANY_ID, PCI_ANY_ID }, | ||
178 | { 0, }, | 175 | { 0, }, |
179 | }; | 176 | }; |
180 | 177 | ||
@@ -199,7 +196,7 @@ static __init struct pci_dev *gx_detect_chipset(void) | |||
199 | } | 196 | } |
200 | 197 | ||
201 | /* detect which companion chip is used */ | 198 | /* detect which companion chip is used */ |
202 | while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { | 199 | for_each_pci_dev(gx_pci) { |
203 | if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) | 200 | if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) |
204 | return gx_pci; | 201 | return gx_pci; |
205 | } | 202 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 7e7eea4f8261..03162dac6271 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c | |||
@@ -426,7 +426,7 @@ static int guess_fsb(int mult) | |||
426 | } | 426 | } |
427 | 427 | ||
428 | 428 | ||
429 | static int __init longhaul_get_ranges(void) | 429 | static int __cpuinit longhaul_get_ranges(void) |
430 | { | 430 | { |
431 | unsigned int i, j, k = 0; | 431 | unsigned int i, j, k = 0; |
432 | unsigned int ratio; | 432 | unsigned int ratio; |
@@ -530,7 +530,7 @@ static int __init longhaul_get_ranges(void) | |||
530 | } | 530 | } |
531 | 531 | ||
532 | 532 | ||
533 | static void __init longhaul_setup_voltagescaling(void) | 533 | static void __cpuinit longhaul_setup_voltagescaling(void) |
534 | { | 534 | { |
535 | union msr_longhaul longhaul; | 535 | union msr_longhaul longhaul; |
536 | struct mV_pos minvid, maxvid, vid; | 536 | struct mV_pos minvid, maxvid, vid; |
@@ -784,7 +784,7 @@ static int longhaul_setup_southbridge(void) | |||
784 | return 0; | 784 | return 0; |
785 | } | 785 | } |
786 | 786 | ||
787 | static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | 787 | static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy) |
788 | { | 788 | { |
789 | struct cpuinfo_x86 *c = &cpu_data(0); | 789 | struct cpuinfo_x86 *c = &cpu_data(0); |
790 | char *cpuname = NULL; | 790 | char *cpuname = NULL; |
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h index e2360a469f79..cbf48fbca881 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.h +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h | |||
@@ -56,7 +56,7 @@ union msr_longhaul { | |||
56 | /* | 56 | /* |
57 | * VIA C3 Samuel 1 & Samuel 2 (stepping 0) | 57 | * VIA C3 Samuel 1 & Samuel 2 (stepping 0) |
58 | */ | 58 | */ |
59 | static const int __initdata samuel1_mults[16] = { | 59 | static const int __cpuinitdata samuel1_mults[16] = { |
60 | -1, /* 0000 -> RESERVED */ | 60 | -1, /* 0000 -> RESERVED */ |
61 | 30, /* 0001 -> 3.0x */ | 61 | 30, /* 0001 -> 3.0x */ |
62 | 40, /* 0010 -> 4.0x */ | 62 | 40, /* 0010 -> 4.0x */ |
@@ -75,7 +75,7 @@ static const int __initdata samuel1_mults[16] = { | |||
75 | -1, /* 1111 -> RESERVED */ | 75 | -1, /* 1111 -> RESERVED */ |
76 | }; | 76 | }; |
77 | 77 | ||
78 | static const int __initdata samuel1_eblcr[16] = { | 78 | static const int __cpuinitdata samuel1_eblcr[16] = { |
79 | 50, /* 0000 -> RESERVED */ | 79 | 50, /* 0000 -> RESERVED */ |
80 | 30, /* 0001 -> 3.0x */ | 80 | 30, /* 0001 -> 3.0x */ |
81 | 40, /* 0010 -> 4.0x */ | 81 | 40, /* 0010 -> 4.0x */ |
@@ -97,7 +97,7 @@ static const int __initdata samuel1_eblcr[16] = { | |||
97 | /* | 97 | /* |
98 | * VIA C3 Samuel2 Stepping 1->15 | 98 | * VIA C3 Samuel2 Stepping 1->15 |
99 | */ | 99 | */ |
100 | static const int __initdata samuel2_eblcr[16] = { | 100 | static const int __cpuinitdata samuel2_eblcr[16] = { |
101 | 50, /* 0000 -> 5.0x */ | 101 | 50, /* 0000 -> 5.0x */ |
102 | 30, /* 0001 -> 3.0x */ | 102 | 30, /* 0001 -> 3.0x */ |
103 | 40, /* 0010 -> 4.0x */ | 103 | 40, /* 0010 -> 4.0x */ |
@@ -119,7 +119,7 @@ static const int __initdata samuel2_eblcr[16] = { | |||
119 | /* | 119 | /* |
120 | * VIA C3 Ezra | 120 | * VIA C3 Ezra |
121 | */ | 121 | */ |
122 | static const int __initdata ezra_mults[16] = { | 122 | static const int __cpuinitdata ezra_mults[16] = { |
123 | 100, /* 0000 -> 10.0x */ | 123 | 100, /* 0000 -> 10.0x */ |
124 | 30, /* 0001 -> 3.0x */ | 124 | 30, /* 0001 -> 3.0x */ |
125 | 40, /* 0010 -> 4.0x */ | 125 | 40, /* 0010 -> 4.0x */ |
@@ -138,7 +138,7 @@ static const int __initdata ezra_mults[16] = { | |||
138 | 120, /* 1111 -> 12.0x */ | 138 | 120, /* 1111 -> 12.0x */ |
139 | }; | 139 | }; |
140 | 140 | ||
141 | static const int __initdata ezra_eblcr[16] = { | 141 | static const int __cpuinitdata ezra_eblcr[16] = { |
142 | 50, /* 0000 -> 5.0x */ | 142 | 50, /* 0000 -> 5.0x */ |
143 | 30, /* 0001 -> 3.0x */ | 143 | 30, /* 0001 -> 3.0x */ |
144 | 40, /* 0010 -> 4.0x */ | 144 | 40, /* 0010 -> 4.0x */ |
@@ -160,7 +160,7 @@ static const int __initdata ezra_eblcr[16] = { | |||
160 | /* | 160 | /* |
161 | * VIA C3 (Ezra-T) [C5M]. | 161 | * VIA C3 (Ezra-T) [C5M]. |
162 | */ | 162 | */ |
163 | static const int __initdata ezrat_mults[32] = { | 163 | static const int __cpuinitdata ezrat_mults[32] = { |
164 | 100, /* 0000 -> 10.0x */ | 164 | 100, /* 0000 -> 10.0x */ |
165 | 30, /* 0001 -> 3.0x */ | 165 | 30, /* 0001 -> 3.0x */ |
166 | 40, /* 0010 -> 4.0x */ | 166 | 40, /* 0010 -> 4.0x */ |
@@ -196,7 +196,7 @@ static const int __initdata ezrat_mults[32] = { | |||
196 | -1, /* 1111 -> RESERVED (12.0x) */ | 196 | -1, /* 1111 -> RESERVED (12.0x) */ |
197 | }; | 197 | }; |
198 | 198 | ||
199 | static const int __initdata ezrat_eblcr[32] = { | 199 | static const int __cpuinitdata ezrat_eblcr[32] = { |
200 | 50, /* 0000 -> 5.0x */ | 200 | 50, /* 0000 -> 5.0x */ |
201 | 30, /* 0001 -> 3.0x */ | 201 | 30, /* 0001 -> 3.0x */ |
202 | 40, /* 0010 -> 4.0x */ | 202 | 40, /* 0010 -> 4.0x */ |
@@ -235,7 +235,7 @@ static const int __initdata ezrat_eblcr[32] = { | |||
235 | /* | 235 | /* |
236 | * VIA C3 Nehemiah */ | 236 | * VIA C3 Nehemiah */ |
237 | 237 | ||
238 | static const int __initdata nehemiah_mults[32] = { | 238 | static const int __cpuinitdata nehemiah_mults[32] = { |
239 | 100, /* 0000 -> 10.0x */ | 239 | 100, /* 0000 -> 10.0x */ |
240 | -1, /* 0001 -> 16.0x */ | 240 | -1, /* 0001 -> 16.0x */ |
241 | 40, /* 0010 -> 4.0x */ | 241 | 40, /* 0010 -> 4.0x */ |
@@ -270,7 +270,7 @@ static const int __initdata nehemiah_mults[32] = { | |||
270 | -1, /* 1111 -> 12.0x */ | 270 | -1, /* 1111 -> 12.0x */ |
271 | }; | 271 | }; |
272 | 272 | ||
273 | static const int __initdata nehemiah_eblcr[32] = { | 273 | static const int __cpuinitdata nehemiah_eblcr[32] = { |
274 | 50, /* 0000 -> 5.0x */ | 274 | 50, /* 0000 -> 5.0x */ |
275 | 160, /* 0001 -> 16.0x */ | 275 | 160, /* 0001 -> 16.0x */ |
276 | 40, /* 0010 -> 4.0x */ | 276 | 40, /* 0010 -> 4.0x */ |
@@ -315,7 +315,7 @@ struct mV_pos { | |||
315 | unsigned short pos; | 315 | unsigned short pos; |
316 | }; | 316 | }; |
317 | 317 | ||
318 | static const struct mV_pos __initdata vrm85_mV[32] = { | 318 | static const struct mV_pos __cpuinitdata vrm85_mV[32] = { |
319 | {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, | 319 | {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, |
320 | {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, | 320 | {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, |
321 | {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, | 321 | {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, |
@@ -326,14 +326,14 @@ static const struct mV_pos __initdata vrm85_mV[32] = { | |||
326 | {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} | 326 | {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} |
327 | }; | 327 | }; |
328 | 328 | ||
329 | static const unsigned char __initdata mV_vrm85[32] = { | 329 | static const unsigned char __cpuinitdata mV_vrm85[32] = { |
330 | 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, | 330 | 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, |
331 | 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, | 331 | 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, |
332 | 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, | 332 | 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, |
333 | 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 | 333 | 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 |
334 | }; | 334 | }; |
335 | 335 | ||
336 | static const struct mV_pos __initdata mobilevrm_mV[32] = { | 336 | static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = { |
337 | {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, | 337 | {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, |
338 | {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, | 338 | {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, |
339 | {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, | 339 | {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, |
@@ -344,7 +344,7 @@ static const struct mV_pos __initdata mobilevrm_mV[32] = { | |||
344 | {675, 3}, {650, 2}, {625, 1}, {600, 0} | 344 | {675, 3}, {650, 2}, {625, 1}, {600, 0} |
345 | }; | 345 | }; |
346 | 346 | ||
347 | static const unsigned char __initdata mV_mobilevrm[32] = { | 347 | static const unsigned char __cpuinitdata mV_mobilevrm[32] = { |
348 | 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, | 348 | 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, |
349 | 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, | 349 | 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, |
350 | 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, | 350 | 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, |
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index e7b559d74c52..fc09f142d94d 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c | |||
@@ -165,8 +165,8 @@ static unsigned int longrun_get(unsigned int cpu) | |||
165 | * TMTA rules: | 165 | * TMTA rules: |
166 | * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) | 166 | * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) |
167 | */ | 167 | */ |
168 | static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, | 168 | static unsigned int __cpuinit longrun_determine_freqs(unsigned int *low_freq, |
169 | unsigned int *high_freq) | 169 | unsigned int *high_freq) |
170 | { | 170 | { |
171 | u32 msr_lo, msr_hi; | 171 | u32 msr_lo, msr_hi; |
172 | u32 save_lo, save_hi; | 172 | u32 save_lo, save_hi; |
@@ -258,7 +258,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, | |||
258 | } | 258 | } |
259 | 259 | ||
260 | 260 | ||
261 | static int __init longrun_cpu_init(struct cpufreq_policy *policy) | 261 | static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy) |
262 | { | 262 | { |
263 | int result = 0; | 263 | int result = 0; |
264 | 264 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 7b8a8ba67b07..bd1cac747f67 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | |||
@@ -178,13 +178,8 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | |||
178 | } | 178 | } |
179 | } | 179 | } |
180 | 180 | ||
181 | if (c->x86 != 0xF) { | 181 | if (c->x86 != 0xF) |
182 | if (!cpu_has(c, X86_FEATURE_EST)) | ||
183 | printk(KERN_WARNING PFX "Unknown CPU. " | ||
184 | "Please send an e-mail to " | ||
185 | "<cpufreq@vger.kernel.org>\n"); | ||
186 | return 0; | 182 | return 0; |
187 | } | ||
188 | 183 | ||
189 | /* on P-4s, the TSC runs with constant frequency independent whether | 184 | /* on P-4s, the TSC runs with constant frequency independent whether |
190 | * throttling is active or not. */ | 185 | * throttling is active or not. */ |
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index a36de5bbb622..994230d4dc4e 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | |||
@@ -110,7 +110,7 @@ struct pcc_cpu { | |||
110 | u32 output_offset; | 110 | u32 output_offset; |
111 | }; | 111 | }; |
112 | 112 | ||
113 | static struct pcc_cpu *pcc_cpu_info; | 113 | static struct pcc_cpu __percpu *pcc_cpu_info; |
114 | 114 | ||
115 | static int pcc_cpufreq_verify(struct cpufreq_policy *policy) | 115 | static int pcc_cpufreq_verify(struct cpufreq_policy *policy) |
116 | { | 116 | { |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 9a97116f89e5..4a45fd6e41ba 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c | |||
@@ -569,7 +569,7 @@ static int powernow_verify(struct cpufreq_policy *policy) | |||
569 | * We will then get the same kind of behaviour already tested under | 569 | * We will then get the same kind of behaviour already tested under |
570 | * the "well-known" other OS. | 570 | * the "well-known" other OS. |
571 | */ | 571 | */ |
572 | static int __init fixup_sgtc(void) | 572 | static int __cpuinit fixup_sgtc(void) |
573 | { | 573 | { |
574 | unsigned int sgtc; | 574 | unsigned int sgtc; |
575 | unsigned int m; | 575 | unsigned int m; |
@@ -603,7 +603,7 @@ static unsigned int powernow_get(unsigned int cpu) | |||
603 | } | 603 | } |
604 | 604 | ||
605 | 605 | ||
606 | static int __init acer_cpufreq_pst(const struct dmi_system_id *d) | 606 | static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d) |
607 | { | 607 | { |
608 | printk(KERN_WARNING PFX | 608 | printk(KERN_WARNING PFX |
609 | "%s laptop with broken PST tables in BIOS detected.\n", | 609 | "%s laptop with broken PST tables in BIOS detected.\n", |
@@ -621,7 +621,7 @@ static int __init acer_cpufreq_pst(const struct dmi_system_id *d) | |||
621 | * A BIOS update is all that can save them. | 621 | * A BIOS update is all that can save them. |
622 | * Mention this, and disable cpufreq. | 622 | * Mention this, and disable cpufreq. |
623 | */ | 623 | */ |
624 | static struct dmi_system_id __initdata powernow_dmi_table[] = { | 624 | static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = { |
625 | { | 625 | { |
626 | .callback = acer_cpufreq_pst, | 626 | .callback = acer_cpufreq_pst, |
627 | .ident = "Acer Aspire", | 627 | .ident = "Acer Aspire", |
@@ -633,7 +633,7 @@ static struct dmi_system_id __initdata powernow_dmi_table[] = { | |||
633 | { } | 633 | { } |
634 | }; | 634 | }; |
635 | 635 | ||
636 | static int __init powernow_cpu_init(struct cpufreq_policy *policy) | 636 | static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy) |
637 | { | 637 | { |
638 | union msr_fidvidstatus fidvidstatus; | 638 | union msr_fidvidstatus fidvidstatus; |
639 | int result; | 639 | int result; |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 3e90cce3dc8b..491977baf6c0 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -9,7 +9,7 @@ | |||
9 | * Based on the powernow-k7.c module written by Dave Jones. | 9 | * Based on the powernow-k7.c module written by Dave Jones. |
10 | * (C) 2003 Dave Jones on behalf of SuSE Labs | 10 | * (C) 2003 Dave Jones on behalf of SuSE Labs |
11 | * (C) 2004 Dominik Brodowski <linux@brodo.de> | 11 | * (C) 2004 Dominik Brodowski <linux@brodo.de> |
12 | * (C) 2004 Pavel Machek <pavel@suse.cz> | 12 | * (C) 2004 Pavel Machek <pavel@ucw.cz> |
13 | * Licensed under the terms of the GNU GPL License version 2. | 13 | * Licensed under the terms of the GNU GPL License version 2. |
14 | * Based upon datasheets & sample CPUs kindly provided by AMD. | 14 | * Based upon datasheets & sample CPUs kindly provided by AMD. |
15 | * | 15 | * |
@@ -806,6 +806,8 @@ static int find_psb_table(struct powernow_k8_data *data) | |||
806 | * www.amd.com | 806 | * www.amd.com |
807 | */ | 807 | */ |
808 | printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); | 808 | printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); |
809 | printk(KERN_ERR PFX "Make sure that your BIOS is up to date" | ||
810 | " and Cool'N'Quiet support is enabled in BIOS setup\n"); | ||
809 | return -ENODEV; | 811 | return -ENODEV; |
810 | } | 812 | } |
811 | 813 | ||
@@ -910,8 +912,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, | |||
910 | { | 912 | { |
911 | int i; | 913 | int i; |
912 | u32 hi = 0, lo = 0; | 914 | u32 hi = 0, lo = 0; |
913 | rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); | 915 | rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi); |
914 | data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; | 916 | data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; |
915 | 917 | ||
916 | for (i = 0; i < data->acpi_data.state_count; i++) { | 918 | for (i = 0; i < data->acpi_data.state_count; i++) { |
917 | u32 index; | 919 | u32 index; |
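The fill_powernow_table_pstate() hunk above swaps the rdmsr() output arguments: the kernel macro takes the low 32 bits first, rdmsr(msr, low, high), so the old code masked the wrong half of MSR_PSTATE_CUR_LIMIT. A worked sketch of the corrected read; the wrapper function name is hypothetical:

    /* Illustrative sketch only -- the argument order is what the fix restores. */
    static u32 example_read_max_hw_pstate(void)
    {
            u32 lo, hi;

            rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);    /* lo = EAX, hi = EDX */
            return (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
    }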
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index dd531cc56a8f..8095f8611f8a 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -34,6 +34,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = | |||
34 | { | 34 | { |
35 | &x86_hyper_vmware, | 35 | &x86_hyper_vmware, |
36 | &x86_hyper_ms_hyperv, | 36 | &x86_hyper_ms_hyperv, |
37 | #ifdef CONFIG_XEN_PVHVM | ||
38 | &x86_hyper_xen_hvm, | ||
39 | #endif | ||
37 | }; | 40 | }; |
38 | 41 | ||
39 | const struct hypervisor_x86 *x86_hyper; | 42 | const struct hypervisor_x86 *x86_hyper; |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 33eae2062cf5..898c2f4eab88 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -347,8 +347,8 @@ static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) | |||
347 | return l3; | 347 | return l3; |
348 | } | 348 | } |
349 | 349 | ||
350 | static void __cpuinit | 350 | static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, |
351 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | 351 | int index) |
352 | { | 352 | { |
353 | int node; | 353 | int node; |
354 | 354 | ||
@@ -396,20 +396,39 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | |||
396 | this_leaf->l3 = l3_caches[node]; | 396 | this_leaf->l3 = l3_caches[node]; |
397 | } | 397 | } |
398 | 398 | ||
399 | /* | ||
400 | * check whether a slot used for disabling an L3 index is occupied. | ||
401 | * @l3: L3 cache descriptor | ||
402 | * @slot: slot number (0..1) | ||
403 | * | ||
404 | * @returns: the disabled index if used or negative value if slot free. | ||
405 | */ | ||
406 | int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot) | ||
407 | { | ||
408 | unsigned int reg = 0; | ||
409 | |||
410 | pci_read_config_dword(l3->dev, 0x1BC + slot * 4, ®); | ||
411 | |||
412 | /* check whether this slot is activated already */ | ||
413 | if (reg & (3UL << 30)) | ||
414 | return reg & 0xfff; | ||
415 | |||
416 | return -1; | ||
417 | } | ||
418 | |||
399 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | 419 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, |
400 | unsigned int slot) | 420 | unsigned int slot) |
401 | { | 421 | { |
402 | struct pci_dev *dev = this_leaf->l3->dev; | 422 | int index; |
403 | unsigned int reg = 0; | ||
404 | 423 | ||
405 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) | 424 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
406 | return -EINVAL; | 425 | return -EINVAL; |
407 | 426 | ||
408 | if (!dev) | 427 | index = amd_get_l3_disable_slot(this_leaf->l3, slot); |
409 | return -EINVAL; | 428 | if (index >= 0) |
429 | return sprintf(buf, "%d\n", index); | ||
410 | 430 | ||
411 | pci_read_config_dword(dev, 0x1BC + slot * 4, ®); | 431 | return sprintf(buf, "FREE\n"); |
412 | return sprintf(buf, "0x%08x\n", reg); | ||
413 | } | 432 | } |
414 | 433 | ||
415 | #define SHOW_CACHE_DISABLE(slot) \ | 434 | #define SHOW_CACHE_DISABLE(slot) \ |
@@ -451,37 +470,74 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, | |||
451 | } | 470 | } |
452 | } | 471 | } |
453 | 472 | ||
454 | 473 | /* | |
455 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | 474 | * disable a L3 cache index by using a disable-slot |
456 | const char *buf, size_t count, | 475 | * |
457 | unsigned int slot) | 476 | * @l3: L3 cache descriptor |
477 | * @cpu: A CPU on the node containing the L3 cache | ||
478 | * @slot: slot number (0..1) | ||
479 | * @index: index to disable | ||
480 | * | ||
481 | * @return: 0 on success, error status on failure | ||
482 | */ | ||
483 | int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot, | ||
484 | unsigned long index) | ||
458 | { | 485 | { |
459 | struct pci_dev *dev = this_leaf->l3->dev; | 486 | int ret = 0; |
460 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
461 | unsigned long val = 0; | ||
462 | 487 | ||
463 | #define SUBCACHE_MASK (3UL << 20) | 488 | #define SUBCACHE_MASK (3UL << 20) |
464 | #define SUBCACHE_INDEX 0xfff | 489 | #define SUBCACHE_INDEX 0xfff |
465 | 490 | ||
466 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) | 491 | /* |
492 | * check whether this slot is already used or | ||
493 | * the index is already disabled | ||
494 | */ | ||
495 | ret = amd_get_l3_disable_slot(l3, slot); | ||
496 | if (ret >= 0) | ||
467 | return -EINVAL; | 497 | return -EINVAL; |
468 | 498 | ||
499 | /* | ||
500 | * check whether the other slot has disabled the | ||
501 | * same index already | ||
502 | */ | ||
503 | if (index == amd_get_l3_disable_slot(l3, !slot)) | ||
504 | return -EINVAL; | ||
505 | |||
506 | /* do not allow writes outside of allowed bits */ | ||
507 | if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | ||
508 | ((index & SUBCACHE_INDEX) > l3->indices)) | ||
509 | return -EINVAL; | ||
510 | |||
511 | amd_l3_disable_index(l3, cpu, slot, index); | ||
512 | |||
513 | return 0; | ||
514 | } | ||
515 | |||
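Before touching the hardware, the new amd_set_l3_disable_slot() enforces three invariants: the requested slot must be free, the other slot must not already disable the same index, and the index must stay within SUBCACHE_MASK | SUBCACHE_INDEX and below l3->indices. A toy model of that bookkeeping is sketched below; it replaces the PCI accesses with an in-memory slot array and uses an invented index limit, so it is not the kernel code, just the same checks in isolation:

#include <stdio.h>

#define SUBCACHE_MASK  (3UL << 20)
#define SUBCACHE_INDEX 0xfffUL

/* toy state: slots[i] holds the disabled index, or -1 when the slot is free */
static long slots[2] = { -1, -1 };
static unsigned long max_indices = 0x7ff;	/* stand-in for l3->indices */

static int set_l3_disable_slot(unsigned int slot, unsigned long index)
{
	if (slot > 1 || slots[slot] >= 0)		/* bad slot or already in use */
		return -1;
	if ((long)index == slots[!slot])		/* other slot disables same index */
		return -1;
	if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
	    ((index & SUBCACHE_INDEX) > max_indices))	/* outside allowed bits */
		return -1;

	slots[slot] = (long)index;	/* the real code writes PCI config space here */
	return 0;
}

int main(void)
{
	printf("first disable : %d\n", set_l3_disable_slot(0, 0x3c));	/* accepted */
	printf("same index    : %d\n", set_l3_disable_slot(1, 0x3c));	/* rejected */
	printf("out of range  : %d\n", set_l3_disable_slot(1, 0x1000));	/* rejected */
	return 0;
}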
516 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | ||
517 | const char *buf, size_t count, | ||
518 | unsigned int slot) | ||
519 | { | ||
520 | unsigned long val = 0; | ||
521 | int cpu, err = 0; | ||
522 | |||
469 | if (!capable(CAP_SYS_ADMIN)) | 523 | if (!capable(CAP_SYS_ADMIN)) |
470 | return -EPERM; | 524 | return -EPERM; |
471 | 525 | ||
472 | if (!dev) | 526 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
473 | return -EINVAL; | 527 | return -EINVAL; |
474 | 528 | ||
475 | if (strict_strtoul(buf, 10, &val) < 0) | 529 | cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); |
476 | return -EINVAL; | ||
477 | 530 | ||
478 | /* do not allow writes outside of allowed bits */ | 531 | if (strict_strtoul(buf, 10, &val) < 0) |
479 | if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | ||
480 | ((val & SUBCACHE_INDEX) > this_leaf->l3->indices)) | ||
481 | return -EINVAL; | 532 | return -EINVAL; |
482 | 533 | ||
483 | amd_l3_disable_index(this_leaf->l3, cpu, slot, val); | 534 | err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val); |
484 | 535 | if (err) { | |
536 | if (err == -EEXIST) | ||
537 | printk(KERN_WARNING "L3 disable slot %d in use!\n", | ||
538 | slot); | ||
539 | return err; | ||
540 | } | ||
485 | return count; | 541 | return count; |
486 | } | 542 | } |
487 | 543 | ||
@@ -502,7 +558,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | |||
502 | 558 | ||
503 | #else /* CONFIG_CPU_SUP_AMD */ | 559 | #else /* CONFIG_CPU_SUP_AMD */ |
504 | static void __cpuinit | 560 | static void __cpuinit |
505 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | 561 | amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) |
506 | { | 562 | { |
507 | }; | 563 | }; |
508 | #endif /* CONFIG_CPU_SUP_AMD */ | 564 | #endif /* CONFIG_CPU_SUP_AMD */ |
@@ -518,7 +574,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index, | |||
518 | 574 | ||
519 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | 575 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { |
520 | amd_cpuid4(index, &eax, &ebx, &ecx); | 576 | amd_cpuid4(index, &eax, &ebx, &ecx); |
521 | amd_check_l3_disable(index, this_leaf); | 577 | amd_check_l3_disable(this_leaf, index); |
522 | } else { | 578 | } else { |
523 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | 579 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); |
524 | } | 580 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 18cc42562250..ed41562909fe 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -51,7 +51,7 @@ | |||
51 | static DEFINE_MUTEX(mce_read_mutex); | 51 | static DEFINE_MUTEX(mce_read_mutex); |
52 | 52 | ||
53 | #define rcu_dereference_check_mce(p) \ | 53 | #define rcu_dereference_check_mce(p) \ |
54 | rcu_dereference_check((p), \ | 54 | rcu_dereference_index_check((p), \ |
55 | rcu_read_lock_sched_held() || \ | 55 | rcu_read_lock_sched_held() || \ |
56 | lockdep_is_held(&mce_read_mutex)) | 56 | lockdep_is_held(&mce_read_mutex)) |
57 | 57 | ||
@@ -107,8 +107,8 @@ EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | |||
107 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, | 107 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, |
108 | void *data) | 108 | void *data) |
109 | { | 109 | { |
110 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); | 110 | pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n"); |
111 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); | 111 | pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n"); |
112 | 112 | ||
113 | return NOTIFY_STOP; | 113 | return NOTIFY_STOP; |
114 | } | 114 | } |
@@ -211,11 +211,11 @@ void mce_log(struct mce *mce) | |||
211 | 211 | ||
212 | static void print_mce(struct mce *m) | 212 | static void print_mce(struct mce *m) |
213 | { | 213 | { |
214 | pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | 214 | pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", |
215 | m->extcpu, m->mcgstatus, m->bank, m->status); | 215 | m->extcpu, m->mcgstatus, m->bank, m->status); |
216 | 216 | ||
217 | if (m->ip) { | 217 | if (m->ip) { |
218 | pr_emerg("RIP%s %02x:<%016Lx> ", | 218 | pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", |
219 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | 219 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", |
220 | m->cs, m->ip); | 220 | m->cs, m->ip); |
221 | 221 | ||
@@ -224,14 +224,14 @@ static void print_mce(struct mce *m) | |||
224 | pr_cont("\n"); | 224 | pr_cont("\n"); |
225 | } | 225 | } |
226 | 226 | ||
227 | pr_emerg("TSC %llx ", m->tsc); | 227 | pr_emerg(HW_ERR "TSC %llx ", m->tsc); |
228 | if (m->addr) | 228 | if (m->addr) |
229 | pr_cont("ADDR %llx ", m->addr); | 229 | pr_cont("ADDR %llx ", m->addr); |
230 | if (m->misc) | 230 | if (m->misc) |
231 | pr_cont("MISC %llx ", m->misc); | 231 | pr_cont("MISC %llx ", m->misc); |
232 | 232 | ||
233 | pr_cont("\n"); | 233 | pr_cont("\n"); |
234 | pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | 234 | pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", |
235 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); | 235 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); |
236 | 236 | ||
237 | /* | 237 | /* |
@@ -241,16 +241,6 @@ static void print_mce(struct mce *m) | |||
241 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); | 241 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); |
242 | } | 242 | } |
243 | 243 | ||
244 | static void print_mce_head(void) | ||
245 | { | ||
246 | pr_emerg("\nHARDWARE ERROR\n"); | ||
247 | } | ||
248 | |||
249 | static void print_mce_tail(void) | ||
250 | { | ||
251 | pr_emerg("This is not a software problem!\n"); | ||
252 | } | ||
253 | |||
254 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | 244 | #define PANIC_TIMEOUT 5 /* 5 seconds */ |
255 | 245 | ||
256 | static atomic_t mce_paniced; | 246 | static atomic_t mce_paniced; |
@@ -291,7 +281,6 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
291 | if (atomic_inc_return(&mce_fake_paniced) > 1) | 281 | if (atomic_inc_return(&mce_fake_paniced) > 1) |
292 | return; | 282 | return; |
293 | } | 283 | } |
294 | print_mce_head(); | ||
295 | /* First print corrected ones that are still unlogged */ | 284 | /* First print corrected ones that are still unlogged */ |
296 | for (i = 0; i < MCE_LOG_LEN; i++) { | 285 | for (i = 0; i < MCE_LOG_LEN; i++) { |
297 | struct mce *m = &mcelog.entry[i]; | 286 | struct mce *m = &mcelog.entry[i]; |
@@ -322,16 +311,15 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
322 | apei_err = apei_write_mce(final); | 311 | apei_err = apei_write_mce(final); |
323 | } | 312 | } |
324 | if (cpu_missing) | 313 | if (cpu_missing) |
325 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); | 314 | pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n"); |
326 | print_mce_tail(); | ||
327 | if (exp) | 315 | if (exp) |
328 | printk(KERN_EMERG "Machine check: %s\n", exp); | 316 | pr_emerg(HW_ERR "Machine check: %s\n", exp); |
329 | if (!fake_panic) { | 317 | if (!fake_panic) { |
330 | if (panic_timeout == 0) | 318 | if (panic_timeout == 0) |
331 | panic_timeout = mce_panic_timeout; | 319 | panic_timeout = mce_panic_timeout; |
332 | panic(msg); | 320 | panic(msg); |
333 | } else | 321 | } else |
334 | printk(KERN_EMERG "Fake kernel panic: %s\n", msg); | 322 | pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); |
335 | } | 323 | } |
336 | 324 | ||
337 | /* Support code for software error injection */ | 325 | /* Support code for software error injection */ |
@@ -600,6 +588,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
600 | */ | 588 | */ |
601 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { | 589 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { |
602 | mce_log(&m); | 590 | mce_log(&m); |
591 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); | ||
603 | add_taint(TAINT_MACHINE_CHECK); | 592 | add_taint(TAINT_MACHINE_CHECK); |
604 | } | 593 | } |
605 | 594 | ||
@@ -1220,7 +1209,7 @@ int mce_notify_irq(void) | |||
1220 | schedule_work(&mce_trigger_work); | 1209 | schedule_work(&mce_trigger_work); |
1221 | 1210 | ||
1222 | if (__ratelimit(&ratelimit)) | 1211 | if (__ratelimit(&ratelimit)) |
1223 | printk(KERN_INFO "Machine check events logged\n"); | 1212 | pr_info(HW_ERR "Machine check events logged\n"); |
1224 | 1213 | ||
1225 | return 1; | 1214 | return 1; |
1226 | } | 1215 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 62b48e40920a..6fcd0936194f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -95,19 +95,20 @@ static void cmci_discover(int banks, int boot) | |||
95 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 95 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
96 | 96 | ||
97 | /* Already owned by someone else? */ | 97 | /* Already owned by someone else? */ |
98 | if (val & CMCI_EN) { | 98 | if (val & MCI_CTL2_CMCI_EN) { |
99 | if (test_and_clear_bit(i, owned) && !boot) | 99 | if (test_and_clear_bit(i, owned) && !boot) |
100 | print_update("SHD", &hdr, i); | 100 | print_update("SHD", &hdr, i); |
101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
102 | continue; | 102 | continue; |
103 | } | 103 | } |
104 | 104 | ||
105 | val |= CMCI_EN | CMCI_THRESHOLD; | 105 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; |
106 | val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; | ||
106 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 107 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
107 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 108 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
108 | 109 | ||
109 | /* Did the enable bit stick? -- the bank supports CMCI */ | 110 | /* Did the enable bit stick? -- the bank supports CMCI */ |
110 | if (val & CMCI_EN) { | 111 | if (val & MCI_CTL2_CMCI_EN) { |
111 | if (!test_and_set_bit(i, owned) && !boot) | 112 | if (!test_and_set_bit(i, owned) && !boot) |
112 | print_update("CMCI", &hdr, i); | 113 | print_update("CMCI", &hdr, i); |
113 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 114 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
@@ -155,7 +156,7 @@ void cmci_clear(void) | |||
155 | continue; | 156 | continue; |
156 | /* Disable CMCI */ | 157 | /* Disable CMCI */ |
157 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 158 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
158 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | 159 | val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); |
159 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 160 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
160 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | 161 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
161 | } | 162 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index e1a0a3bf9716..c2a8b26d4fea 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -34,15 +34,25 @@ | |||
34 | /* How long to wait between reporting thermal events */ | 34 | /* How long to wait between reporting thermal events */ |
35 | #define CHECK_INTERVAL (300 * HZ) | 35 | #define CHECK_INTERVAL (300 * HZ) |
36 | 36 | ||
37 | #define THERMAL_THROTTLING_EVENT 0 | ||
38 | #define POWER_LIMIT_EVENT 1 | ||
39 | |||
37 | /* | 40 | /* |
38 | * Current thermal throttling state: | 41 | * Current thermal event state: |
39 | */ | 42 | */ |
40 | struct thermal_state { | 43 | struct _thermal_state { |
41 | bool is_throttled; | 44 | bool new_event; |
42 | 45 | int event; | |
43 | u64 next_check; | 46 | u64 next_check; |
44 | unsigned long throttle_count; | 47 | unsigned long count; |
45 | unsigned long last_throttle_count; | 48 | unsigned long last_count; |
49 | }; | ||
50 | |||
51 | struct thermal_state { | ||
52 | struct _thermal_state core_throttle; | ||
53 | struct _thermal_state core_power_limit; | ||
54 | struct _thermal_state package_throttle; | ||
55 | struct _thermal_state package_power_limit; | ||
46 | }; | 56 | }; |
47 | 57 | ||
48 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); | 58 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); |
@@ -53,11 +63,13 @@ static u32 lvtthmr_init __read_mostly; | |||
53 | 63 | ||
54 | #ifdef CONFIG_SYSFS | 64 | #ifdef CONFIG_SYSFS |
55 | #define define_therm_throt_sysdev_one_ro(_name) \ | 65 | #define define_therm_throt_sysdev_one_ro(_name) \ |
56 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 66 | static SYSDEV_ATTR(_name, 0444, \ |
67 | therm_throt_sysdev_show_##_name, \ | ||
68 | NULL) \ | ||
57 | 69 | ||
58 | #define define_therm_throt_sysdev_show_func(name) \ | 70 | #define define_therm_throt_sysdev_show_func(event, name) \ |
59 | \ | 71 | \ |
60 | static ssize_t therm_throt_sysdev_show_##name( \ | 72 | static ssize_t therm_throt_sysdev_show_##event##_##name( \ |
61 | struct sys_device *dev, \ | 73 | struct sys_device *dev, \ |
62 | struct sysdev_attribute *attr, \ | 74 | struct sysdev_attribute *attr, \ |
63 | char *buf) \ | 75 | char *buf) \ |
@@ -66,30 +78,42 @@ static ssize_t therm_throt_sysdev_show_##name( \ | |||
66 | ssize_t ret; \ | 78 | ssize_t ret; \ |
67 | \ | 79 | \ |
68 | preempt_disable(); /* CPU hotplug */ \ | 80 | preempt_disable(); /* CPU hotplug */ \ |
69 | if (cpu_online(cpu)) \ | 81 | if (cpu_online(cpu)) { \ |
70 | ret = sprintf(buf, "%lu\n", \ | 82 | ret = sprintf(buf, "%lu\n", \ |
71 | per_cpu(thermal_state, cpu).name); \ | 83 | per_cpu(thermal_state, cpu).event.name); \ |
72 | else \ | 84 | } else \ |
73 | ret = 0; \ | 85 | ret = 0; \ |
74 | preempt_enable(); \ | 86 | preempt_enable(); \ |
75 | \ | 87 | \ |
76 | return ret; \ | 88 | return ret; \ |
77 | } | 89 | } |
78 | 90 | ||
79 | define_therm_throt_sysdev_show_func(throttle_count); | 91 | define_therm_throt_sysdev_show_func(core_throttle, count); |
80 | define_therm_throt_sysdev_one_ro(throttle_count); | 92 | define_therm_throt_sysdev_one_ro(core_throttle_count); |
93 | |||
94 | define_therm_throt_sysdev_show_func(core_power_limit, count); | ||
95 | define_therm_throt_sysdev_one_ro(core_power_limit_count); | ||
96 | |||
97 | define_therm_throt_sysdev_show_func(package_throttle, count); | ||
98 | define_therm_throt_sysdev_one_ro(package_throttle_count); | ||
99 | |||
100 | define_therm_throt_sysdev_show_func(package_power_limit, count); | ||
101 | define_therm_throt_sysdev_one_ro(package_power_limit_count); | ||
81 | 102 | ||
82 | static struct attribute *thermal_throttle_attrs[] = { | 103 | static struct attribute *thermal_throttle_attrs[] = { |
83 | &attr_throttle_count.attr, | 104 | &attr_core_throttle_count.attr, |
84 | NULL | 105 | NULL |
85 | }; | 106 | }; |
86 | 107 | ||
87 | static struct attribute_group thermal_throttle_attr_group = { | 108 | static struct attribute_group thermal_attr_group = { |
88 | .attrs = thermal_throttle_attrs, | 109 | .attrs = thermal_throttle_attrs, |
89 | .name = "thermal_throttle" | 110 | .name = "thermal_throttle" |
90 | }; | 111 | }; |
91 | #endif /* CONFIG_SYSFS */ | 112 | #endif /* CONFIG_SYSFS */ |
92 | 113 | ||
114 | #define CORE_LEVEL 0 | ||
115 | #define PACKAGE_LEVEL 1 | ||
116 | |||
93 | /*** | 117 | /*** |
94 | * therm_throt_process - Process thermal throttling event from interrupt | 118 | * therm_throt_process - Process thermal throttling event from interrupt |
95 | * @curr: Whether the condition is current or not (boolean), since the | 119 | * @curr: Whether the condition is current or not (boolean), since the |
@@ -106,39 +130,70 @@ static struct attribute_group thermal_throttle_attr_group = { | |||
106 | * 1 : Event should be logged further, and a message has been | 130 | * 1 : Event should be logged further, and a message has been |
107 | * printed to the syslog. | 131 | * printed to the syslog. |
108 | */ | 132 | */ |
109 | static int therm_throt_process(bool is_throttled) | 133 | static int therm_throt_process(bool new_event, int event, int level) |
110 | { | 134 | { |
111 | struct thermal_state *state; | 135 | struct _thermal_state *state; |
112 | unsigned int this_cpu; | 136 | unsigned int this_cpu = smp_processor_id(); |
113 | bool was_throttled; | 137 | bool old_event; |
114 | u64 now; | 138 | u64 now; |
139 | struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); | ||
115 | 140 | ||
116 | this_cpu = smp_processor_id(); | ||
117 | now = get_jiffies_64(); | 141 | now = get_jiffies_64(); |
118 | state = &per_cpu(thermal_state, this_cpu); | 142 | if (level == CORE_LEVEL) { |
143 | if (event == THERMAL_THROTTLING_EVENT) | ||
144 | state = &pstate->core_throttle; | ||
145 | else if (event == POWER_LIMIT_EVENT) | ||
146 | state = &pstate->core_power_limit; | ||
147 | else | ||
148 | return 0; | ||
149 | } else if (level == PACKAGE_LEVEL) { | ||
150 | if (event == THERMAL_THROTTLING_EVENT) | ||
151 | state = &pstate->package_throttle; | ||
152 | else if (event == POWER_LIMIT_EVENT) | ||
153 | state = &pstate->package_power_limit; | ||
154 | else | ||
155 | return 0; | ||
156 | } else | ||
157 | return 0; | ||
119 | 158 | ||
120 | was_throttled = state->is_throttled; | 159 | old_event = state->new_event; |
121 | state->is_throttled = is_throttled; | 160 | state->new_event = new_event; |
122 | 161 | ||
123 | if (is_throttled) | 162 | if (new_event) |
124 | state->throttle_count++; | 163 | state->count++; |
125 | 164 | ||
126 | if (time_before64(now, state->next_check) && | 165 | if (time_before64(now, state->next_check) && |
127 | state->throttle_count != state->last_throttle_count) | 166 | state->count != state->last_count) |
128 | return 0; | 167 | return 0; |
129 | 168 | ||
130 | state->next_check = now + CHECK_INTERVAL; | 169 | state->next_check = now + CHECK_INTERVAL; |
131 | state->last_throttle_count = state->throttle_count; | 170 | state->last_count = state->count; |
132 | 171 | ||
133 | /* if we just entered the thermal event */ | 172 | /* if we just entered the thermal event */ |
134 | if (is_throttled) { | 173 | if (new_event) { |
135 | printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count); | 174 | if (event == THERMAL_THROTTLING_EVENT) |
175 | printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", | ||
176 | this_cpu, | ||
177 | level == CORE_LEVEL ? "Core" : "Package", | ||
178 | state->count); | ||
179 | else | ||
180 | printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n", | ||
181 | this_cpu, | ||
182 | level == CORE_LEVEL ? "Core" : "Package", | ||
183 | state->count); | ||
136 | 184 | ||
137 | add_taint(TAINT_MACHINE_CHECK); | 185 | add_taint(TAINT_MACHINE_CHECK); |
138 | return 1; | 186 | return 1; |
139 | } | 187 | } |
140 | if (was_throttled) { | 188 | if (old_event) { |
141 | printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu); | 189 | if (event == THERMAL_THROTTLING_EVENT) |
190 | printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", | ||
191 | this_cpu, | ||
192 | level == CORE_LEVEL ? "Core" : "Package"); | ||
193 | else | ||
194 | printk(KERN_INFO "CPU%d: %s power limit normal\n", | ||
195 | this_cpu, | ||
196 | level == CORE_LEVEL ? "Core" : "Package"); | ||
142 | return 1; | 197 | return 1; |
143 | } | 198 | } |
144 | 199 | ||
@@ -149,13 +204,32 @@ static int therm_throt_process(bool is_throttled) | |||
149 | /* Add/Remove thermal_throttle interface for CPU device: */ | 204 | /* Add/Remove thermal_throttle interface for CPU device: */ |
150 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) | 205 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) |
151 | { | 206 | { |
152 | return sysfs_create_group(&sys_dev->kobj, | 207 | int err; |
153 | &thermal_throttle_attr_group); | 208 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); |
209 | |||
210 | err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); | ||
211 | if (err) | ||
212 | return err; | ||
213 | |||
214 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
215 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
216 | &attr_core_power_limit_count.attr, | ||
217 | thermal_attr_group.name); | ||
218 | if (cpu_has(c, X86_FEATURE_PTS)) | ||
219 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
220 | &attr_package_throttle_count.attr, | ||
221 | thermal_attr_group.name); | ||
222 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
223 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
224 | &attr_package_power_limit_count.attr, | ||
225 | thermal_attr_group.name); | ||
226 | |||
227 | return err; | ||
154 | } | 228 | } |
155 | 229 | ||
156 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | 230 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) |
157 | { | 231 | { |
158 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); | 232 | sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group); |
159 | } | 233 | } |
160 | 234 | ||
161 | /* Mutex protecting device creation against CPU hotplug: */ | 235 | /* Mutex protecting device creation against CPU hotplug: */ |
@@ -226,14 +300,50 @@ device_initcall(thermal_throttle_init_device); | |||
226 | 300 | ||
227 | #endif /* CONFIG_SYSFS */ | 301 | #endif /* CONFIG_SYSFS */ |
228 | 302 | ||
303 | /* | ||
304 | * Set up the two most significant bits to tell the mce log which thermal | ||
305 | * event type this is. | ||
306 | * This is a temporary solution and may change in the future along with | ||
307 | * the mce log infrastructure. | ||
308 | */ | ||
309 | #define CORE_THROTTLED (0) | ||
310 | #define CORE_POWER_LIMIT ((__u64)1 << 62) | ||
311 | #define PACKAGE_THROTTLED ((__u64)2 << 62) | ||
312 | #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) | ||
313 | |||
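Since the two top bits of the logged value carry the event type and the rest is the raw THERM_STATUS contents, a consumer of the data passed to mce_log_therm_throt_event() could separate the two fields as in the following sketch (stand-alone C; the sample value is invented for illustration):

#include <stdio.h>
#include <stdint.h>

/* map the top two bits of a logged thermal value back to the event type */
static const char *therm_event_type(uint64_t logged)
{
	switch (logged >> 62) {
	case 0:  return "core throttled";
	case 1:  return "core power limit";
	case 2:  return "package throttled";
	default: return "package power limit";
	}
}

int main(void)
{
	uint64_t logged = ((uint64_t)2 << 62) | 0x1;		/* hypothetical log entry */
	uint64_t status = logged & ~((uint64_t)3 << 62);	/* raw MSR status bits */

	printf("event: %s, status bits: %#llx\n",
	       therm_event_type(logged), (unsigned long long)status);
	return 0;
}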
229 | /* Thermal transition interrupt handler */ | 314 | /* Thermal transition interrupt handler */ |
230 | static void intel_thermal_interrupt(void) | 315 | static void intel_thermal_interrupt(void) |
231 | { | 316 | { |
232 | __u64 msr_val; | 317 | __u64 msr_val; |
318 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); | ||
233 | 319 | ||
234 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | 320 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
235 | if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0)) | 321 | |
236 | mce_log_therm_throt_event(msr_val); | 322 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, |
323 | THERMAL_THROTTLING_EVENT, | ||
324 | CORE_LEVEL) != 0) | ||
325 | mce_log_therm_throt_event(CORE_THROTTLED | msr_val); | ||
326 | |||
327 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
328 | if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, | ||
329 | POWER_LIMIT_EVENT, | ||
330 | CORE_LEVEL) != 0) | ||
331 | mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); | ||
332 | |||
333 | if (cpu_has(c, X86_FEATURE_PTS)) { | ||
334 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | ||
335 | if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, | ||
336 | THERMAL_THROTTLING_EVENT, | ||
337 | PACKAGE_LEVEL) != 0) | ||
338 | mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); | ||
339 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
340 | if (therm_throt_process(msr_val & | ||
341 | PACKAGE_THERM_STATUS_POWER_LIMIT, | ||
342 | POWER_LIMIT_EVENT, | ||
343 | PACKAGE_LEVEL) != 0) | ||
344 | mce_log_therm_throt_event(PACKAGE_POWER_LIMIT | ||
345 | | msr_val); | ||
346 | } | ||
237 | } | 347 | } |
238 | 348 | ||
239 | static void unexpected_thermal_interrupt(void) | 349 | static void unexpected_thermal_interrupt(void) |
@@ -335,8 +445,26 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
335 | apic_write(APIC_LVTTHMR, h); | 445 | apic_write(APIC_LVTTHMR, h); |
336 | 446 | ||
337 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | 447 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); |
338 | wrmsr(MSR_IA32_THERM_INTERRUPT, | 448 | if (cpu_has(c, X86_FEATURE_PLN)) |
339 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | 449 | wrmsr(MSR_IA32_THERM_INTERRUPT, |
450 | l | (THERM_INT_LOW_ENABLE | ||
451 | | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h); | ||
452 | else | ||
453 | wrmsr(MSR_IA32_THERM_INTERRUPT, | ||
454 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | ||
455 | |||
456 | if (cpu_has(c, X86_FEATURE_PTS)) { | ||
457 | rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | ||
458 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
459 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
460 | l | (PACKAGE_THERM_INT_LOW_ENABLE | ||
461 | | PACKAGE_THERM_INT_HIGH_ENABLE | ||
462 | | PACKAGE_THERM_INT_PLN_ENABLE), h); | ||
463 | else | ||
464 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
465 | l | (PACKAGE_THERM_INT_LOW_ENABLE | ||
466 | | PACKAGE_THERM_INT_HIGH_ENABLE), h); | ||
467 | } | ||
340 | 468 | ||
341 | smp_thermal_vector = intel_thermal_interrupt; | 469 | smp_thermal_vector = intel_thermal_interrupt; |
342 | 470 | ||
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 16f41bbe46b6..d944bf6c50e9 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/mshyperv.h> | 18 | #include <asm/mshyperv.h> |
19 | 19 | ||
20 | struct ms_hyperv_info ms_hyperv; | 20 | struct ms_hyperv_info ms_hyperv; |
21 | EXPORT_SYMBOL_GPL(ms_hyperv); | ||
21 | 22 | ||
22 | static bool __init ms_hyperv_platform(void) | 23 | static bool __init ms_hyperv_platform(void) |
23 | { | 24 | { |
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 06130b52f012..c5f59d071425 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -632,9 +632,9 @@ static void __init mtrr_print_out_one_result(int i) | |||
632 | unsigned long gran_base, chunk_base, lose_base; | 632 | unsigned long gran_base, chunk_base, lose_base; |
633 | char gran_factor, chunk_factor, lose_factor; | 633 | char gran_factor, chunk_factor, lose_factor; |
634 | 634 | ||
635 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | 635 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor); |
636 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | 636 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor); |
637 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | 637 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor); |
638 | 638 | ||
639 | pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", | 639 | pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", |
640 | result[i].bad ? "*BAD*" : " ", | 640 | result[i].bad ? "*BAD*" : " ", |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index fd31a441c61c..7d28d7d03885 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -433,13 +433,12 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
433 | { | 433 | { |
434 | unsigned int mask_lo, mask_hi, base_lo, base_hi; | 434 | unsigned int mask_lo, mask_hi, base_lo, base_hi; |
435 | unsigned int tmp, hi; | 435 | unsigned int tmp, hi; |
436 | int cpu; | ||
437 | 436 | ||
438 | /* | 437 | /* |
439 | * get_mtrr doesn't need to update mtrr_state, also it could be called | 438 | * get_mtrr doesn't need to update mtrr_state, also it could be called |
440 | * from any cpu, so try to print it out directly. | 439 | * from any cpu, so try to print it out directly. |
441 | */ | 440 | */ |
442 | cpu = get_cpu(); | 441 | get_cpu(); |
443 | 442 | ||
444 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); | 443 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); |
445 | 444 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 79556bd9b602..01c0f3ee6cc3 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -35,6 +35,7 @@ | |||
35 | 35 | ||
36 | #include <linux/types.h> /* FIXME: kvm_para.h needs this */ | 36 | #include <linux/types.h> /* FIXME: kvm_para.h needs this */ |
37 | 37 | ||
38 | #include <linux/stop_machine.h> | ||
38 | #include <linux/kvm_para.h> | 39 | #include <linux/kvm_para.h> |
39 | #include <linux/uaccess.h> | 40 | #include <linux/uaccess.h> |
40 | #include <linux/module.h> | 41 | #include <linux/module.h> |
@@ -143,22 +144,28 @@ struct set_mtrr_data { | |||
143 | mtrr_type smp_type; | 144 | mtrr_type smp_type; |
144 | }; | 145 | }; |
145 | 146 | ||
147 | static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work); | ||
148 | |||
146 | /** | 149 | /** |
147 | * ipi_handler - Synchronisation handler. Executed by "other" CPUs. | 150 | * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs. |
148 | * @info: pointer to mtrr configuration data | 151 | * @info: pointer to mtrr configuration data |
149 | * | 152 | * |
150 | * Returns nothing. | 153 | * Returns nothing. |
151 | */ | 154 | */ |
152 | static void ipi_handler(void *info) | 155 | static int mtrr_work_handler(void *info) |
153 | { | 156 | { |
154 | #ifdef CONFIG_SMP | 157 | #ifdef CONFIG_SMP |
155 | struct set_mtrr_data *data = info; | 158 | struct set_mtrr_data *data = info; |
156 | unsigned long flags; | 159 | unsigned long flags; |
157 | 160 | ||
161 | atomic_dec(&data->count); | ||
162 | while (!atomic_read(&data->gate)) | ||
163 | cpu_relax(); | ||
164 | |||
158 | local_irq_save(flags); | 165 | local_irq_save(flags); |
159 | 166 | ||
160 | atomic_dec(&data->count); | 167 | atomic_dec(&data->count); |
161 | while (!atomic_read(&data->gate)) | 168 | while (atomic_read(&data->gate)) |
162 | cpu_relax(); | 169 | cpu_relax(); |
163 | 170 | ||
164 | /* The master has cleared me to execute */ | 171 | /* The master has cleared me to execute */ |
@@ -173,12 +180,13 @@ static void ipi_handler(void *info) | |||
173 | } | 180 | } |
174 | 181 | ||
175 | atomic_dec(&data->count); | 182 | atomic_dec(&data->count); |
176 | while (atomic_read(&data->gate)) | 183 | while (!atomic_read(&data->gate)) |
177 | cpu_relax(); | 184 | cpu_relax(); |
178 | 185 | ||
179 | atomic_dec(&data->count); | 186 | atomic_dec(&data->count); |
180 | local_irq_restore(flags); | 187 | local_irq_restore(flags); |
181 | #endif | 188 | #endif |
189 | return 0; | ||
182 | } | 190 | } |
183 | 191 | ||
184 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) | 192 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) |
@@ -198,7 +206,7 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) | |||
198 | * | 206 | * |
199 | * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: | 207 | * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: |
200 | * | 208 | * |
201 | * 1. Send IPI to do the following: | 209 | * 1. Queue work to do the following on all processors: |
202 | * 2. Disable Interrupts | 210 | * 2. Disable Interrupts |
203 | * 3. Wait for all procs to do so | 211 | * 3. Wait for all procs to do so |
204 | * 4. Enter no-fill cache mode | 212 | * 4. Enter no-fill cache mode |
@@ -215,14 +223,17 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) | |||
215 | * 15. Enable interrupts. | 223 | * 15. Enable interrupts. |
216 | * | 224 | * |
217 | * What does that mean for us? Well, first we set data.count to the number | 225 | * What does that mean for us? Well, first we set data.count to the number |
218 | * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait | 226 | * of CPUs. As each CPU announces that it started the rendezvous handler by |
219 | * until it hits 0 and proceed. We set the data.gate flag and reset data.count. | 227 | * decrementing the count, we reset data.count and set the data.gate flag, |
220 | * Meanwhile, they are waiting for that flag to be set. Once it's set, each | 228 | * allowing all the CPUs to proceed with the work. As each CPU disables |
229 | * interrupts, it'll decrement data.count once. We wait until it hits 0 and | ||
230 | * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they | ||
231 | * are waiting for that flag to be cleared. Once it's cleared, each | ||
221 | * CPU goes through the transition of updating MTRRs. | 232 | * CPU goes through the transition of updating MTRRs. |
222 | * The CPU vendors may each do it differently, | 233 | * The CPU vendors may each do it differently, |
223 | * so we call mtrr_if->set() callback and let them take care of it. | 234 | * so we call mtrr_if->set() callback and let them take care of it. |
224 | * When they're done, they again decrement data->count and wait for data.gate | 235 | * When they're done, they again decrement data->count and wait for data.gate |
225 | * to be reset. | 236 | * to be set. |
226 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag | 237 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag |
227 | * Everyone then enables interrupts and we all continue on. | 238 | * Everyone then enables interrupts and we all continue on. |
228 | * | 239 | * |
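A user-space model of this count/gate hand-shake, with two pthreads standing in for the other CPUs and C11 atomics in place of the kernel's atomic_t, is sketched below (compile with -pthread). Only the first rendezvous step is shown; the real set_mtrr() repeats the pattern around the interrupt-disable and MTRR-update phases, and the busy-wait loops here are illustrative only.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NWORKERS 2		/* stand-ins for the "other" CPUs */

static atomic_int count;
static atomic_int gate;

static void *worker(void *arg)
{
	(void)arg;
	atomic_fetch_sub(&count, 1);	/* announce arrival in the handler */
	while (!atomic_load(&gate))	/* spin until the master opens the gate */
		;
	/* ...each CPU would disable interrupts and update its MTRRs here... */
	return NULL;
}

int main(void)
{
	pthread_t tid[NWORKERS];
	int i;

	atomic_store(&count, NWORKERS);
	atomic_store(&gate, 0);

	for (i = 0; i < NWORKERS; i++)
		pthread_create(&tid[i], NULL, worker, NULL);

	while (atomic_load(&count))	/* wait for everyone to check in */
		;
	atomic_store(&count, NWORKERS);	/* re-arm the counter for the next phase (not shown) */
	atomic_store(&gate, 1);		/* release the workers */

	for (i = 0; i < NWORKERS; i++)
		pthread_join(tid[i], NULL);
	printf("rendezvous complete\n");
	return 0;
}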
@@ -234,6 +245,9 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
234 | { | 245 | { |
235 | struct set_mtrr_data data; | 246 | struct set_mtrr_data data; |
236 | unsigned long flags; | 247 | unsigned long flags; |
248 | int cpu; | ||
249 | |||
250 | preempt_disable(); | ||
237 | 251 | ||
238 | data.smp_reg = reg; | 252 | data.smp_reg = reg; |
239 | data.smp_base = base; | 253 | data.smp_base = base; |
@@ -246,10 +260,15 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
246 | atomic_set(&data.gate, 0); | 260 | atomic_set(&data.gate, 0); |
247 | 261 | ||
248 | /* Start the ball rolling on other CPUs */ | 262 | /* Start the ball rolling on other CPUs */ |
249 | if (smp_call_function(ipi_handler, &data, 0) != 0) | 263 | for_each_online_cpu(cpu) { |
250 | panic("mtrr: timed out waiting for other CPUs\n"); | 264 | struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu); |
265 | |||
266 | if (cpu == smp_processor_id()) | ||
267 | continue; | ||
268 | |||
269 | stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work); | ||
270 | } | ||
251 | 271 | ||
252 | local_irq_save(flags); | ||
253 | 272 | ||
254 | while (atomic_read(&data.count)) | 273 | while (atomic_read(&data.count)) |
255 | cpu_relax(); | 274 | cpu_relax(); |
@@ -259,6 +278,16 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
259 | smp_wmb(); | 278 | smp_wmb(); |
260 | atomic_set(&data.gate, 1); | 279 | atomic_set(&data.gate, 1); |
261 | 280 | ||
281 | local_irq_save(flags); | ||
282 | |||
283 | while (atomic_read(&data.count)) | ||
284 | cpu_relax(); | ||
285 | |||
286 | /* Ok, reset count and toggle gate */ | ||
287 | atomic_set(&data.count, num_booting_cpus() - 1); | ||
288 | smp_wmb(); | ||
289 | atomic_set(&data.gate, 0); | ||
290 | |||
262 | /* Do our MTRR business */ | 291 | /* Do our MTRR business */ |
263 | 292 | ||
264 | /* | 293 | /* |
@@ -279,7 +308,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
279 | 308 | ||
280 | atomic_set(&data.count, num_booting_cpus() - 1); | 309 | atomic_set(&data.count, num_booting_cpus() - 1); |
281 | smp_wmb(); | 310 | smp_wmb(); |
282 | atomic_set(&data.gate, 0); | 311 | atomic_set(&data.gate, 1); |
283 | 312 | ||
284 | /* | 313 | /* |
285 | * Wait here for everyone to have seen the gate change | 314 | * Wait here for everyone to have seen the gate change |
@@ -289,6 +318,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
289 | cpu_relax(); | 318 | cpu_relax(); |
290 | 319 | ||
291 | local_irq_restore(flags); | 320 | local_irq_restore(flags); |
321 | preempt_enable(); | ||
292 | } | 322 | } |
293 | 323 | ||
294 | /** | 324 | /** |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5db5b7d65a18..f2da20fda02d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -220,6 +220,7 @@ struct x86_pmu { | |||
220 | struct perf_event *event); | 220 | struct perf_event *event); |
221 | struct event_constraint *event_constraints; | 221 | struct event_constraint *event_constraints; |
222 | void (*quirks)(void); | 222 | void (*quirks)(void); |
223 | int perfctr_second_write; | ||
223 | 224 | ||
224 | int (*cpu_prepare)(int cpu); | 225 | int (*cpu_prepare)(int cpu); |
225 | void (*cpu_starting)(int cpu); | 226 | void (*cpu_starting)(int cpu); |
@@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event) | |||
295 | * count to the generic event atomically: | 296 | * count to the generic event atomically: |
296 | */ | 297 | */ |
297 | again: | 298 | again: |
298 | prev_raw_count = atomic64_read(&hwc->prev_count); | 299 | prev_raw_count = local64_read(&hwc->prev_count); |
299 | rdmsrl(hwc->event_base + idx, new_raw_count); | 300 | rdmsrl(hwc->event_base + idx, new_raw_count); |
300 | 301 | ||
301 | if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, | 302 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, |
302 | new_raw_count) != prev_raw_count) | 303 | new_raw_count) != prev_raw_count) |
303 | goto again; | 304 | goto again; |
304 | 305 | ||
@@ -313,8 +314,8 @@ again: | |||
313 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | 314 | delta = (new_raw_count << shift) - (prev_raw_count << shift); |
314 | delta >>= shift; | 315 | delta >>= shift; |
315 | 316 | ||
316 | atomic64_add(delta, &event->count); | 317 | local64_add(delta, &event->count); |
317 | atomic64_sub(delta, &hwc->period_left); | 318 | local64_sub(delta, &hwc->period_left); |
318 | 319 | ||
319 | return new_raw_count; | 320 | return new_raw_count; |
320 | } | 321 | } |
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event) | |||
438 | if (!hwc->sample_period) { | 439 | if (!hwc->sample_period) { |
439 | hwc->sample_period = x86_pmu.max_period; | 440 | hwc->sample_period = x86_pmu.max_period; |
440 | hwc->last_period = hwc->sample_period; | 441 | hwc->last_period = hwc->sample_period; |
441 | atomic64_set(&hwc->period_left, hwc->sample_period); | 442 | local64_set(&hwc->period_left, hwc->sample_period); |
442 | } else { | 443 | } else { |
443 | /* | 444 | /* |
444 | * If we have a PMU initialized but no APIC | 445 | * If we have a PMU initialized but no APIC |
@@ -885,7 +886,7 @@ static int | |||
885 | x86_perf_event_set_period(struct perf_event *event) | 886 | x86_perf_event_set_period(struct perf_event *event) |
886 | { | 887 | { |
887 | struct hw_perf_event *hwc = &event->hw; | 888 | struct hw_perf_event *hwc = &event->hw; |
888 | s64 left = atomic64_read(&hwc->period_left); | 889 | s64 left = local64_read(&hwc->period_left); |
889 | s64 period = hwc->sample_period; | 890 | s64 period = hwc->sample_period; |
890 | int ret = 0, idx = hwc->idx; | 891 | int ret = 0, idx = hwc->idx; |
891 | 892 | ||
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event) | |||
897 | */ | 898 | */ |
898 | if (unlikely(left <= -period)) { | 899 | if (unlikely(left <= -period)) { |
899 | left = period; | 900 | left = period; |
900 | atomic64_set(&hwc->period_left, left); | 901 | local64_set(&hwc->period_left, left); |
901 | hwc->last_period = period; | 902 | hwc->last_period = period; |
902 | ret = 1; | 903 | ret = 1; |
903 | } | 904 | } |
904 | 905 | ||
905 | if (unlikely(left <= 0)) { | 906 | if (unlikely(left <= 0)) { |
906 | left += period; | 907 | left += period; |
907 | atomic64_set(&hwc->period_left, left); | 908 | local64_set(&hwc->period_left, left); |
908 | hwc->last_period = period; | 909 | hwc->last_period = period; |
909 | ret = 1; | 910 | ret = 1; |
910 | } | 911 | } |
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event) | |||
923 | * The hw event starts counting from this event offset, | 924 | * The hw event starts counting from this event offset, |
924 | * mark it to be able to extra future deltas: | 925 | * mark it to be able to extra future deltas: |
925 | */ | 926 | */ |
926 | atomic64_set(&hwc->prev_count, (u64)-left); | 927 | local64_set(&hwc->prev_count, (u64)-left); |
927 | 928 | ||
928 | wrmsrl(hwc->event_base + idx, | 929 | wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); |
930 | |||
931 | /* | ||
932 | * Due to an erratum on certain CPUs we need | ||
933 | * a second write to be sure the register | ||
934 | * is updated properly | ||
935 | */ | ||
936 | if (x86_pmu.perfctr_second_write) { | ||
937 | wrmsrl(hwc->event_base + idx, | ||
929 | (u64)(-left) & x86_pmu.cntval_mask); | 938 | (u64)(-left) & x86_pmu.cntval_mask); |
939 | } | ||
930 | 940 | ||
931 | perf_event_update_userpage(event); | 941 | perf_event_update_userpage(event); |
932 | 942 | ||
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event) | |||
969 | * skip the schedulability test here, it will be performed | 979 | * skip the schedulability test here, it will be performed |
970 | * at commit time(->commit_txn) as a whole | 980 | * at commit time(->commit_txn) as a whole |
971 | */ | 981 | */ |
972 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | 982 | if (cpuc->group_flag & PERF_EVENT_TXN) |
973 | goto out; | 983 | goto out; |
974 | 984 | ||
975 | ret = x86_pmu.schedule_events(cpuc, n, assign); | 985 | ret = x86_pmu.schedule_events(cpuc, n, assign); |
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event) | |||
1096 | * The events never got scheduled and ->cancel_txn will truncate | 1106 | * The events never got scheduled and ->cancel_txn will truncate |
1097 | * the event_list. | 1107 | * the event_list. |
1098 | */ | 1108 | */ |
1099 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | 1109 | if (cpuc->group_flag & PERF_EVENT_TXN) |
1100 | return; | 1110 | return; |
1101 | 1111 | ||
1102 | x86_pmu_stop(event); | 1112 | x86_pmu_stop(event); |
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) | |||
1388 | { | 1398 | { |
1389 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1399 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1390 | 1400 | ||
1391 | cpuc->group_flag |= PERF_EVENT_TXN_STARTED; | 1401 | cpuc->group_flag |= PERF_EVENT_TXN; |
1392 | cpuc->n_txn = 0; | 1402 | cpuc->n_txn = 0; |
1393 | } | 1403 | } |
1394 | 1404 | ||
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) | |||
1401 | { | 1411 | { |
1402 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1412 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1403 | 1413 | ||
1404 | cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; | 1414 | cpuc->group_flag &= ~PERF_EVENT_TXN; |
1405 | /* | 1415 | /* |
1406 | * Truncate the collected events. | 1416 | * Truncate the collected events. |
1407 | */ | 1417 | */ |
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) | |||
1435 | */ | 1445 | */ |
1436 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 1446 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
1437 | 1447 | ||
1438 | /* | 1448 | cpuc->group_flag &= ~PERF_EVENT_TXN; |
1439 | * Clear out the txn count so that ->cancel_txn() which gets | ||
1440 | * run after ->commit_txn() doesn't undo things. | ||
1441 | */ | ||
1442 | cpuc->n_txn = 0; | ||
1443 | 1449 | ||
1444 | return 0; | 1450 | return 0; |
1445 | } | 1451 | } |
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = { | |||
1607 | .walk_stack = print_context_stack_bp, | 1613 | .walk_stack = print_context_stack_bp, |
1608 | }; | 1614 | }; |
1609 | 1615 | ||
1610 | #include "../dumpstack.h" | ||
1611 | |||
1612 | static void | 1616 | static void |
1613 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1617 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) |
1614 | { | 1618 | { |
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
1730 | return entry; | 1734 | return entry; |
1731 | } | 1735 | } |
1732 | 1736 | ||
1733 | void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) | ||
1734 | { | ||
1735 | regs->ip = ip; | ||
1736 | /* | ||
1737 | * perf_arch_fetch_caller_regs adds another call, we need to increment | ||
1738 | * the skip level | ||
1739 | */ | ||
1740 | regs->bp = rewind_frame_pointer(skip + 1); | ||
1741 | regs->cs = __KERNEL_CS; | ||
1742 | /* | ||
1743 | * We abuse bit 3 to pass exact information, see perf_misc_flags | ||
1744 | * and the comment with PERF_EFLAGS_EXACT. | ||
1745 | */ | ||
1746 | regs->flags = 0; | ||
1747 | } | ||
1748 | |||
1749 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | 1737 | unsigned long perf_instruction_pointer(struct pt_regs *regs) |
1750 | { | 1738 | { |
1751 | unsigned long ip; | 1739 | unsigned long ip; |
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ae85d69644d1..febb12cea795 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -21,22 +21,36 @@ struct p4_event_bind { | |||
21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ | 21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ |
22 | }; | 22 | }; |
23 | 23 | ||
24 | struct p4_cache_event_bind { | 24 | struct p4_pebs_bind { |
25 | unsigned int metric_pebs; | 25 | unsigned int metric_pebs; |
26 | unsigned int metric_vert; | 26 | unsigned int metric_vert; |
27 | }; | 27 | }; |
28 | 28 | ||
29 | #define P4_GEN_CACHE_EVENT_BIND(name) \ | 29 | /* it sets P4_PEBS_ENABLE_UOP_TAG as well */ |
30 | [P4_CACHE__##name] = { \ | 30 | #define P4_GEN_PEBS_BIND(name, pebs, vert) \ |
31 | .metric_pebs = P4_PEBS__##name, \ | 31 | [P4_PEBS_METRIC__##name] = { \ |
32 | .metric_vert = P4_VERT__##name, \ | 32 | .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \ |
33 | .metric_vert = vert, \ | ||
33 | } | 34 | } |
34 | 35 | ||
35 | static struct p4_cache_event_bind p4_cache_event_bind_map[] = { | 36 | /* |
36 | P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), | 37 | * note we have P4_PEBS_ENABLE_UOP_TAG always set here |
37 | P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), | 38 | * |
38 | P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), | 39 | * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of |
39 | P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), | 40 | * event configuration to find out which values are to be |
41 | * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT | ||
42 | * registers | ||
43 | */ | ||
44 | static struct p4_pebs_bind p4_pebs_bind_map[] = { | ||
45 | P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001), | ||
46 | P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001), | ||
47 | P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001), | ||
48 | P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002), | ||
49 | P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003), | ||
50 | P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010), | ||
51 | P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001), | ||
52 | P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001), | ||
53 | P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002), | ||
40 | }; | 54 | }; |
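Each row of the new p4_pebs_bind_map pairs the value destined for MSR_IA32_PEBS_ENABLE with the one for MSR_P4_PEBS_MATRIX_VERT, so enabling a cache-event metric reduces to a table lookup plus two MSR writes (see the p4_pmu_enable_pebs() hunk further down). The fragment below is a stand-alone model of that lookup with the MSR writes replaced by printf; the two table entries loosely follow the first two rows above but omit the UOP_TAG bit, and the MSR names serve only as labels.

#include <stdio.h>
#include <stdint.h>

struct pebs_bind {
	uint32_t metric_pebs;	/* value for the PEBS enable MSR */
	uint32_t metric_vert;	/* value for the matrix-vert MSR */
};

/* trimmed stand-in for p4_pebs_bind_map (illustrative values only) */
static const struct pebs_bind bind_map[] = {
	{ 0x0000001, 0x0000001 },	/* 1stL cache load miss retired */
	{ 0x0000002, 0x0000001 },	/* 2ndL cache load miss retired */
};

static void enable_pebs_metric(unsigned int idx)
{
	if (idx >= sizeof(bind_map) / sizeof(bind_map[0]))
		return;		/* unknown metric index: nothing to program */

	/* the kernel issues wrmsrl() here; we only show what would be written */
	printf("MSR_IA32_PEBS_ENABLE    <- %#x\n", bind_map[idx].metric_pebs);
	printf("MSR_P4_PEBS_MATRIX_VERT <- %#x\n", bind_map[idx].metric_vert);
}

int main(void)
{
	enable_pebs_metric(1);
	return 0;
}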
41 | 55 | ||
42 | /* | 56 | /* |
@@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = { | |||
281 | }, | 295 | }, |
282 | }; | 296 | }; |
283 | 297 | ||
284 | #define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ | 298 | #define P4_GEN_CACHE_EVENT(event, bit, metric) \ |
285 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ | 299 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ |
286 | P4_ESCR_EMASK_BIT(event, bit)) | \ | 300 | P4_ESCR_EMASK_BIT(event, bit)) | \ |
287 | p4_config_pack_cccr(cache_event | \ | 301 | p4_config_pack_cccr(metric | \ |
288 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) | 302 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) |
289 | 303 | ||
290 | static __initconst const u64 p4_hw_cache_event_ids | 304 | static __initconst const u64 p4_hw_cache_event_ids |
@@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids | |||
296 | [ C(OP_READ) ] = { | 310 | [ C(OP_READ) ] = { |
297 | [ C(RESULT_ACCESS) ] = 0x0, | 311 | [ C(RESULT_ACCESS) ] = 0x0, |
298 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
299 | P4_CACHE__1stl_cache_load_miss_retired), | 313 | P4_PEBS_METRIC__1stl_cache_load_miss_retired), |
300 | }, | 314 | }, |
301 | }, | 315 | }, |
302 | [ C(LL ) ] = { | 316 | [ C(LL ) ] = { |
303 | [ C(OP_READ) ] = { | 317 | [ C(OP_READ) ] = { |
304 | [ C(RESULT_ACCESS) ] = 0x0, | 318 | [ C(RESULT_ACCESS) ] = 0x0, |
305 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 319 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
306 | P4_CACHE__2ndl_cache_load_miss_retired), | 320 | P4_PEBS_METRIC__2ndl_cache_load_miss_retired), |
307 | }, | 321 | }, |
308 | }, | 322 | }, |
309 | [ C(DTLB) ] = { | 323 | [ C(DTLB) ] = { |
310 | [ C(OP_READ) ] = { | 324 | [ C(OP_READ) ] = { |
311 | [ C(RESULT_ACCESS) ] = 0x0, | 325 | [ C(RESULT_ACCESS) ] = 0x0, |
312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 326 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
313 | P4_CACHE__dtlb_load_miss_retired), | 327 | P4_PEBS_METRIC__dtlb_load_miss_retired), |
314 | }, | 328 | }, |
315 | [ C(OP_WRITE) ] = { | 329 | [ C(OP_WRITE) ] = { |
316 | [ C(RESULT_ACCESS) ] = 0x0, | 330 | [ C(RESULT_ACCESS) ] = 0x0, |
317 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 331 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
318 | P4_CACHE__dtlb_store_miss_retired), | 332 | P4_PEBS_METRIC__dtlb_store_miss_retired), |
319 | }, | 333 | }, |
320 | }, | 334 | }, |
321 | [ C(ITLB) ] = { | 335 | [ C(ITLB) ] = { |
322 | [ C(OP_READ) ] = { | 336 | [ C(OP_READ) ] = { |
323 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, | 337 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, |
324 | P4_CACHE__itlb_reference_hit), | 338 | P4_PEBS_METRIC__none), |
325 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, | 339 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, |
326 | P4_CACHE__itlb_reference_miss), | 340 | P4_PEBS_METRIC__none), |
327 | }, | 341 | }, |
328 | [ C(OP_WRITE) ] = { | 342 | [ C(OP_WRITE) ] = { |
329 | [ C(RESULT_ACCESS) ] = -1, | 343 | [ C(RESULT_ACCESS) ] = -1, |
@@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event) | |||
414 | return config; | 428 | return config; |
415 | } | 429 | } |
416 | 430 | ||
431 | static int p4_validate_raw_event(struct perf_event *event) | ||
432 | { | ||
433 | unsigned int v; | ||
434 | |||
436 | /* user data may have an out-of-bounds event index */ | ||
436 | v = p4_config_unpack_event(event->attr.config); | ||
437 | if (v >= ARRAY_SIZE(p4_event_bind_map)) { | ||
438 | pr_warning("P4 PMU: Unknown event code: %d\n", v); | ||
439 | return -EINVAL; | ||
440 | } | ||
441 | |||
442 | /* | ||
443 | * it may have some screwed PEBS bits | ||
444 | */ | ||
445 | if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { | ||
446 | pr_warning("P4 PMU: PEBS are not supported yet\n"); | ||
447 | return -EINVAL; | ||
448 | } | ||
449 | v = p4_config_unpack_metric(event->attr.config); | ||
450 | if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { | ||
451 | pr_warning("P4 PMU: Unknown metric code: %d\n", v); | ||
452 | return -EINVAL; | ||
453 | } | ||
454 | |||
455 | return 0; | ||
456 | } | ||
457 | |||
417 | static int p4_hw_config(struct perf_event *event) | 458 | static int p4_hw_config(struct perf_event *event) |
418 | { | 459 | { |
419 | int cpu = get_cpu(); | 460 | int cpu = get_cpu(); |
420 | int rc = 0; | 461 | int rc = 0; |
421 | unsigned int evnt; | ||
422 | u32 escr, cccr; | 462 | u32 escr, cccr; |
423 | 463 | ||
424 | /* | 464 | /* |
@@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event) | |||
438 | 478 | ||
439 | if (event->attr.type == PERF_TYPE_RAW) { | 479 | if (event->attr.type == PERF_TYPE_RAW) { |
440 | 480 | ||
441 | /* user data may have out-of-bound event index */ | 481 | rc = p4_validate_raw_event(event); |
442 | evnt = p4_config_unpack_event(event->attr.config); | 482 | if (rc) |
443 | if (evnt >= ARRAY_SIZE(p4_event_bind_map)) { | ||
444 | rc = -EINVAL; | ||
445 | goto out; | 483 | goto out; |
446 | } | ||
447 | 484 | ||
448 | /* | 485 | /* |
449 | * We don't control raw events so it's up to the caller | 486 | * We don't control raw events so it's up to the caller |
@@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event) | |||
451 | * on HT machine but allow HT-compatible specifics to be | 488 | * on HT machine but allow HT-compatible specifics to be |
452 | * passed on) | 489 | * passed on) |
453 | * | 490 | * |
491 | * Note that for RAW events we allow the user to use P4_CCCR_RESERVED | ||
492 | * bits since we keep additional info there (for cache events etc.) | ||
493 | * | ||
454 | * XXX: HT wide things should check perf_paranoid_cpu() && | 494 | * XXX: HT wide things should check perf_paranoid_cpu() && |
455 | * CAP_SYS_ADMIN | 495 | * CAP_SYS_ADMIN |
456 | */ | 496 | */ |
457 | event->hw.config |= event->attr.config & | 497 | event->hw.config |= event->attr.config & |
458 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | 498 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | |
459 | p4_config_pack_cccr(P4_CCCR_MASK_HT)); | 499 | p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); |
460 | } | 500 | } |
461 | 501 | ||
462 | rc = x86_setup_perfctr(event); | 502 | rc = x86_setup_perfctr(event); |
@@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | |||
482 | return overflow; | 522 | return overflow; |
483 | } | 523 | } |
484 | 524 | ||
525 | static void p4_pmu_disable_pebs(void) | ||
526 | { | ||
527 | /* | ||
528 | * FIXME | ||
529 | * | ||
530 | * Two threads are still allowed to set up the same cache | ||
531 | * events, so we can't simply clear the metrics until we know | ||
532 | * no one depends on them; we would need some kind of counter | ||
533 | * for "ReplayEvent" users. | ||
534 | * | ||
535 | * What is more complex -- RAW events: if the user (for some | ||
536 | * reason) passes a cache event metric with an improper | ||
537 | * event opcode, it's fine from the hardware's point of view | ||
538 | * but complete nonsense in terms of what such an action means. | ||
539 | * | ||
540 | * So for the moment leave the metrics turned on forever -- it's | ||
541 | * OK for now but needs to be revisited! | ||
542 | * | ||
543 | * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); | ||
544 | * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); | ||
545 | */ | ||
546 | } | ||
547 | |||
485 | static inline void p4_pmu_disable_event(struct perf_event *event) | 548 | static inline void p4_pmu_disable_event(struct perf_event *event) |
486 | { | 549 | { |
487 | struct hw_perf_event *hwc = &event->hw; | 550 | struct hw_perf_event *hwc = &event->hw; |
@@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void) | |||
507 | continue; | 570 | continue; |
508 | p4_pmu_disable_event(event); | 571 | p4_pmu_disable_event(event); |
509 | } | 572 | } |
573 | |||
574 | p4_pmu_disable_pebs(); | ||
575 | } | ||
576 | |||
577 | /* configuration must be valid */ | ||
578 | static void p4_pmu_enable_pebs(u64 config) | ||
579 | { | ||
580 | struct p4_pebs_bind *bind; | ||
581 | unsigned int idx; | ||
582 | |||
583 | BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK); | ||
584 | |||
585 | idx = p4_config_unpack_metric(config); | ||
586 | if (idx == P4_PEBS_METRIC__none) | ||
587 | return; | ||
588 | |||
589 | bind = &p4_pebs_bind_map[idx]; | ||
590 | |||
591 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); | ||
592 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); | ||
510 | } | 593 | } |
511 | 594 | ||
512 | static void p4_pmu_enable_event(struct perf_event *event) | 595 | static void p4_pmu_enable_event(struct perf_event *event) |
@@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event) | |||
515 | int thread = p4_ht_config_thread(hwc->config); | 598 | int thread = p4_ht_config_thread(hwc->config); |
516 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | 599 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); |
517 | unsigned int idx = p4_config_unpack_event(hwc->config); | 600 | unsigned int idx = p4_config_unpack_event(hwc->config); |
518 | unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config); | ||
519 | struct p4_event_bind *bind; | 601 | struct p4_event_bind *bind; |
520 | struct p4_cache_event_bind *bind_cache; | ||
521 | u64 escr_addr, cccr; | 602 | u64 escr_addr, cccr; |
522 | 603 | ||
523 | bind = &p4_event_bind_map[idx]; | 604 | bind = &p4_event_bind_map[idx]; |
@@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event) | |||
537 | cccr = p4_config_unpack_cccr(hwc->config); | 618 | cccr = p4_config_unpack_cccr(hwc->config); |
538 | 619 | ||
539 | /* | 620 | /* |
540 | * it could be Cache event so that we need to | 621 | * it could be a Cache event, so we need to write metrics
541 | * set metrics into additional MSRs | 622 | * into additional MSRs |
542 | */ | 623 | */ |
543 | BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); | 624 | p4_pmu_enable_pebs(hwc->config); |
544 | if (idx_cache > P4_CACHE__NONE && | ||
545 | idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) { | ||
546 | bind_cache = &p4_cache_event_bind_map[idx_cache]; | ||
547 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs); | ||
548 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert); | ||
549 | } | ||
550 | 625 | ||
551 | (void)checking_wrmsrl(escr_addr, escr_conf); | 626 | (void)checking_wrmsrl(escr_addr, escr_conf); |
552 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 627 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, |
@@ -581,6 +656,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
581 | cpuc = &__get_cpu_var(cpu_hw_events); | 656 | cpuc = &__get_cpu_var(cpu_hw_events); |
582 | 657 | ||
583 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 658 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
659 | int overflow; | ||
584 | 660 | ||
585 | if (!test_bit(idx, cpuc->active_mask)) | 661 | if (!test_bit(idx, cpuc->active_mask)) |
586 | continue; | 662 | continue; |
@@ -591,12 +667,14 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
591 | WARN_ON_ONCE(hwc->idx != idx); | 667 | WARN_ON_ONCE(hwc->idx != idx); |
592 | 668 | ||
593 | /* it might be unflagged overflow */ | 669 | /* it might be unflagged overflow */ |
594 | handled = p4_pmu_clear_cccr_ovf(hwc); | 670 | overflow = p4_pmu_clear_cccr_ovf(hwc); |
595 | 671 | ||
596 | val = x86_perf_event_update(event); | 672 | val = x86_perf_event_update(event); |
597 | if (!handled && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) | 673 | if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) |
598 | continue; | 674 | continue; |
599 | 675 | ||
676 | handled += overflow; | ||
677 | |||
600 | /* event overflow for sure */ | 678 | /* event overflow for sure */ |
601 | data.period = event->hw.last_period; | 679 | data.period = event->hw.last_period; |
602 | 680 | ||
@@ -612,7 +690,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
612 | inc_irq_stat(apic_perf_irqs); | 690 | inc_irq_stat(apic_perf_irqs); |
613 | } | 691 | } |
614 | 692 | ||
615 | return handled; | 693 | return handled > 0; |
616 | } | 694 | } |
617 | 695 | ||
618 | /* | 696 | /* |
@@ -829,6 +907,15 @@ static __initconst const struct x86_pmu p4_pmu = { | |||
829 | .max_period = (1ULL << 39) - 1, | 907 | .max_period = (1ULL << 39) - 1, |
830 | .hw_config = p4_hw_config, | 908 | .hw_config = p4_hw_config, |
831 | .schedule_events = p4_pmu_schedule_events, | 909 | .schedule_events = p4_pmu_schedule_events, |
910 | /* | ||
911 | * This handles erratum N15 in Intel doc 249199-029: | ||
912 | * the counter may not be updated correctly on write, | ||
913 | * so we need a second write operation to do the trick | ||
914 | * (the official workaround didn't work). | ||
915 | * | ||
916 | * This idea is taken from the OProfile code. | ||
917 | */ | ||
918 | .perfctr_second_write = 1, | ||
832 | }; | 919 | }; |
833 | 920 | ||
834 | static __init int p4_pmu_init(void) | 921 | static __init int p4_pmu_init(void) |
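The new p4_pmu_enable_pebs()/p4_pmu_disable_pebs() pair above replaces the open-coded cache-event handling: a PEBS metric index is packed into the event config and, when non-zero, looked up in p4_pebs_bind_map to program MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT. The stand-alone C sketch below shows only that unpack-and-lookup step; the field position, mask width and bind values are illustrative assumptions, the authoritative definitions presumably live in <asm/perf_event_p4.h>.

#include <stdint.h>
#include <stdio.h>

#define PEBS_METRIC_SHIFT 0
#define PEBS_METRIC_MASK  0xffULL           /* assumed width of the metric field */

struct pebs_bind {                          /* mirrors struct p4_pebs_bind */
	uint64_t metric_pebs;               /* value for MSR_IA32_PEBS_ENABLE */
	uint64_t metric_vert;               /* value for MSR_P4_PEBS_MATRIX_VERT */
};

/* entry 0 is "no metric"; the non-zero values here are made up */
static const struct pebs_bind bind_map[] = {
	{ 0x0, 0x0 },
	{ 0x1, 0x1 },
};

static unsigned int unpack_metric(uint64_t config)
{
	return (unsigned int)((config >> PEBS_METRIC_SHIFT) & PEBS_METRIC_MASK);
}

int main(void)
{
	uint64_t config = 0x1;              /* pretend metric #1 was packed in */
	unsigned int idx = unpack_metric(config);

	if (idx != 0)                       /* 0 means nothing to program */
		printf("would write PEBS_ENABLE=%#llx, MATRIX_VERT=%#llx\n",
		       (unsigned long long)bind_map[idx].metric_pebs,
		       (unsigned long long)bind_map[idx].metric_vert);
	return 0;
}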
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c new file mode 100644 index 000000000000..34b4dad6f0b8 --- /dev/null +++ b/arch/x86/kernel/cpu/scattered.c | |||
@@ -0,0 +1,63 @@ | |||
1 | /* | ||
2 | * Routines to identify additional cpu features that are scattered in | ||
3 | * cpuid space. | ||
4 | */ | ||
5 | #include <linux/cpu.h> | ||
6 | |||
7 | #include <asm/pat.h> | ||
8 | #include <asm/processor.h> | ||
9 | |||
10 | #include <asm/apic.h> | ||
11 | |||
12 | struct cpuid_bit { | ||
13 | u16 feature; | ||
14 | u8 reg; | ||
15 | u8 bit; | ||
16 | u32 level; | ||
17 | u32 sub_leaf; | ||
18 | }; | ||
19 | |||
20 | enum cpuid_regs { | ||
21 | CR_EAX = 0, | ||
22 | CR_ECX, | ||
23 | CR_EDX, | ||
24 | CR_EBX | ||
25 | }; | ||
26 | |||
27 | void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | ||
28 | { | ||
29 | u32 max_level; | ||
30 | u32 regs[4]; | ||
31 | const struct cpuid_bit *cb; | ||
32 | |||
33 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | ||
34 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, | ||
35 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, | ||
36 | { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, | ||
37 | { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, | ||
38 | { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, | ||
39 | { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, | ||
40 | { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 }, | ||
41 | { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, | ||
42 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 }, | ||
43 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, | ||
44 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, | ||
45 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, | ||
46 | { 0, 0, 0, 0, 0 } | ||
47 | }; | ||
48 | |||
49 | for (cb = cpuid_bits; cb->feature; cb++) { | ||
50 | |||
51 | /* Verify that the level is valid */ | ||
52 | max_level = cpuid_eax(cb->level & 0xffff0000); | ||
53 | if (max_level < cb->level || | ||
54 | max_level > (cb->level | 0xffff)) | ||
55 | continue; | ||
56 | |||
57 | cpuid_count(cb->level, cb->sub_leaf, ®s[CR_EAX], | ||
58 | ®s[CR_EBX], ®s[CR_ECX], ®s[CR_EDX]); | ||
59 | |||
60 | if (regs[cb->reg] & (1 << cb->bit)) | ||
61 | set_cpu_cap(c, cb->feature); | ||
62 | } | ||
63 | } | ||
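init_scattered_cpuid_features() above probes feature bits that live in assorted CPUID leaves, validating the leaf range first and honouring the new sub_leaf field (needed for XSAVEOPT in leaf 0xd, sub-leaf 1). A user-space sketch of the same probe for two of the bits from the table might look like this (x86 with GCC inline asm assumed; a real probe would also validate the maximum supported leaf, as the kernel loop does):

#include <stdio.h>
#include <stdint.h>

/* cpuid with an explicit sub-leaf, like the kernel's cpuid_count() */
static void cpuid_count(uint32_t leaf, uint32_t subleaf, uint32_t r[4])
{
	__asm__ volatile("cpuid"
			 : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3])
			 : "a"(leaf), "c"(subleaf));
}

int main(void)
{
	uint32_t r[4];

	/* ARAT: leaf 0x6, EAX bit 2 (same row as in the table above) */
	cpuid_count(0x00000006, 0, r);
	printf("ARAT:     %s\n", (r[0] & (1u << 2)) ? "yes" : "no");

	/* XSAVEOPT: leaf 0xd, sub-leaf 1, EAX bit 0 -- the case that needed sub_leaf */
	cpuid_count(0x0000000d, 1, r);
	printf("XSAVEOPT: %s\n", (r[0] & (1u << 0)) ? "yes" : "no");

	return 0;
}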
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/topology.c index 10fa5684a662..4397e987a1cf 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/topology.c | |||
@@ -1,62 +1,14 @@ | |||
1 | /* | 1 | /* |
2 | * Routines to indentify additional cpu features that are scattered in | 2 | * Check for extended topology enumeration cpuid leaf 0xb and if it |
3 | * cpuid space. | 3 | * exists, use it for populating initial_apicid and cpu topology |
4 | * detection. | ||
4 | */ | 5 | */ |
5 | #include <linux/cpu.h> | ||
6 | 6 | ||
7 | #include <linux/cpu.h> | ||
8 | #include <asm/apic.h> | ||
7 | #include <asm/pat.h> | 9 | #include <asm/pat.h> |
8 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
9 | 11 | ||
10 | #include <asm/apic.h> | ||
11 | |||
12 | struct cpuid_bit { | ||
13 | u16 feature; | ||
14 | u8 reg; | ||
15 | u8 bit; | ||
16 | u32 level; | ||
17 | }; | ||
18 | |||
19 | enum cpuid_regs { | ||
20 | CR_EAX = 0, | ||
21 | CR_ECX, | ||
22 | CR_EDX, | ||
23 | CR_EBX | ||
24 | }; | ||
25 | |||
26 | void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | ||
27 | { | ||
28 | u32 max_level; | ||
29 | u32 regs[4]; | ||
30 | const struct cpuid_bit *cb; | ||
31 | |||
32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | ||
33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, | ||
34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, | ||
35 | { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006 }, | ||
36 | { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007 }, | ||
37 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a }, | ||
38 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a }, | ||
39 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a }, | ||
40 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a }, | ||
41 | { 0, 0, 0, 0 } | ||
42 | }; | ||
43 | |||
44 | for (cb = cpuid_bits; cb->feature; cb++) { | ||
45 | |||
46 | /* Verify that the level is valid */ | ||
47 | max_level = cpuid_eax(cb->level & 0xffff0000); | ||
48 | if (max_level < cb->level || | ||
49 | max_level > (cb->level | 0xffff)) | ||
50 | continue; | ||
51 | |||
52 | cpuid(cb->level, ®s[CR_EAX], ®s[CR_EBX], | ||
53 | ®s[CR_ECX], ®s[CR_EDX]); | ||
54 | |||
55 | if (regs[cb->reg] & (1 << cb->bit)) | ||
56 | set_cpu_cap(c, cb->feature); | ||
57 | } | ||
58 | } | ||
59 | |||
60 | /* leaf 0xb SMT level */ | 12 | /* leaf 0xb SMT level */ |
61 | #define SMT_LEVEL 0 | 13 | #define SMT_LEVEL 0 |
62 | 14 | ||
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index b9d1ff588445..227b0448960d 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -51,7 +51,7 @@ static inline int __vmware_platform(void) | |||
51 | 51 | ||
52 | static unsigned long vmware_get_tsc_khz(void) | 52 | static unsigned long vmware_get_tsc_khz(void) |
53 | { | 53 | { |
54 | uint64_t tsc_hz; | 54 | uint64_t tsc_hz, lpj; |
55 | uint32_t eax, ebx, ecx, edx; | 55 | uint32_t eax, ebx, ecx, edx; |
56 | 56 | ||
57 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); | 57 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); |
@@ -62,6 +62,13 @@ static unsigned long vmware_get_tsc_khz(void) | |||
62 | printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", | 62 | printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", |
63 | (unsigned long) tsc_hz / 1000, | 63 | (unsigned long) tsc_hz / 1000, |
64 | (unsigned long) tsc_hz % 1000); | 64 | (unsigned long) tsc_hz % 1000); |
65 | |||
66 | if (!preset_lpj) { | ||
67 | lpj = ((u64)tsc_hz * 1000); | ||
68 | do_div(lpj, HZ); | ||
69 | preset_lpj = lpj; | ||
70 | } | ||
71 | |||
65 | return tsc_hz; | 72 | return tsc_hz; |
66 | } | 73 | } |
67 | 74 | ||
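The vmware.c hunk seeds preset_lpj from the hypervisor-reported TSC rate so that loops_per_jiffy need not be calibrated in the guest; note that despite the tsc_hz name the value is in kHz, as the MHz printk above suggests. A worked example of the arithmetic, with an assumed 2.0 GHz TSC and HZ=1000:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t tsc_khz = 2000000;            /* assumed: hypervisor reports 2.0 GHz */
	unsigned int hz = 1000;                /* assumed CONFIG_HZ */

	uint64_t lpj = tsc_khz * 1000 / hz;    /* TSC cycles per jiffy */

	printf("preset_lpj = %llu\n", (unsigned long long)lpj);  /* 2000000 */
	return 0;
}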
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index ebd4c51d096a..764c7c2b1811 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -28,6 +28,8 @@ | |||
28 | #include <asm/reboot.h> | 28 | #include <asm/reboot.h> |
29 | #include <asm/virtext.h> | 29 | #include <asm/virtext.h> |
30 | 30 | ||
31 | int in_crash_kexec; | ||
32 | |||
31 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 33 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
32 | 34 | ||
33 | static void kdump_nmi_callback(int cpu, struct die_args *args) | 35 | static void kdump_nmi_callback(int cpu, struct die_args *args) |
@@ -61,6 +63,7 @@ static void kdump_nmi_callback(int cpu, struct die_args *args) | |||
61 | 63 | ||
62 | static void kdump_nmi_shootdown_cpus(void) | 64 | static void kdump_nmi_shootdown_cpus(void) |
63 | { | 65 | { |
66 | in_crash_kexec = 1; | ||
64 | nmi_shootdown_cpus(kdump_nmi_callback); | 67 | nmi_shootdown_cpus(kdump_nmi_callback); |
65 | 68 | ||
66 | disable_local_APIC(); | 69 | disable_local_APIC(); |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c89a386930b7..6e8752c1bd52 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -18,7 +18,6 @@ | |||
18 | 18 | ||
19 | #include <asm/stacktrace.h> | 19 | #include <asm/stacktrace.h> |
20 | 20 | ||
21 | #include "dumpstack.h" | ||
22 | 21 | ||
23 | int panic_on_unrecovered_nmi; | 22 | int panic_on_unrecovered_nmi; |
24 | int panic_on_io_nmi; | 23 | int panic_on_io_nmi; |
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h deleted file mode 100644 index e1a93be4fd44..000000000000 --- a/arch/x86/kernel/dumpstack.h +++ /dev/null | |||
@@ -1,56 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | */ | ||
5 | |||
6 | #ifndef DUMPSTACK_H | ||
7 | #define DUMPSTACK_H | ||
8 | |||
9 | #ifdef CONFIG_X86_32 | ||
10 | #define STACKSLOTS_PER_LINE 8 | ||
11 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
12 | #else | ||
13 | #define STACKSLOTS_PER_LINE 4 | ||
14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
15 | #endif | ||
16 | |||
17 | #include <linux/uaccess.h> | ||
18 | |||
19 | extern void | ||
20 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
21 | unsigned long *stack, unsigned long bp, char *log_lvl); | ||
22 | |||
23 | extern void | ||
24 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
25 | unsigned long *sp, unsigned long bp, char *log_lvl); | ||
26 | |||
27 | extern unsigned int code_bytes; | ||
28 | |||
29 | /* The form of the top of the frame on the stack */ | ||
30 | struct stack_frame { | ||
31 | struct stack_frame *next_frame; | ||
32 | unsigned long return_address; | ||
33 | }; | ||
34 | |||
35 | struct stack_frame_ia32 { | ||
36 | u32 next_frame; | ||
37 | u32 return_address; | ||
38 | }; | ||
39 | |||
40 | static inline unsigned long rewind_frame_pointer(int n) | ||
41 | { | ||
42 | struct stack_frame *frame; | ||
43 | |||
44 | get_bp(frame); | ||
45 | |||
46 | #ifdef CONFIG_FRAME_POINTER | ||
47 | while (n--) { | ||
48 | if (probe_kernel_address(&frame->next_frame, frame)) | ||
49 | break; | ||
50 | } | ||
51 | #endif | ||
52 | |||
53 | return (unsigned long)frame; | ||
54 | } | ||
55 | |||
56 | #endif /* DUMPSTACK_H */ | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 11540a189d93..0f6376ffa2d9 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -16,8 +16,6 @@ | |||
16 | 16 | ||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | #include "dumpstack.h" | ||
20 | |||
21 | 19 | ||
22 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 20 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
23 | unsigned long *stack, unsigned long bp, | 21 | unsigned long *stack, unsigned long bp, |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 272c9f1f05f3..57a21f11c791 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -16,7 +16,6 @@ | |||
16 | 16 | ||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | #include "dumpstack.h" | ||
20 | 19 | ||
21 | #define N_EXCEPTION_STACKS_END \ | 20 | #define N_EXCEPTION_STACKS_END \ |
22 | (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) | 21 | (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index cd49141cf153..227d00920d2f 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -611,14 +611,14 @@ ldt_ss: | |||
611 | * compensating for the offset by changing to the ESPFIX segment with | 611 | * compensating for the offset by changing to the ESPFIX segment with |
612 | * a base address that matches for the difference. | 612 | * a base address that matches for the difference. |
613 | */ | 613 | */ |
614 | #define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8) | ||
614 | mov %esp, %edx /* load kernel esp */ | 615 | mov %esp, %edx /* load kernel esp */ |
615 | mov PT_OLDESP(%esp), %eax /* load userspace esp */ | 616 | mov PT_OLDESP(%esp), %eax /* load userspace esp */ |
616 | mov %dx, %ax /* eax: new kernel esp */ | 617 | mov %dx, %ax /* eax: new kernel esp */ |
617 | sub %eax, %edx /* offset (low word is 0) */ | 618 | sub %eax, %edx /* offset (low word is 0) */ |
618 | PER_CPU(gdt_page, %ebx) | ||
619 | shr $16, %edx | 619 | shr $16, %edx |
620 | mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */ | 620 | mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ |
621 | mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */ | 621 | mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ |
622 | pushl $__ESPFIX_SS | 622 | pushl $__ESPFIX_SS |
623 | CFI_ADJUST_CFA_OFFSET 4 | 623 | CFI_ADJUST_CFA_OFFSET 4 |
624 | push %eax /* new kernel esp */ | 624 | push %eax /* new kernel esp */ |
@@ -791,9 +791,8 @@ ptregs_clone: | |||
791 | * normal stack and adjusts ESP with the matching offset. | 791 | * normal stack and adjusts ESP with the matching offset. |
792 | */ | 792 | */ |
793 | /* fixup the stack */ | 793 | /* fixup the stack */ |
794 | PER_CPU(gdt_page, %ebx) | 794 | mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ |
795 | mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */ | 795 | mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ |
796 | mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */ | ||
797 | shl $16, %eax | 796 | shl $16, %eax |
798 | addl %esp, %eax /* the adjusted stack pointer */ | 797 | addl %esp, %eax /* the adjusted stack pointer */ |
799 | pushl $__KERNEL_DS | 798 | pushl $__KERNEL_DS |
@@ -914,7 +913,7 @@ ENTRY(simd_coprocessor_error) | |||
914 | .balign 4 | 913 | .balign 4 |
915 | .long 661b | 914 | .long 661b |
916 | .long 663f | 915 | .long 663f |
917 | .byte X86_FEATURE_XMM | 916 | .word X86_FEATURE_XMM |
918 | .byte 662b-661b | 917 | .byte 662b-661b |
919 | .byte 664f-663f | 918 | .byte 664f-663f |
920 | .previous | 919 | .previous |
@@ -1166,6 +1165,9 @@ ENTRY(xen_failsafe_callback) | |||
1166 | .previous | 1165 | .previous |
1167 | ENDPROC(xen_failsafe_callback) | 1166 | ENDPROC(xen_failsafe_callback) |
1168 | 1167 | ||
1168 | BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, | ||
1169 | xen_evtchn_do_upcall) | ||
1170 | |||
1169 | #endif /* CONFIG_XEN */ | 1171 | #endif /* CONFIG_XEN */ |
1170 | 1172 | ||
1171 | #ifdef CONFIG_FUNCTION_TRACER | 1173 | #ifdef CONFIG_FUNCTION_TRACER |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 4db7c4d12ffa..17be5ec7cbba 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1065,6 +1065,7 @@ ENTRY(\sym) | |||
1065 | END(\sym) | 1065 | END(\sym) |
1066 | .endm | 1066 | .endm |
1067 | 1067 | ||
1068 | #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) | ||
1068 | .macro paranoidzeroentry_ist sym do_sym ist | 1069 | .macro paranoidzeroentry_ist sym do_sym ist |
1069 | ENTRY(\sym) | 1070 | ENTRY(\sym) |
1070 | INTR_FRAME | 1071 | INTR_FRAME |
@@ -1076,10 +1077,9 @@ ENTRY(\sym) | |||
1076 | TRACE_IRQS_OFF | 1077 | TRACE_IRQS_OFF |
1077 | movq %rsp,%rdi /* pt_regs pointer */ | 1078 | movq %rsp,%rdi /* pt_regs pointer */ |
1078 | xorl %esi,%esi /* no error code */ | 1079 | xorl %esi,%esi /* no error code */ |
1079 | PER_CPU(init_tss, %r12) | 1080 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
1080 | subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) | ||
1081 | call \do_sym | 1081 | call \do_sym |
1082 | addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) | 1082 | addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
1083 | jmp paranoid_exit /* %ebx: no swapgs flag */ | 1083 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
1084 | CFI_ENDPROC | 1084 | CFI_ENDPROC |
1085 | END(\sym) | 1085 | END(\sym) |
@@ -1185,13 +1185,13 @@ END(kernel_thread_helper) | |||
1185 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. | 1185 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. |
1186 | * | 1186 | * |
1187 | * C extern interface: | 1187 | * C extern interface: |
1188 | * extern long execve(char *name, char **argv, char **envp) | 1188 | * extern long execve(const char *name, char **argv, char **envp) |
1189 | * | 1189 | * |
1190 | * asm input arguments: | 1190 | * asm input arguments: |
1191 | * rdi: name, rsi: argv, rdx: envp | 1191 | * rdi: name, rsi: argv, rdx: envp |
1192 | * | 1192 | * |
1193 | * We want to fallback into: | 1193 | * We want to fallback into: |
1194 | * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs) | 1194 | * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs) |
1195 | * | 1195 | * |
1196 | * do_sys_execve asm fallback arguments: | 1196 | * do_sys_execve asm fallback arguments: |
1197 | * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack | 1197 | * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack |
@@ -1329,6 +1329,9 @@ ENTRY(xen_failsafe_callback) | |||
1329 | CFI_ENDPROC | 1329 | CFI_ENDPROC |
1330 | END(xen_failsafe_callback) | 1330 | END(xen_failsafe_callback) |
1331 | 1331 | ||
1332 | apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ | ||
1333 | xen_hvm_callback_vector xen_evtchn_do_upcall | ||
1334 | |||
1332 | #endif /* CONFIG_XEN */ | 1335 | #endif /* CONFIG_XEN */ |
1333 | 1336 | ||
1334 | /* | 1337 | /* |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index b2e246037392..784360c0625c 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -20,7 +20,7 @@ | |||
20 | 20 | ||
21 | static void __init i386_default_early_setup(void) | 21 | static void __init i386_default_early_setup(void) |
22 | { | 22 | { |
23 | /* Initilize 32bit specific setup functions */ | 23 | /* Initialize 32bit specific setup functions */ |
24 | x86_init.resources.probe_roms = probe_roms; | 24 | x86_init.resources.probe_roms = probe_roms; |
25 | x86_init.resources.reserve_resources = i386_reserve_resources; | 25 | x86_init.resources.reserve_resources = i386_reserve_resources; |
26 | x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; | 26 | x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 37c3d4b17d85..ff4c453e13f3 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -131,6 +131,12 @@ ENTRY(startup_32) | |||
131 | movsl | 131 | movsl |
132 | 1: | 132 | 1: |
133 | 133 | ||
134 | #ifdef CONFIG_OLPC_OPENFIRMWARE | ||
135 | /* save OFW's pgdir table for later use when calling into OFW */ | ||
136 | movl %cr3, %eax | ||
137 | movl %eax, pa(olpc_ofw_pgd) | ||
138 | #endif | ||
139 | |||
134 | #ifdef CONFIG_PARAVIRT | 140 | #ifdef CONFIG_PARAVIRT |
135 | /* This can only trip for a broken bootloader... */ | 141 |
136 | cmpw $0x207, pa(boot_params + BP_version) | 142 | cmpw $0x207, pa(boot_params + BP_version) |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 3d1e6f16b7a6..239046bd447f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -234,9 +234,8 @@ ENTRY(secondary_startup_64) | |||
234 | * init data section till per cpu areas are set up. | 234 | * init data section till per cpu areas are set up. |
235 | */ | 235 | */ |
236 | movl $MSR_GS_BASE,%ecx | 236 | movl $MSR_GS_BASE,%ecx |
237 | movq initial_gs(%rip),%rax | 237 | movl initial_gs(%rip),%eax |
238 | movq %rax,%rdx | 238 | movl initial_gs+4(%rip),%edx |
239 | shrq $32,%rdx | ||
240 | wrmsr | 239 | wrmsr |
241 | 240 | ||
242 | /* esi is pointer to real mode structure with interesting info. | 241 | /* esi is pointer to real mode structure with interesting info. |
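The head_64.S change loads initial_gs as two 32-bit halves instead of the movq/shrq pair, because wrmsr consumes the 64-bit value split across EDX:EAX (high:low) anyway. The split it replaces is just this (hypothetical GS base value for illustration):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t gs_base = 0x0000123456789abcULL;       /* hypothetical initial_gs value */
	uint32_t eax = (uint32_t)gs_base;               /* low half  -> %eax */
	uint32_t edx = (uint32_t)(gs_base >> 32);       /* high half -> %edx */

	printf("eax=%#x edx=%#x\n", eax, edx);          /* eax=0x56789abc edx=0x1234 */
	return 0;
}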
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ba390d731175..351f9c0fea1f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <asm/hpet.h> | 16 | #include <asm/hpet.h> |
17 | 17 | ||
18 | #define HPET_MASK CLOCKSOURCE_MASK(32) | 18 | #define HPET_MASK CLOCKSOURCE_MASK(32) |
19 | #define HPET_SHIFT 22 | ||
20 | 19 | ||
21 | /* FSEC = 10^-15 | 20 | /* FSEC = 10^-15 |
22 | NSEC = 10^-9 */ | 21 | NSEC = 10^-9 */ |
@@ -583,7 +582,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) | |||
583 | * scaled math multiplication factor for nanosecond to hpet tick | 582 | * scaled math multiplication factor for nanosecond to hpet tick |
584 | * conversion. | 583 | * conversion. |
585 | */ | 584 | */ |
586 | hpet_freq = 1000000000000000ULL; | 585 | hpet_freq = FSEC_PER_SEC; |
587 | do_div(hpet_freq, hpet_period); | 586 | do_div(hpet_freq, hpet_period); |
588 | evt->mult = div_sc((unsigned long) hpet_freq, | 587 | evt->mult = div_sc((unsigned long) hpet_freq, |
589 | NSEC_PER_SEC, evt->shift); | 588 | NSEC_PER_SEC, evt->shift); |
@@ -787,7 +786,6 @@ static struct clocksource clocksource_hpet = { | |||
787 | .rating = 250, | 786 | .rating = 250, |
788 | .read = read_hpet, | 787 | .read = read_hpet, |
789 | .mask = HPET_MASK, | 788 | .mask = HPET_MASK, |
790 | .shift = HPET_SHIFT, | ||
791 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 789 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
792 | .resume = hpet_resume_counter, | 790 | .resume = hpet_resume_counter, |
793 | #ifdef CONFIG_X86_64 | 791 | #ifdef CONFIG_X86_64 |
@@ -798,6 +796,7 @@ static struct clocksource clocksource_hpet = { | |||
798 | static int hpet_clocksource_register(void) | 796 | static int hpet_clocksource_register(void) |
799 | { | 797 | { |
800 | u64 start, now; | 798 | u64 start, now; |
799 | u64 hpet_freq; | ||
801 | cycle_t t1; | 800 | cycle_t t1; |
802 | 801 | ||
803 | /* Start the counter */ | 802 | /* Start the counter */ |
@@ -832,9 +831,15 @@ static int hpet_clocksource_register(void) | |||
832 | * mult = (hpet_period * 2^shift)/10^6 | 831 | * mult = (hpet_period * 2^shift)/10^6 |
833 | * mult = (hpet_period << shift)/FSEC_PER_NSEC | 832 | * mult = (hpet_period << shift)/FSEC_PER_NSEC |
834 | */ | 833 | */ |
835 | clocksource_hpet.mult = div_sc(hpet_period, FSEC_PER_NSEC, HPET_SHIFT); | ||
836 | 834 | ||
837 | clocksource_register(&clocksource_hpet); | 835 | /* Need to convert hpet_period (fsec/cyc) to cyc/sec: |
836 | * | ||
837 | * cyc/sec = FSEC_PER_SEC/hpet_period(fsec/cyc) | ||
838 | * cyc/sec = (FSEC_PER_NSEC * NSEC_PER_SEC)/hpet_period | ||
839 | */ | ||
840 | hpet_freq = FSEC_PER_SEC; | ||
841 | do_div(hpet_freq, hpet_period); | ||
842 | clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); | ||
838 | 843 | ||
839 | return 0; | 844 | return 0; |
840 | } | 845 | } |
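The HPET clocksource now registers via clocksource_register_hz() and computes the frequency from the period directly, exactly as the comment derives: cyc/sec = FSEC_PER_SEC / hpet_period. A worked example with a typical ~14.318 MHz HPET (the 69841279 fs/cycle period is an assumed, representative value):

#include <stdio.h>
#include <stdint.h>

#define FSEC_PER_SEC 1000000000000000ULL   /* 10^15 femtoseconds per second */

int main(void)
{
	uint64_t hpet_period = 69841279;   /* fs per cycle, typical ~14.318 MHz HPET */
	uint64_t hpet_freq = FSEC_PER_SEC / hpet_period;

	printf("HPET frequency ~= %llu Hz\n", (unsigned long long)hpet_freq);
	return 0;
}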
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a8f1b803d2fd..a474ec37c32f 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type, | |||
208 | { | 208 | { |
209 | /* Len */ | 209 | /* Len */ |
210 | switch (x86_len) { | 210 | switch (x86_len) { |
211 | case X86_BREAKPOINT_LEN_X: | ||
212 | *gen_len = sizeof(long); | ||
213 | break; | ||
211 | case X86_BREAKPOINT_LEN_1: | 214 | case X86_BREAKPOINT_LEN_1: |
212 | *gen_len = HW_BREAKPOINT_LEN_1; | 215 | *gen_len = HW_BREAKPOINT_LEN_1; |
213 | break; | 216 | break; |
@@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
251 | 254 | ||
252 | info->address = bp->attr.bp_addr; | 255 | info->address = bp->attr.bp_addr; |
253 | 256 | ||
257 | /* Type */ | ||
258 | switch (bp->attr.bp_type) { | ||
259 | case HW_BREAKPOINT_W: | ||
260 | info->type = X86_BREAKPOINT_WRITE; | ||
261 | break; | ||
262 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: | ||
263 | info->type = X86_BREAKPOINT_RW; | ||
264 | break; | ||
265 | case HW_BREAKPOINT_X: | ||
266 | info->type = X86_BREAKPOINT_EXECUTE; | ||
267 | /* | ||
268 | * x86 inst breakpoints need to have a specific undefined len. | ||
269 | * But we still need to check that userspace is not trying to set up | ||
270 | * an unsupported length, to get a range breakpoint for example. | ||
271 | */ | ||
272 | if (bp->attr.bp_len == sizeof(long)) { | ||
273 | info->len = X86_BREAKPOINT_LEN_X; | ||
274 | return 0; | ||
275 | } | ||
276 | default: | ||
277 | return -EINVAL; | ||
278 | } | ||
279 | |||
254 | /* Len */ | 280 | /* Len */ |
255 | switch (bp->attr.bp_len) { | 281 | switch (bp->attr.bp_len) { |
256 | case HW_BREAKPOINT_LEN_1: | 282 | case HW_BREAKPOINT_LEN_1: |
@@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
271 | return -EINVAL; | 297 | return -EINVAL; |
272 | } | 298 | } |
273 | 299 | ||
274 | /* Type */ | ||
275 | switch (bp->attr.bp_type) { | ||
276 | case HW_BREAKPOINT_W: | ||
277 | info->type = X86_BREAKPOINT_WRITE; | ||
278 | break; | ||
279 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: | ||
280 | info->type = X86_BREAKPOINT_RW; | ||
281 | break; | ||
282 | case HW_BREAKPOINT_X: | ||
283 | info->type = X86_BREAKPOINT_EXECUTE; | ||
284 | break; | ||
285 | default: | ||
286 | return -EINVAL; | ||
287 | } | ||
288 | |||
289 | return 0; | 300 | return 0; |
290 | } | 301 | } |
291 | /* | 302 | /* |
@@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) | |||
305 | ret = -EINVAL; | 316 | ret = -EINVAL; |
306 | 317 | ||
307 | switch (info->len) { | 318 | switch (info->len) { |
319 | case X86_BREAKPOINT_LEN_X: | ||
320 | align = sizeof(long) -1; | ||
321 | break; | ||
308 | case X86_BREAKPOINT_LEN_1: | 322 | case X86_BREAKPOINT_LEN_1: |
309 | align = 0; | 323 | align = 0; |
310 | break; | 324 | break; |
@@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) | |||
466 | 480 | ||
467 | perf_bp_event(bp, args->regs); | 481 | perf_bp_event(bp, args->regs); |
468 | 482 | ||
483 | /* | ||
484 | * Set up resume flag to avoid breakpoint recursion when | ||
485 | * returning to the origin. | ||
486 | */ | ||
487 | if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE) | ||
488 | args->regs->flags |= X86_EFLAGS_RF; | ||
489 | |||
469 | rcu_read_unlock(); | 490 | rcu_read_unlock(); |
470 | } | 491 | } |
471 | /* | 492 | /* |
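arch_build_bp_info() now classifies the breakpoint type before the length switch, and arch_validate_hwbkpt_settings() gains an X86_BREAKPOINT_LEN_X case that derives the alignment mask from sizeof(long). The alignment test behind those cases reduces to masking the low address bits; a small sketch (bp_addr_ok() is a made-up helper, the kernel does the check inline):

#include <stdio.h>

static int bp_addr_ok(unsigned long addr, unsigned int len)
{
	unsigned long align = len - 1;      /* len is 1, 2, 4 or sizeof(long) */

	/* the low bits of the address must be clear for the chosen length */
	return (addr & align) == 0;
}

int main(void)
{
	printf("%d\n", bp_addr_ok(0x1000, 4));   /* 1: 4-byte aligned       */
	printf("%d\n", bp_addr_ok(0x1002, 4));   /* 0: misaligned for len 4 */
	printf("%d\n", bp_addr_ok(0x1002, 2));   /* 1: fine for a 2-byte bp */
	return 0;
}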
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 86cef6b32253..1f11f5ce668f 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -59,18 +59,18 @@ void __cpuinit mxcsr_feature_mask_init(void) | |||
59 | stts(); | 59 | stts(); |
60 | } | 60 | } |
61 | 61 | ||
62 | void __cpuinit init_thread_xstate(void) | 62 | static void __cpuinit init_thread_xstate(void) |
63 | { | 63 | { |
64 | /* | ||
65 | * Note that xstate_size might be overwriten later during | ||
66 | * xsave_init(). | ||
67 | */ | ||
68 | |||
64 | if (!HAVE_HWFP) { | 69 | if (!HAVE_HWFP) { |
65 | xstate_size = sizeof(struct i387_soft_struct); | 70 | xstate_size = sizeof(struct i387_soft_struct); |
66 | return; | 71 | return; |
67 | } | 72 | } |
68 | 73 | ||
69 | if (cpu_has_xsave) { | ||
70 | xsave_cntxt_init(); | ||
71 | return; | ||
72 | } | ||
73 | |||
74 | if (cpu_has_fxsr) | 74 | if (cpu_has_fxsr) |
75 | xstate_size = sizeof(struct i387_fxsave_struct); | 75 | xstate_size = sizeof(struct i387_fxsave_struct); |
76 | #ifdef CONFIG_X86_32 | 76 | #ifdef CONFIG_X86_32 |
@@ -84,6 +84,7 @@ void __cpuinit init_thread_xstate(void) | |||
84 | * Called at bootup to set up the initial FPU state that is later cloned | 84 | * Called at bootup to set up the initial FPU state that is later cloned |
85 | * into all processes. | 85 | * into all processes. |
86 | */ | 86 | */ |
87 | |||
87 | void __cpuinit fpu_init(void) | 88 | void __cpuinit fpu_init(void) |
88 | { | 89 | { |
89 | unsigned long oldcr0 = read_cr0(); | 90 | unsigned long oldcr0 = read_cr0(); |
@@ -93,21 +94,26 @@ void __cpuinit fpu_init(void) | |||
93 | 94 | ||
94 | write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ | 95 | write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ |
95 | 96 | ||
96 | /* | ||
97 | * Boot processor to setup the FP and extended state context info. | ||
98 | */ | ||
99 | if (!smp_processor_id()) | 97 | if (!smp_processor_id()) |
100 | init_thread_xstate(); | 98 | init_thread_xstate(); |
101 | xsave_init(); | ||
102 | 99 | ||
103 | mxcsr_feature_mask_init(); | 100 | mxcsr_feature_mask_init(); |
104 | /* clean state in init */ | 101 | /* clean state in init */ |
105 | current_thread_info()->status = 0; | 102 | current_thread_info()->status = 0; |
106 | clear_used_math(); | 103 | clear_used_math(); |
107 | } | 104 | } |
108 | #endif /* CONFIG_X86_64 */ | ||
109 | 105 | ||
110 | static void fpu_finit(struct fpu *fpu) | 106 | #else /* CONFIG_X86_64 */ |
107 | |||
108 | void __cpuinit fpu_init(void) | ||
109 | { | ||
110 | if (!smp_processor_id()) | ||
111 | init_thread_xstate(); | ||
112 | } | ||
113 | |||
114 | #endif /* CONFIG_X86_32 */ | ||
115 | |||
116 | void fpu_finit(struct fpu *fpu) | ||
111 | { | 117 | { |
112 | #ifdef CONFIG_X86_32 | 118 | #ifdef CONFIG_X86_32 |
113 | if (!HAVE_HWFP) { | 119 | if (!HAVE_HWFP) { |
@@ -132,6 +138,7 @@ static void fpu_finit(struct fpu *fpu) | |||
132 | fp->fos = 0xffff0000u; | 138 | fp->fos = 0xffff0000u; |
133 | } | 139 | } |
134 | } | 140 | } |
141 | EXPORT_SYMBOL_GPL(fpu_finit); | ||
135 | 142 | ||
136 | /* | 143 | /* |
137 | * The _current_ task is using the FPU for the first time | 144 | * The _current_ task is using the FPU for the first time |
@@ -190,6 +197,8 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
190 | if (ret) | 197 | if (ret) |
191 | return ret; | 198 | return ret; |
192 | 199 | ||
200 | sanitize_i387_state(target); | ||
201 | |||
193 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 202 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
194 | &target->thread.fpu.state->fxsave, 0, -1); | 203 | &target->thread.fpu.state->fxsave, 0, -1); |
195 | } | 204 | } |
@@ -207,6 +216,8 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
207 | if (ret) | 216 | if (ret) |
208 | return ret; | 217 | return ret; |
209 | 218 | ||
219 | sanitize_i387_state(target); | ||
220 | |||
210 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 221 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
211 | &target->thread.fpu.state->fxsave, 0, -1); | 222 | &target->thread.fpu.state->fxsave, 0, -1); |
212 | 223 | ||
@@ -446,6 +457,8 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
446 | -1); | 457 | -1); |
447 | } | 458 | } |
448 | 459 | ||
460 | sanitize_i387_state(target); | ||
461 | |||
449 | if (kbuf && pos == 0 && count == sizeof(env)) { | 462 | if (kbuf && pos == 0 && count == sizeof(env)) { |
450 | convert_from_fxsr(kbuf, target); | 463 | convert_from_fxsr(kbuf, target); |
451 | return 0; | 464 | return 0; |
@@ -467,6 +480,8 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
467 | if (ret) | 480 | if (ret) |
468 | return ret; | 481 | return ret; |
469 | 482 | ||
483 | sanitize_i387_state(target); | ||
484 | |||
470 | if (!HAVE_HWFP) | 485 | if (!HAVE_HWFP) |
471 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | 486 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); |
472 | 487 | ||
@@ -533,6 +548,9 @@ static int save_i387_xsave(void __user *buf) | |||
533 | struct _fpstate_ia32 __user *fx = buf; | 548 | struct _fpstate_ia32 __user *fx = buf; |
534 | int err = 0; | 549 | int err = 0; |
535 | 550 | ||
551 | |||
552 | sanitize_i387_state(tsk); | ||
553 | |||
536 | /* | 554 | /* |
537 | * For legacy compatible, we always set FP/SSE bits in the bit | 555 | * For legacy compatible, we always set FP/SSE bits in the bit |
538 | * vector while saving the state to the user context. | 556 | * vector while saving the state to the user context. |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 01ab17ae2ae7..ef10940e1af0 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -49,55 +49,94 @@ | |||
49 | #include <asm/system.h> | 49 | #include <asm/system.h> |
50 | #include <asm/apic.h> | 50 | #include <asm/apic.h> |
51 | 51 | ||
52 | /** | 52 | struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = |
53 | * pt_regs_to_gdb_regs - Convert ptrace regs to GDB regs | ||
54 | * @gdb_regs: A pointer to hold the registers in the order GDB wants. | ||
55 | * @regs: The &struct pt_regs of the current process. | ||
56 | * | ||
57 | * Convert the pt_regs in @regs into the format for registers that | ||
58 | * GDB expects, stored in @gdb_regs. | ||
59 | */ | ||
60 | void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | ||
61 | { | 53 | { |
62 | #ifndef CONFIG_X86_32 | 54 | #ifdef CONFIG_X86_32 |
63 | u32 *gdb_regs32 = (u32 *)gdb_regs; | 55 | { "ax", 4, offsetof(struct pt_regs, ax) }, |
56 | { "cx", 4, offsetof(struct pt_regs, cx) }, | ||
57 | { "dx", 4, offsetof(struct pt_regs, dx) }, | ||
58 | { "bx", 4, offsetof(struct pt_regs, bx) }, | ||
59 | { "sp", 4, offsetof(struct pt_regs, sp) }, | ||
60 | { "bp", 4, offsetof(struct pt_regs, bp) }, | ||
61 | { "si", 4, offsetof(struct pt_regs, si) }, | ||
62 | { "di", 4, offsetof(struct pt_regs, di) }, | ||
63 | { "ip", 4, offsetof(struct pt_regs, ip) }, | ||
64 | { "flags", 4, offsetof(struct pt_regs, flags) }, | ||
65 | { "cs", 4, offsetof(struct pt_regs, cs) }, | ||
66 | { "ss", 4, offsetof(struct pt_regs, ss) }, | ||
67 | { "ds", 4, offsetof(struct pt_regs, ds) }, | ||
68 | { "es", 4, offsetof(struct pt_regs, es) }, | ||
69 | { "fs", 4, -1 }, | ||
70 | { "gs", 4, -1 }, | ||
71 | #else | ||
72 | { "ax", 8, offsetof(struct pt_regs, ax) }, | ||
73 | { "bx", 8, offsetof(struct pt_regs, bx) }, | ||
74 | { "cx", 8, offsetof(struct pt_regs, cx) }, | ||
75 | { "dx", 8, offsetof(struct pt_regs, dx) }, | ||
76 | { "si", 8, offsetof(struct pt_regs, si) }, | ||
77 | { "di", 8, offsetof(struct pt_regs, di) }, | ||
78 | { "bp", 8, offsetof(struct pt_regs, bp) }, | ||
79 | { "sp", 8, offsetof(struct pt_regs, sp) }, | ||
80 | { "r8", 8, offsetof(struct pt_regs, r8) }, | ||
81 | { "r9", 8, offsetof(struct pt_regs, r9) }, | ||
82 | { "r10", 8, offsetof(struct pt_regs, r10) }, | ||
83 | { "r11", 8, offsetof(struct pt_regs, r11) }, | ||
84 | { "r12", 8, offsetof(struct pt_regs, r12) }, | ||
85 | { "r13", 8, offsetof(struct pt_regs, r13) }, | ||
86 | { "r14", 8, offsetof(struct pt_regs, r14) }, | ||
87 | { "r15", 8, offsetof(struct pt_regs, r15) }, | ||
88 | { "ip", 8, offsetof(struct pt_regs, ip) }, | ||
89 | { "flags", 4, offsetof(struct pt_regs, flags) }, | ||
90 | { "cs", 4, offsetof(struct pt_regs, cs) }, | ||
91 | { "ss", 4, offsetof(struct pt_regs, ss) }, | ||
64 | #endif | 92 | #endif |
65 | gdb_regs[GDB_AX] = regs->ax; | 93 | }; |
66 | gdb_regs[GDB_BX] = regs->bx; | 94 | |
67 | gdb_regs[GDB_CX] = regs->cx; | 95 | int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) |
68 | gdb_regs[GDB_DX] = regs->dx; | 96 | { |
69 | gdb_regs[GDB_SI] = regs->si; | 97 | if ( |
70 | gdb_regs[GDB_DI] = regs->di; | ||
71 | gdb_regs[GDB_BP] = regs->bp; | ||
72 | gdb_regs[GDB_PC] = regs->ip; | ||
73 | #ifdef CONFIG_X86_32 | 98 | #ifdef CONFIG_X86_32 |
74 | gdb_regs[GDB_PS] = regs->flags; | 99 | regno == GDB_SS || regno == GDB_FS || regno == GDB_GS || |
75 | gdb_regs[GDB_DS] = regs->ds; | 100 | #endif |
76 | gdb_regs[GDB_ES] = regs->es; | 101 | regno == GDB_SP || regno == GDB_ORIG_AX) |
77 | gdb_regs[GDB_CS] = regs->cs; | 102 | return 0; |
78 | gdb_regs[GDB_FS] = 0xFFFF; | 103 | |
79 | gdb_regs[GDB_GS] = 0xFFFF; | 104 | if (dbg_reg_def[regno].offset != -1) |
80 | if (user_mode_vm(regs)) { | 105 | memcpy((void *)regs + dbg_reg_def[regno].offset, mem, |
81 | gdb_regs[GDB_SS] = regs->ss; | 106 | dbg_reg_def[regno].size); |
82 | gdb_regs[GDB_SP] = regs->sp; | 107 | return 0; |
83 | } else { | 108 | } |
84 | gdb_regs[GDB_SS] = __KERNEL_DS; | 109 | |
85 | gdb_regs[GDB_SP] = kernel_stack_pointer(regs); | 110 | char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) |
111 | { | ||
112 | if (regno == GDB_ORIG_AX) { | ||
113 | memcpy(mem, ®s->orig_ax, sizeof(regs->orig_ax)); | ||
114 | return "orig_ax"; | ||
86 | } | 115 | } |
87 | #else | 116 | if (regno >= DBG_MAX_REG_NUM || regno < 0) |
88 | gdb_regs[GDB_R8] = regs->r8; | 117 | return NULL; |
89 | gdb_regs[GDB_R9] = regs->r9; | 118 | |
90 | gdb_regs[GDB_R10] = regs->r10; | 119 | if (dbg_reg_def[regno].offset != -1) |
91 | gdb_regs[GDB_R11] = regs->r11; | 120 | memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, |
92 | gdb_regs[GDB_R12] = regs->r12; | 121 | dbg_reg_def[regno].size); |
93 | gdb_regs[GDB_R13] = regs->r13; | 122 | |
94 | gdb_regs[GDB_R14] = regs->r14; | 123 | switch (regno) { |
95 | gdb_regs[GDB_R15] = regs->r15; | 124 | #ifdef CONFIG_X86_32 |
96 | gdb_regs32[GDB_PS] = regs->flags; | 125 | case GDB_SS: |
97 | gdb_regs32[GDB_CS] = regs->cs; | 126 | if (!user_mode_vm(regs)) |
98 | gdb_regs32[GDB_SS] = regs->ss; | 127 | *(unsigned long *)mem = __KERNEL_DS; |
99 | gdb_regs[GDB_SP] = kernel_stack_pointer(regs); | 128 | break; |
129 | case GDB_SP: | ||
130 | if (!user_mode_vm(regs)) | ||
131 | *(unsigned long *)mem = kernel_stack_pointer(regs); | ||
132 | break; | ||
133 | case GDB_GS: | ||
134 | case GDB_FS: | ||
135 | *(unsigned long *)mem = 0xFFFF; | ||
136 | break; | ||
100 | #endif | 137 | #endif |
138 | } | ||
139 | return dbg_reg_def[regno].name; | ||
101 | } | 140 | } |
102 | 141 | ||
103 | /** | 142 | /** |
@@ -150,54 +189,13 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
150 | gdb_regs[GDB_SP] = p->thread.sp; | 189 | gdb_regs[GDB_SP] = p->thread.sp; |
151 | } | 190 | } |
152 | 191 | ||
153 | /** | ||
154 | * gdb_regs_to_pt_regs - Convert GDB regs to ptrace regs. | ||
155 | * @gdb_regs: A pointer to hold the registers we've received from GDB. | ||
156 | * @regs: A pointer to a &struct pt_regs to hold these values in. | ||
157 | * | ||
158 | * Convert the GDB regs in @gdb_regs into the pt_regs, and store them | ||
159 | * in @regs. | ||
160 | */ | ||
161 | void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | ||
162 | { | ||
163 | #ifndef CONFIG_X86_32 | ||
164 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
165 | #endif | ||
166 | regs->ax = gdb_regs[GDB_AX]; | ||
167 | regs->bx = gdb_regs[GDB_BX]; | ||
168 | regs->cx = gdb_regs[GDB_CX]; | ||
169 | regs->dx = gdb_regs[GDB_DX]; | ||
170 | regs->si = gdb_regs[GDB_SI]; | ||
171 | regs->di = gdb_regs[GDB_DI]; | ||
172 | regs->bp = gdb_regs[GDB_BP]; | ||
173 | regs->ip = gdb_regs[GDB_PC]; | ||
174 | #ifdef CONFIG_X86_32 | ||
175 | regs->flags = gdb_regs[GDB_PS]; | ||
176 | regs->ds = gdb_regs[GDB_DS]; | ||
177 | regs->es = gdb_regs[GDB_ES]; | ||
178 | regs->cs = gdb_regs[GDB_CS]; | ||
179 | #else | ||
180 | regs->r8 = gdb_regs[GDB_R8]; | ||
181 | regs->r9 = gdb_regs[GDB_R9]; | ||
182 | regs->r10 = gdb_regs[GDB_R10]; | ||
183 | regs->r11 = gdb_regs[GDB_R11]; | ||
184 | regs->r12 = gdb_regs[GDB_R12]; | ||
185 | regs->r13 = gdb_regs[GDB_R13]; | ||
186 | regs->r14 = gdb_regs[GDB_R14]; | ||
187 | regs->r15 = gdb_regs[GDB_R15]; | ||
188 | regs->flags = gdb_regs32[GDB_PS]; | ||
189 | regs->cs = gdb_regs32[GDB_CS]; | ||
190 | regs->ss = gdb_regs32[GDB_SS]; | ||
191 | #endif | ||
192 | } | ||
193 | |||
194 | static struct hw_breakpoint { | 192 | static struct hw_breakpoint { |
195 | unsigned enabled; | 193 | unsigned enabled; |
196 | unsigned long addr; | 194 | unsigned long addr; |
197 | int len; | 195 | int len; |
198 | int type; | 196 | int type; |
199 | struct perf_event **pev; | 197 | struct perf_event **pev; |
200 | } breakinfo[4]; | 198 | } breakinfo[HBP_NUM]; |
201 | 199 | ||
202 | static unsigned long early_dr7; | 200 | static unsigned long early_dr7; |
203 | 201 | ||
@@ -205,7 +203,7 @@ static void kgdb_correct_hw_break(void) | |||
205 | { | 203 | { |
206 | int breakno; | 204 | int breakno; |
207 | 205 | ||
208 | for (breakno = 0; breakno < 4; breakno++) { | 206 | for (breakno = 0; breakno < HBP_NUM; breakno++) { |
209 | struct perf_event *bp; | 207 | struct perf_event *bp; |
210 | struct arch_hw_breakpoint *info; | 208 | struct arch_hw_breakpoint *info; |
211 | int val; | 209 | int val; |
@@ -292,10 +290,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | |||
292 | { | 290 | { |
293 | int i; | 291 | int i; |
294 | 292 | ||
295 | for (i = 0; i < 4; i++) | 293 | for (i = 0; i < HBP_NUM; i++) |
296 | if (breakinfo[i].addr == addr && breakinfo[i].enabled) | 294 | if (breakinfo[i].addr == addr && breakinfo[i].enabled) |
297 | break; | 295 | break; |
298 | if (i == 4) | 296 | if (i == HBP_NUM) |
299 | return -1; | 297 | return -1; |
300 | 298 | ||
301 | if (hw_break_release_slot(i)) { | 299 | if (hw_break_release_slot(i)) { |
@@ -313,7 +311,7 @@ static void kgdb_remove_all_hw_break(void) | |||
313 | int cpu = raw_smp_processor_id(); | 311 | int cpu = raw_smp_processor_id(); |
314 | struct perf_event *bp; | 312 | struct perf_event *bp; |
315 | 313 | ||
316 | for (i = 0; i < 4; i++) { | 314 | for (i = 0; i < HBP_NUM; i++) { |
317 | if (!breakinfo[i].enabled) | 315 | if (!breakinfo[i].enabled) |
318 | continue; | 316 | continue; |
319 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); | 317 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); |
@@ -333,10 +331,10 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | |||
333 | { | 331 | { |
334 | int i; | 332 | int i; |
335 | 333 | ||
336 | for (i = 0; i < 4; i++) | 334 | for (i = 0; i < HBP_NUM; i++) |
337 | if (!breakinfo[i].enabled) | 335 | if (!breakinfo[i].enabled) |
338 | break; | 336 | break; |
339 | if (i == 4) | 337 | if (i == HBP_NUM) |
340 | return -1; | 338 | return -1; |
341 | 339 | ||
342 | switch (bptype) { | 340 | switch (bptype) { |
@@ -397,7 +395,7 @@ void kgdb_disable_hw_debug(struct pt_regs *regs) | |||
397 | 395 | ||
398 | /* Disable hardware debugging while we are in kgdb: */ | 396 | /* Disable hardware debugging while we are in kgdb: */ |
399 | set_debugreg(0UL, 7); | 397 | set_debugreg(0UL, 7); |
400 | for (i = 0; i < 4; i++) { | 398 | for (i = 0; i < HBP_NUM; i++) { |
401 | if (!breakinfo[i].enabled) | 399 | if (!breakinfo[i].enabled) |
402 | continue; | 400 | continue; |
403 | if (dbg_is_early) { | 401 | if (dbg_is_early) { |
@@ -458,7 +456,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
458 | { | 456 | { |
459 | unsigned long addr; | 457 | unsigned long addr; |
460 | char *ptr; | 458 | char *ptr; |
461 | int newPC; | ||
462 | 459 | ||
463 | switch (remcomInBuffer[0]) { | 460 | switch (remcomInBuffer[0]) { |
464 | case 'c': | 461 | case 'c': |
@@ -469,8 +466,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
469 | linux_regs->ip = addr; | 466 | linux_regs->ip = addr; |
470 | case 'D': | 467 | case 'D': |
471 | case 'k': | 468 | case 'k': |
472 | newPC = linux_regs->ip; | ||
473 | |||
474 | /* clear the trace bit */ | 469 | /* clear the trace bit */ |
475 | linux_regs->flags &= ~X86_EFLAGS_TF; | 470 | linux_regs->flags &= ~X86_EFLAGS_TF; |
476 | atomic_set(&kgdb_cpu_doing_single_step, -1); | 471 | atomic_set(&kgdb_cpu_doing_single_step, -1); |
@@ -645,7 +640,7 @@ void kgdb_arch_late(void) | |||
645 | attr.bp_len = HW_BREAKPOINT_LEN_1; | 640 | attr.bp_len = HW_BREAKPOINT_LEN_1; |
646 | attr.bp_type = HW_BREAKPOINT_W; | 641 | attr.bp_type = HW_BREAKPOINT_W; |
647 | attr.disabled = 1; | 642 | attr.disabled = 1; |
648 | for (i = 0; i < 4; i++) { | 643 | for (i = 0; i < HBP_NUM; i++) { |
649 | if (breakinfo[i].pev) | 644 | if (breakinfo[i].pev) |
650 | continue; | 645 | continue; |
651 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); | 646 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); |
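The kgdb rework replaces the hand-written pt_regs<->gdb_regs copy routines with a dbg_reg_def[] table of {name, size, offset} entries that dbg_get_reg()/dbg_set_reg() walk generically via offsetof(). A stand-alone sketch of that offsetof-driven access, using a made-up register struct:

#include <stdio.h>
#include <stddef.h>
#include <string.h>

struct fake_regs {                          /* stand-in for struct pt_regs */
	unsigned long ax, bx, ip;
};

struct reg_def {                            /* same shape as struct dbg_reg_def_t */
	const char *name;
	int size;
	int offset;
};

static const struct reg_def reg_def[] = {
	{ "ax", sizeof(unsigned long), offsetof(struct fake_regs, ax) },
	{ "bx", sizeof(unsigned long), offsetof(struct fake_regs, bx) },
	{ "ip", sizeof(unsigned long), offsetof(struct fake_regs, ip) },
};

/* copy one register out of the regs blob, the way dbg_get_reg() does */
static const char *get_reg(int regno, void *mem, struct fake_regs *regs)
{
	memcpy(mem, (char *)regs + reg_def[regno].offset, reg_def[regno].size);
	return reg_def[regno].name;
}

int main(void)
{
	struct fake_regs regs = { .ax = 1, .bx = 2, .ip = 0xc0ffee };
	unsigned long val;
	const char *name = get_reg(2, &val, &regs);

	printf("%s = %#lx\n", name, val);        /* ip = 0xc0ffee */
	return 0;
}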
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 675879b65ce6..1bfb6cf4dd55 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to) | |||
126 | } | 126 | } |
127 | 127 | ||
128 | /* | 128 | /* |
129 | * Check for the REX prefix which can only exist on X86_64 | 129 | * Skip the prefixes of the instruction. |
130 | * X86_32 always returns 0 | ||
131 | */ | 130 | */ |
132 | static int __kprobes is_REX_prefix(kprobe_opcode_t *insn) | 131 | static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) |
133 | { | 132 | { |
133 | insn_attr_t attr; | ||
134 | |||
135 | attr = inat_get_opcode_attribute((insn_byte_t)*insn); | ||
136 | while (inat_is_legacy_prefix(attr)) { | ||
137 | insn++; | ||
138 | attr = inat_get_opcode_attribute((insn_byte_t)*insn); | ||
139 | } | ||
134 | #ifdef CONFIG_X86_64 | 140 | #ifdef CONFIG_X86_64 |
135 | if ((*insn & 0xf0) == 0x40) | 141 | if (inat_is_rex_prefix(attr)) |
136 | return 1; | 142 | insn++; |
137 | #endif | 143 | #endif |
138 | return 0; | 144 | return insn; |
139 | } | 145 | } |
140 | 146 | ||
141 | /* | 147 | /* |
@@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr) | |||
272 | */ | 278 | */ |
273 | static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | 279 | static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) |
274 | { | 280 | { |
281 | /* Skip prefixes */ | ||
282 | insn = skip_prefixes(insn); | ||
283 | |||
275 | switch (*insn) { | 284 | switch (*insn) { |
276 | case 0xfa: /* cli */ | 285 | case 0xfa: /* cli */ |
277 | case 0xfb: /* sti */ | 286 | case 0xfb: /* sti */ |
@@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
280 | return 1; | 289 | return 1; |
281 | } | 290 | } |
282 | 291 | ||
283 | /* | ||
284 | * on X86_64, 0x40-0x4f are REX prefixes so we need to look | ||
285 | * at the next byte instead.. but of course not recurse infinitely | ||
286 | */ | ||
287 | if (is_REX_prefix(insn)) | ||
288 | return is_IF_modifier(++insn); | ||
289 | |||
290 | return 0; | 292 | return 0; |
291 | } | 293 | } |
292 | 294 | ||
@@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p, | |||
803 | unsigned long orig_ip = (unsigned long)p->addr; | 805 | unsigned long orig_ip = (unsigned long)p->addr; |
804 | kprobe_opcode_t *insn = p->ainsn.insn; | 806 | kprobe_opcode_t *insn = p->ainsn.insn; |
805 | 807 | ||
806 | /*skip the REX prefix*/ | 808 | /* Skip prefixes */ |
807 | if (is_REX_prefix(insn)) | 809 | insn = skip_prefixes(insn); |
808 | insn++; | ||
809 | 810 | ||
810 | regs->flags &= ~X86_EFLAGS_TF; | 811 | regs->flags &= ~X86_EFLAGS_TF; |
811 | switch (*insn) { | 812 | switch (*insn) { |
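skip_prefixes() above uses the in-kernel instruction decoder (inat_get_opcode_attribute() and friends) to step over any legacy prefixes and, on 64-bit, a single REX prefix, replacing the old REX-only check. A simplified user-space sketch with a hard-coded prefix table standing in for the decoder:

#include <stdio.h>
#include <stdint.h>

static int is_legacy_prefix(uint8_t b)
{
	switch (b) {
	case 0xf0: case 0xf2: case 0xf3:             /* lock, repne, rep */
	case 0x2e: case 0x36: case 0x3e: case 0x26:  /* segment overrides */
	case 0x64: case 0x65:
	case 0x66: case 0x67:                        /* operand/address size */
		return 1;
	}
	return 0;
}

static const uint8_t *skip_prefixes(const uint8_t *insn)
{
	while (is_legacy_prefix(*insn))
		insn++;
	if ((*insn & 0xf0) == 0x40)                  /* 64-bit REX prefix */
		insn++;
	return insn;
}

int main(void)
{
	/* f3 48 a5 = rep movsq: prefixes f3 (rep) and 48 (REX.W) */
	const uint8_t insn[] = { 0xf3, 0x48, 0xa5 };

	printf("opcode byte after prefixes: %#x\n", *skip_prefixes(insn));
	return 0;
}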
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index d86dbf7e54be..d7b6f7fb4fec 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -274,6 +274,18 @@ static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt) | |||
274 | 274 | ||
275 | void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { } | 275 | void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { } |
276 | 276 | ||
277 | static void __init smp_register_lapic_address(unsigned long address) | ||
278 | { | ||
279 | mp_lapic_addr = address; | ||
280 | |||
281 | set_fixmap_nocache(FIX_APIC_BASE, address); | ||
282 | if (boot_cpu_physical_apicid == -1U) { | ||
283 | boot_cpu_physical_apicid = read_apic_id(); | ||
284 | apic_version[boot_cpu_physical_apicid] = | ||
285 | GET_APIC_VERSION(apic_read(APIC_LVR)); | ||
286 | } | ||
287 | } | ||
288 | |||
277 | static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) | 289 | static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) |
278 | { | 290 | { |
279 | char str[16]; | 291 | char str[16]; |
@@ -295,6 +307,10 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) | |||
295 | if (early) | 307 | if (early) |
296 | return 1; | 308 | return 1; |
297 | 309 | ||
310 | /* Initialize the lapic mapping */ | ||
311 | if (!acpi_lapic) | ||
312 | smp_register_lapic_address(mpc->lapic); | ||
313 | |||
298 | if (mpc->oemptr) | 314 | if (mpc->oemptr) |
299 | x86_init.mpparse.smp_read_mpc_oem(mpc); | 315 | x86_init.mpparse.smp_read_mpc_oem(mpc); |
300 | 316 | ||
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index 5915e0b33303..79ae68154e87 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c | |||
@@ -25,8 +25,34 @@ | |||
25 | #include <asm/i8259.h> | 25 | #include <asm/i8259.h> |
26 | #include <asm/apb_timer.h> | 26 | #include <asm/apb_timer.h> |
27 | 27 | ||
28 | /* | ||
29 | * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, | ||
30 | * cmdline option x86_mrst_timer can be used to override the configuration | ||
31 | * to prefer one or the other. | ||
32 | * at runtime, there are basically three timer configurations: | ||
33 | * 1. per cpu apbt clock only | ||
34 | * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only | ||
35 | * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast. | ||
36 | * | ||
37 | * by default (without the cmdline option), platform code first detects the cpu type | ||
38 | * to see if we are on lincroft or penwell, then sets up the lapic or apbt | ||
39 | * clocks accordingly. | ||
40 | * i.e. by default, medfield uses configuration #2, moorestown uses #1. | ||
41 | * config #3 is supported but not recommended on medfield. | ||
42 | * | ||
43 | * rating and feature summary: | ||
44 | * lapic (with C3STOP) --------- 100 | ||
45 | * apbt (always-on) ------------ 110 | ||
46 | * lapic (always-on,ARAT) ------ 150 | ||
47 | */ | ||
48 | |||
49 | __cpuinitdata enum mrst_timer_options mrst_timer_options; | ||
50 | |||
28 | static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; | 51 | static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; |
29 | static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; | 52 | static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; |
53 | enum mrst_cpu_type __mrst_cpu_chip; | ||
54 | EXPORT_SYMBOL_GPL(__mrst_cpu_chip); | ||
55 | |||
30 | int sfi_mtimer_num; | 56 | int sfi_mtimer_num; |
31 | 57 | ||
32 | struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; | 58 | struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; |
@@ -167,18 +193,6 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) | |||
167 | return 0; | 193 | return 0; |
168 | } | 194 | } |
169 | 195 | ||
170 | /* | ||
171 | * the secondary clock in Moorestown can be APBT or LAPIC clock, default to | ||
172 | * APBT but cmdline option can also override it. | ||
173 | */ | ||
174 | static void __cpuinit mrst_setup_secondary_clock(void) | ||
175 | { | ||
176 | /* restore default lapic clock if disabled by cmdline */ | ||
177 | if (disable_apbt_percpu) | ||
178 | return setup_secondary_APIC_clock(); | ||
179 | apbt_setup_secondary_clock(); | ||
180 | } | ||
181 | |||
182 | static unsigned long __init mrst_calibrate_tsc(void) | 196 | static unsigned long __init mrst_calibrate_tsc(void) |
183 | { | 197 | { |
184 | unsigned long flags, fast_calibrate; | 198 | unsigned long flags, fast_calibrate; |
@@ -195,6 +209,21 @@ static unsigned long __init mrst_calibrate_tsc(void) | |||
195 | 209 | ||
196 | void __init mrst_time_init(void) | 210 | void __init mrst_time_init(void) |
197 | { | 211 | { |
212 | switch (mrst_timer_options) { | ||
213 | case MRST_TIMER_APBT_ONLY: | ||
214 | break; | ||
215 | case MRST_TIMER_LAPIC_APBT: | ||
216 | x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; | ||
217 | x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; | ||
218 | break; | ||
219 | default: | ||
220 | if (!boot_cpu_has(X86_FEATURE_ARAT)) | ||
221 | break; | ||
222 | x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; | ||
223 | x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; | ||
224 | return; | ||
225 | } | ||
226 | /* we need at least one APB timer */ | ||
198 | sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); | 227 | sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); |
199 | pre_init_apic_IRQ0(); | 228 | pre_init_apic_IRQ0(); |
200 | apbt_time_init(); | 229 | apbt_time_init(); |
@@ -205,16 +234,21 @@ void __init mrst_rtc_init(void) | |||
205 | sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); | 234 | sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); |
206 | } | 235 | } |
207 | 236 | ||
208 | /* | 237 | void __cpuinit mrst_arch_setup(void) |
209 | * if we use per cpu apb timer, the bootclock already setup. if we use lapic | ||
210 | * timer and one apbt timer for broadcast, we need to set up lapic boot clock. | ||
211 | */ | ||
212 | static void __init mrst_setup_boot_clock(void) | ||
213 | { | 238 | { |
214 | pr_info("%s: per cpu apbt flag %d \n", __func__, disable_apbt_percpu); | 239 | if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) |
215 | if (disable_apbt_percpu) | 240 | __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; |
216 | setup_boot_APIC_clock(); | 241 | else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26) |
217 | }; | 242 | __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT; |
243 | else { | ||
244 | pr_err("Unknown Moorestown CPU (%d:%d), defaulting to Lincroft\n", | ||
245 | boot_cpu_data.x86, boot_cpu_data.x86_model); | ||
246 | __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT; | ||
247 | } | ||
248 | pr_debug("Moorestown CPU %s identified\n", | ||
249 | (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ? | ||
250 | "Lincroft" : "Penwell"); | ||
251 | } | ||
218 | 252 | ||
219 | /* MID systems don't have i8042 controller */ | 253 | /* MID systems don't have i8042 controller */ |
220 | static int mrst_i8042_detect(void) | 254 | static int mrst_i8042_detect(void) |
@@ -232,11 +266,13 @@ void __init x86_mrst_early_setup(void) | |||
232 | x86_init.resources.reserve_resources = x86_init_noop; | 266 | x86_init.resources.reserve_resources = x86_init_noop; |
233 | 267 | ||
234 | x86_init.timers.timer_init = mrst_time_init; | 268 | x86_init.timers.timer_init = mrst_time_init; |
235 | x86_init.timers.setup_percpu_clockev = mrst_setup_boot_clock; | 269 | x86_init.timers.setup_percpu_clockev = x86_init_noop; |
236 | 270 | ||
237 | x86_init.irqs.pre_vector_init = x86_init_noop; | 271 | x86_init.irqs.pre_vector_init = x86_init_noop; |
238 | 272 | ||
239 | x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock; | 273 | x86_init.oem.arch_setup = mrst_arch_setup; |
274 | |||
275 | x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock; | ||
240 | 276 | ||
241 | x86_platform.calibrate_tsc = mrst_calibrate_tsc; | 277 | x86_platform.calibrate_tsc = mrst_calibrate_tsc; |
242 | x86_platform.i8042_detect = mrst_i8042_detect; | 278 | x86_platform.i8042_detect = mrst_i8042_detect; |
@@ -250,3 +286,26 @@ void __init x86_mrst_early_setup(void) | |||
250 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; | 286 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; |
251 | 287 | ||
252 | } | 288 | } |
289 | |||
290 | /* | ||
291 | * if user does not want to use per CPU apb timer, just give it a lower rating | ||
292 | * than local apic timer and skip the late per cpu timer init. | ||
293 | */ | ||
294 | static inline int __init setup_x86_mrst_timer(char *arg) | ||
295 | { | ||
296 | if (!arg) | ||
297 | return -EINVAL; | ||
298 | |||
299 | if (strcmp("apbt_only", arg) == 0) | ||
300 | mrst_timer_options = MRST_TIMER_APBT_ONLY; | ||
301 | else if (strcmp("lapic_and_apbt", arg) == 0) | ||
302 | mrst_timer_options = MRST_TIMER_LAPIC_APBT; | ||
303 | else { | ||
304 | pr_warning("X86 MRST timer option %s not recognised" | ||
305 | " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", | ||
306 | arg); | ||
307 | return -EINVAL; | ||
308 | } | ||
309 | return 0; | ||
310 | } | ||
311 | __setup("x86_mrst_timer=", setup_x86_mrst_timer); | ||
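
Editorial note: the hunk above wires the new x86_mrst_timer= boot option into the per-cpu clockevent selection. A minimal userspace sketch of the same option-to-hook dispatch follows; the enum values and option strings mirror the hunk, while main(), the ARAT stand-in and the printed text are purely illustrative and not part of the patch.

    #include <stdio.h>
    #include <string.h>

    enum mrst_timer_options {
            MRST_TIMER_DEFAULT,     /* LAPIC when ARAT is present, else APBT */
            MRST_TIMER_APBT_ONLY,   /* x86_mrst_timer=apbt_only */
            MRST_TIMER_LAPIC_APBT,  /* x86_mrst_timer=lapic_and_apbt */
    };

    static enum mrst_timer_options mrst_timer_options;

    /* mirrors setup_x86_mrst_timer() in the hunk above */
    static int parse_mrst_timer(const char *arg)
    {
            if (!arg)
                    return -1;
            if (strcmp(arg, "apbt_only") == 0)
                    mrst_timer_options = MRST_TIMER_APBT_ONLY;
            else if (strcmp(arg, "lapic_and_apbt") == 0)
                    mrst_timer_options = MRST_TIMER_LAPIC_APBT;
            else
                    return -1;
            return 0;
    }

    int main(int argc, char **argv)
    {
            int cpu_has_arat = 1;   /* stand-in for boot_cpu_has(X86_FEATURE_ARAT) */

            if (argc > 1 && parse_mrst_timer(argv[1]))
                    fprintf(stderr, "option not recognised, using default\n");

            switch (mrst_timer_options) {
            case MRST_TIMER_APBT_ONLY:
                    puts("per-cpu clockevents: APB timers only");
                    break;
            case MRST_TIMER_LAPIC_APBT:
                    puts("per-cpu clockevents: LAPIC, one APB timer for broadcast");
                    break;
            default:
                    puts(cpu_has_arat ? "default: LAPIC (ARAT present)"
                                      : "default: APB timers (no ARAT)");
            }
            return 0;
    }
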
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 8297160c41b3..0e0cdde519be 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c | |||
@@ -21,10 +21,7 @@ | |||
21 | #include <asm/geode.h> | 21 | #include <asm/geode.h> |
22 | #include <asm/setup.h> | 22 | #include <asm/setup.h> |
23 | #include <asm/olpc.h> | 23 | #include <asm/olpc.h> |
24 | 24 | #include <asm/olpc_ofw.h> | |
25 | #ifdef CONFIG_OPEN_FIRMWARE | ||
26 | #include <asm/ofw.h> | ||
27 | #endif | ||
28 | 25 | ||
29 | struct olpc_platform_t olpc_platform_info; | 26 | struct olpc_platform_t olpc_platform_info; |
30 | EXPORT_SYMBOL_GPL(olpc_platform_info); | 27 | EXPORT_SYMBOL_GPL(olpc_platform_info); |
@@ -145,7 +142,7 @@ restart: | |||
145 | * The OBF flag will sometimes misbehave due to what we believe | 142 | * The OBF flag will sometimes misbehave due to what we believe |
146 | * is a hardware quirk.. | 143 | * is a hardware quirk.. |
147 | */ | 144 | */ |
148 | printk(KERN_DEBUG "olpc-ec: running cmd 0x%x\n", cmd); | 145 | pr_devel("olpc-ec: running cmd 0x%x\n", cmd); |
149 | outb(cmd, 0x6c); | 146 | outb(cmd, 0x6c); |
150 | 147 | ||
151 | if (wait_on_ibf(0x6c, 0)) { | 148 | if (wait_on_ibf(0x6c, 0)) { |
@@ -162,8 +159,7 @@ restart: | |||
162 | " EC accept data!\n"); | 159 | " EC accept data!\n"); |
163 | goto err; | 160 | goto err; |
164 | } | 161 | } |
165 | printk(KERN_DEBUG "olpc-ec: sending cmd arg 0x%x\n", | 162 | pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]); |
166 | inbuf[i]); | ||
167 | outb(inbuf[i], 0x68); | 163 | outb(inbuf[i], 0x68); |
168 | } | 164 | } |
169 | } | 165 | } |
@@ -176,8 +172,7 @@ restart: | |||
176 | goto restart; | 172 | goto restart; |
177 | } | 173 | } |
178 | outbuf[i] = inb(0x68); | 174 | outbuf[i] = inb(0x68); |
179 | printk(KERN_DEBUG "olpc-ec: received 0x%x\n", | 175 | pr_devel("olpc-ec: received 0x%x\n", outbuf[i]); |
180 | outbuf[i]); | ||
181 | } | 176 | } |
182 | } | 177 | } |
183 | 178 | ||
@@ -188,14 +183,15 @@ err: | |||
188 | } | 183 | } |
189 | EXPORT_SYMBOL_GPL(olpc_ec_cmd); | 184 | EXPORT_SYMBOL_GPL(olpc_ec_cmd); |
190 | 185 | ||
191 | #ifdef CONFIG_OPEN_FIRMWARE | 186 | #ifdef CONFIG_OLPC_OPENFIRMWARE |
192 | static void __init platform_detect(void) | 187 | static void __init platform_detect(void) |
193 | { | 188 | { |
194 | size_t propsize; | 189 | size_t propsize; |
195 | __be32 rev; | 190 | __be32 rev; |
191 | const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 }; | ||
192 | void *res[] = { &propsize }; | ||
196 | 193 | ||
197 | if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, | 194 | if (olpc_ofw("getprop", args, res) || propsize != 4) { |
198 | &propsize) || propsize != 4) { | ||
199 | printk(KERN_ERR "ofw: getprop call failed!\n"); | 195 | printk(KERN_ERR "ofw: getprop call failed!\n"); |
200 | rev = cpu_to_be32(0); | 196 | rev = cpu_to_be32(0); |
201 | } | 197 | } |
diff --git a/arch/x86/kernel/olpc_ofw.c b/arch/x86/kernel/olpc_ofw.c new file mode 100644 index 000000000000..3218aa71ab5e --- /dev/null +++ b/arch/x86/kernel/olpc_ofw.c | |||
@@ -0,0 +1,106 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/init.h> | ||
4 | #include <asm/page.h> | ||
5 | #include <asm/setup.h> | ||
6 | #include <asm/io.h> | ||
7 | #include <asm/pgtable.h> | ||
8 | #include <asm/olpc_ofw.h> | ||
9 | |||
10 | /* address of OFW callback interface; will be NULL if OFW isn't found */ | ||
11 | static int (*olpc_ofw_cif)(int *); | ||
12 | |||
13 | /* page dir entry containing OFW's pgdir table; filled in by head_32.S */ | ||
14 | u32 olpc_ofw_pgd __initdata; | ||
15 | |||
16 | static DEFINE_SPINLOCK(ofw_lock); | ||
17 | |||
18 | #define MAXARGS 10 | ||
19 | |||
20 | void __init setup_olpc_ofw_pgd(void) | ||
21 | { | ||
22 | pgd_t *base, *ofw_pde; | ||
23 | |||
24 | if (!olpc_ofw_cif) | ||
25 | return; | ||
26 | |||
27 | /* fetch OFW's PDE */ | ||
28 | base = early_ioremap(olpc_ofw_pgd, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD); | ||
29 | if (!base) { | ||
30 | printk(KERN_ERR "failed to remap OFW's pgd - disabling OFW!\n"); | ||
31 | olpc_ofw_cif = NULL; | ||
32 | return; | ||
33 | } | ||
34 | ofw_pde = &base[OLPC_OFW_PDE_NR]; | ||
35 | |||
36 | /* install OFW's PDE permanently into the kernel's pgtable */ | ||
37 | set_pgd(&swapper_pg_dir[OLPC_OFW_PDE_NR], *ofw_pde); | ||
38 | /* implicit optimization barrier here due to uninlined function return */ | ||
39 | |||
40 | early_iounmap(base, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD); | ||
41 | } | ||
42 | |||
43 | int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res, | ||
44 | void **res) | ||
45 | { | ||
46 | int ofw_args[MAXARGS + 3]; | ||
47 | unsigned long flags; | ||
48 | int ret, i, *p; | ||
49 | |||
50 | BUG_ON(nr_args + nr_res > MAXARGS); | ||
51 | |||
52 | if (!olpc_ofw_cif) | ||
53 | return -EIO; | ||
54 | |||
55 | ofw_args[0] = (int)name; | ||
56 | ofw_args[1] = nr_args; | ||
57 | ofw_args[2] = nr_res; | ||
58 | |||
59 | p = &ofw_args[3]; | ||
60 | for (i = 0; i < nr_args; i++, p++) | ||
61 | *p = (int)args[i]; | ||
62 | |||
63 | /* call into ofw */ | ||
64 | spin_lock_irqsave(&ofw_lock, flags); | ||
65 | ret = olpc_ofw_cif(ofw_args); | ||
66 | spin_unlock_irqrestore(&ofw_lock, flags); | ||
67 | |||
68 | if (!ret) { | ||
69 | for (i = 0; i < nr_res; i++, p++) | ||
70 | *((int *)res[i]) = *p; | ||
71 | } | ||
72 | |||
73 | return ret; | ||
74 | } | ||
75 | EXPORT_SYMBOL_GPL(__olpc_ofw); | ||
76 | |||
77 | /* OFW cif _should_ be above this address */ | ||
78 | #define OFW_MIN 0xff000000 | ||
79 | |||
80 | /* OFW starts on a 1MB boundary */ | ||
81 | #define OFW_BOUND (1<<20) | ||
82 | |||
83 | void __init olpc_ofw_detect(void) | ||
84 | { | ||
85 | struct olpc_ofw_header *hdr = &boot_params.olpc_ofw_header; | ||
86 | unsigned long start; | ||
87 | |||
88 | /* ensure OFW booted us by checking for "OFW " string */ | ||
89 | if (hdr->ofw_magic != OLPC_OFW_SIG) | ||
90 | return; | ||
91 | |||
92 | olpc_ofw_cif = (int (*)(int *))hdr->cif_handler; | ||
93 | |||
94 | if ((unsigned long)olpc_ofw_cif < OFW_MIN) { | ||
95 | printk(KERN_ERR "OFW detected, but cif has invalid address 0x%lx - disabling.\n", | ||
96 | (unsigned long)olpc_ofw_cif); | ||
97 | olpc_ofw_cif = NULL; | ||
98 | return; | ||
99 | } | ||
100 | |||
101 | /* determine where OFW starts in memory */ | ||
102 | start = round_down((unsigned long)olpc_ofw_cif, OFW_BOUND); | ||
103 | printk(KERN_INFO "OFW detected in memory, cif @ 0x%lx (reserving top %ldMB)\n", | ||
104 | (unsigned long)olpc_ofw_cif, (-start) >> 20); | ||
105 | reserve_top_address(-start); | ||
106 | } | ||
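
Editorial note: the new olpc_ofw.c above packs the service name, the argument and result counts, and the arguments themselves into a flat int array before jumping to the OFW client interface, then copies results back out. Here is a standalone sketch of that marshalling, with a dummy cif function standing in for the firmware entry point; the dummy and its return values are made up for illustration, only the packing layout follows the patch.

    #include <stdio.h>

    #define MAXARGS 10

    /* dummy client interface: pretends "getprop" reports a 4-byte property */
    static int dummy_cif(int *args)
    {
            int nr_args = args[1];
            int nr_res = args[2];

            if (nr_res >= 1)
                    args[3 + nr_args] = 4;  /* e.g. property size */
            return 0;
    }

    /* mirrors __olpc_ofw(): pack name, counts and args, call cif, copy results */
    static int olpc_ofw_call(const char *name, int nr_args, const void **args,
                             int nr_res, void **res)
    {
            int ofw_args[MAXARGS + 3];
            int i, ret, *p;

            if (nr_args + nr_res > MAXARGS)
                    return -1;

            ofw_args[0] = (int)(long)name;  /* the real code is 32-bit only */
            ofw_args[1] = nr_args;
            ofw_args[2] = nr_res;

            p = &ofw_args[3];
            for (i = 0; i < nr_args; i++, p++)
                    *p = (int)(long)args[i];

            ret = dummy_cif(ofw_args);
            if (!ret)
                    for (i = 0; i < nr_res; i++, p++)
                            *((int *)res[i]) = *p;
            return ret;
    }

    int main(void)
    {
            int rev = 0, propsize = 0;
            const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 };
            void *res[] = { &propsize };

            if (!olpc_ofw_call("getprop", 4, args, 1, res))
                    printf("propsize = %d\n", propsize);
            return 0;
    }
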
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 4b7e3d8b01dd..9f07cfcbd3a5 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <asm/calgary.h> | 13 | #include <asm/calgary.h> |
14 | #include <asm/amd_iommu.h> | 14 | #include <asm/amd_iommu.h> |
15 | #include <asm/x86_init.h> | 15 | #include <asm/x86_init.h> |
16 | #include <asm/xen/swiotlb-xen.h> | ||
16 | 17 | ||
17 | static int forbid_dac __read_mostly; | 18 | static int forbid_dac __read_mostly; |
18 | 19 | ||
@@ -132,7 +133,7 @@ void __init pci_iommu_alloc(void) | |||
132 | /* free the range so iommu could get some range less than 4G */ | 133 | /* free the range so iommu could get some range less than 4G */ |
133 | dma32_free_bootmem(); | 134 | dma32_free_bootmem(); |
134 | 135 | ||
135 | if (pci_swiotlb_detect()) | 136 | if (pci_xen_swiotlb_detect() || pci_swiotlb_detect()) |
136 | goto out; | 137 | goto out; |
137 | 138 | ||
138 | gart_iommu_hole_init(); | 139 | gart_iommu_hole_init(); |
@@ -144,6 +145,8 @@ void __init pci_iommu_alloc(void) | |||
144 | /* needs to be called after gart_iommu_hole_init */ | 145 | /* needs to be called after gart_iommu_hole_init */ |
145 | amd_iommu_detect(); | 146 | amd_iommu_detect(); |
146 | out: | 147 | out: |
148 | pci_xen_swiotlb_init(); | ||
149 | |||
147 | pci_swiotlb_init(); | 150 | pci_swiotlb_init(); |
148 | } | 151 | } |
149 | 152 | ||
@@ -296,7 +299,7 @@ static int __init pci_iommu_init(void) | |||
296 | #endif | 299 | #endif |
297 | x86_init.iommu.iommu_init(); | 300 | x86_init.iommu.iommu_init(); |
298 | 301 | ||
299 | if (swiotlb) { | 302 | if (swiotlb || xen_swiotlb) { |
300 | printk(KERN_INFO "PCI-DMA: " | 303 | printk(KERN_INFO "PCI-DMA: " |
301 | "Using software bounce buffering for IO (SWIOTLB)\n"); | 304 | "Using software bounce buffering for IO (SWIOTLB)\n"); |
302 | swiotlb_print_info(); | 305 | swiotlb_print_info(); |
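
Editorial note: the pci-dma.c hunks above give the Xen software IOTLB first refusal: if either detect routine claims the platform, the hardware IOMMU probing below it is skipped, and both init routines then run at the out: label with the Xen variant first. A hypothetical userspace sketch of that detect-then-init ordering; the function names here are stand-ins, not the kernel API.

    #include <stdio.h>

    static int xen_swiotlb, swiotlb;

    /* stand-ins for the real detect/init hooks */
    static int  xen_swiotlb_detect(void) { return xen_swiotlb; }
    static int  swiotlb_detect(void)     { return swiotlb; }
    static void xen_swiotlb_setup(void)  { if (xen_swiotlb) puts("init xen-swiotlb"); }
    static void swiotlb_setup(void)      { if (swiotlb) puts("init swiotlb"); }
    static void hw_iommu_detect(void)    { puts("probing hardware IOMMUs"); }

    static void iommu_alloc(void)
    {
            /* either software IOTLB claiming the system skips hardware probing */
            if (xen_swiotlb_detect() || swiotlb_detect())
                    goto out;
            hw_iommu_detect();
    out:
            xen_swiotlb_setup();    /* Xen variant initialised first */
            swiotlb_setup();
    }

    int main(void)
    {
            xen_swiotlb = 1;        /* pretend we need Xen bounce buffering */
            iommu_alloc();
            return 0;
    }
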
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e7e35219b32f..64ecaf0af9af 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -28,6 +28,7 @@ unsigned long idle_nomwait; | |||
28 | EXPORT_SYMBOL(idle_nomwait); | 28 | EXPORT_SYMBOL(idle_nomwait); |
29 | 29 | ||
30 | struct kmem_cache *task_xstate_cachep; | 30 | struct kmem_cache *task_xstate_cachep; |
31 | EXPORT_SYMBOL_GPL(task_xstate_cachep); | ||
31 | 32 | ||
32 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 33 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
33 | { | 34 | { |
@@ -300,7 +301,7 @@ EXPORT_SYMBOL(kernel_thread); | |||
300 | /* | 301 | /* |
301 | * sys_execve() executes a new program. | 302 | * sys_execve() executes a new program. |
302 | */ | 303 | */ |
303 | long sys_execve(char __user *name, char __user * __user *argv, | 304 | long sys_execve(const char __user *name, char __user * __user *argv, |
304 | char __user * __user *envp, struct pt_regs *regs) | 305 | char __user * __user *envp, struct pt_regs *regs) |
305 | { | 306 | { |
306 | long error; | 307 | long error; |
@@ -371,7 +372,7 @@ static inline int hlt_use_halt(void) | |||
371 | void default_idle(void) | 372 | void default_idle(void) |
372 | { | 373 | { |
373 | if (hlt_use_halt()) { | 374 | if (hlt_use_halt()) { |
374 | trace_power_start(POWER_CSTATE, 1); | 375 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); |
375 | current_thread_info()->status &= ~TS_POLLING; | 376 | current_thread_info()->status &= ~TS_POLLING; |
376 | /* | 377 | /* |
377 | * TS_POLLING-cleared state must be visible before we | 378 | * TS_POLLING-cleared state must be visible before we |
@@ -441,7 +442,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
441 | */ | 442 | */ |
442 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | 443 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
443 | { | 444 | { |
444 | trace_power_start(POWER_CSTATE, (ax>>4)+1); | 445 | trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); |
445 | if (!need_resched()) { | 446 | if (!need_resched()) { |
446 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) | 447 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) |
447 | clflush((void *)¤t_thread_info()->flags); | 448 | clflush((void *)¤t_thread_info()->flags); |
@@ -457,7 +458,7 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | |||
457 | static void mwait_idle(void) | 458 | static void mwait_idle(void) |
458 | { | 459 | { |
459 | if (!need_resched()) { | 460 | if (!need_resched()) { |
460 | trace_power_start(POWER_CSTATE, 1); | 461 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); |
461 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) | 462 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) |
462 | clflush((void *)¤t_thread_info()->flags); | 463 | clflush((void *)¤t_thread_info()->flags); |
463 | 464 | ||
@@ -478,7 +479,7 @@ static void mwait_idle(void) | |||
478 | */ | 479 | */ |
479 | static void poll_idle(void) | 480 | static void poll_idle(void) |
480 | { | 481 | { |
481 | trace_power_start(POWER_CSTATE, 0); | 482 | trace_power_start(POWER_CSTATE, 0, smp_processor_id()); |
482 | local_irq_enable(); | 483 | local_irq_enable(); |
483 | while (!need_resched()) | 484 | while (!need_resched()) |
484 | cpu_relax(); | 485 | cpu_relax(); |
@@ -525,44 +526,10 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) | |||
525 | return (edx & MWAIT_EDX_C1); | 526 | return (edx & MWAIT_EDX_C1); |
526 | } | 527 | } |
527 | 528 | ||
528 | /* | 529 | bool c1e_detected; |
529 | * Check for AMD CPUs, where APIC timer interrupt does not wake up CPU from C1e. | 530 | EXPORT_SYMBOL(c1e_detected); |
530 | * For more information see | ||
531 | * - Erratum #400 for NPT family 0xf and family 0x10 CPUs | ||
532 | * - Erratum #365 for family 0x11 (not affected because C1e not in use) | ||
533 | */ | ||
534 | static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | ||
535 | { | ||
536 | u64 val; | ||
537 | if (c->x86_vendor != X86_VENDOR_AMD) | ||
538 | goto no_c1e_idle; | ||
539 | |||
540 | /* Family 0x0f models < rev F do not have C1E */ | ||
541 | if (c->x86 == 0x0F && c->x86_model >= 0x40) | ||
542 | return 1; | ||
543 | |||
544 | if (c->x86 == 0x10) { | ||
545 | /* | ||
546 | * check OSVW bit for CPUs that are not affected | ||
547 | * by erratum #400 | ||
548 | */ | ||
549 | if (cpu_has(c, X86_FEATURE_OSVW)) { | ||
550 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); | ||
551 | if (val >= 2) { | ||
552 | rdmsrl(MSR_AMD64_OSVW_STATUS, val); | ||
553 | if (!(val & BIT(1))) | ||
554 | goto no_c1e_idle; | ||
555 | } | ||
556 | } | ||
557 | return 1; | ||
558 | } | ||
559 | |||
560 | no_c1e_idle: | ||
561 | return 0; | ||
562 | } | ||
563 | 531 | ||
564 | static cpumask_var_t c1e_mask; | 532 | static cpumask_var_t c1e_mask; |
565 | static int c1e_detected; | ||
566 | 533 | ||
567 | void c1e_remove_cpu(int cpu) | 534 | void c1e_remove_cpu(int cpu) |
568 | { | 535 | { |
@@ -584,12 +551,12 @@ static void c1e_idle(void) | |||
584 | u32 lo, hi; | 551 | u32 lo, hi; |
585 | 552 | ||
586 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); | 553 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); |
554 | |||
587 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { | 555 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { |
588 | c1e_detected = 1; | 556 | c1e_detected = true; |
589 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) | 557 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) |
590 | mark_tsc_unstable("TSC halt in AMD C1E"); | 558 | mark_tsc_unstable("TSC halt in AMD C1E"); |
591 | printk(KERN_INFO "System has AMD C1E enabled\n"); | 559 | printk(KERN_INFO "System has AMD C1E enabled\n"); |
592 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); | ||
593 | } | 560 | } |
594 | } | 561 | } |
595 | 562 | ||
@@ -638,7 +605,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
638 | */ | 605 | */ |
639 | printk(KERN_INFO "using mwait in idle threads.\n"); | 606 | printk(KERN_INFO "using mwait in idle threads.\n"); |
640 | pm_idle = mwait_idle; | 607 | pm_idle = mwait_idle; |
641 | } else if (check_c1e_idle(c)) { | 608 | } else if (cpu_has_amd_erratum(amd_erratum_400)) { |
609 | /* E400: APIC timer interrupt does not wake up CPU from C1e */ | ||
642 | printk(KERN_INFO "using C1E aware idle routine\n"); | 610 | printk(KERN_INFO "using C1E aware idle routine\n"); |
643 | pm_idle = c1e_idle; | 611 | pm_idle = c1e_idle; |
644 | } else | 612 | } else |
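
Editorial note: the removed check_c1e_idle() above decided, from the CPU family/model and optionally the OSVW MSRs, whether the E400 erratum ("APIC timer interrupt does not wake the CPU from C1E") applies; the patch replaces it with the generic cpu_has_amd_erratum(amd_erratum_400) helper. The same decision tree as a userspace sketch, with the MSR reads replaced by plain parameters and illustrative values in main().

    #include <stdio.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* mirrors the removed check_c1e_idle(): family/model checks plus OSVW */
    static bool cpu_has_e400(bool vendor_is_amd, int family, int model,
                             bool has_osvw, uint64_t osvw_len, uint64_t osvw_status)
    {
            if (!vendor_is_amd)
                    return false;

            /* family 0x0f: only models >= rev F (0x40) have C1E */
            if (family == 0x0f && model >= 0x40)
                    return true;

            if (family == 0x10) {
                    /* OSVW bit 1 clear => CPU not affected by erratum #400 */
                    if (has_osvw && osvw_len >= 2 && !(osvw_status & (1ULL << 1)))
                            return false;
                    return true;
            }
            return false;
    }

    int main(void)
    {
            printf("fam 0x10, OSVW says unaffected: %d\n",
                   cpu_has_e400(true, 0x10, 0x02, true, 2, 0));
            printf("fam 0x10, no OSVW:              %d\n",
                   cpu_has_e400(true, 0x10, 0x02, false, 0, 0));
            printf("fam 0x0f rev F:                 %d\n",
                   cpu_has_e400(true, 0x0f, 0x41, false, 0, 0));
            return 0;
    }
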
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8d128783af47..96586c3cbbbf 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -57,6 +57,8 @@ | |||
57 | #include <asm/syscalls.h> | 57 | #include <asm/syscalls.h> |
58 | #include <asm/debugreg.h> | 58 | #include <asm/debugreg.h> |
59 | 59 | ||
60 | #include <trace/events/power.h> | ||
61 | |||
60 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
61 | 63 | ||
62 | /* | 64 | /* |
@@ -111,6 +113,8 @@ void cpu_idle(void) | |||
111 | stop_critical_timings(); | 113 | stop_critical_timings(); |
112 | pm_idle(); | 114 | pm_idle(); |
113 | start_critical_timings(); | 115 | start_critical_timings(); |
116 | |||
117 | trace_power_end(smp_processor_id()); | ||
114 | } | 118 | } |
115 | tick_nohz_restart_sched_tick(); | 119 | tick_nohz_restart_sched_tick(); |
116 | preempt_enable_no_resched(); | 120 | preempt_enable_no_resched(); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3c2422a99f1f..3d9ea531ddd1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -51,6 +51,8 @@ | |||
51 | #include <asm/syscalls.h> | 51 | #include <asm/syscalls.h> |
52 | #include <asm/debugreg.h> | 52 | #include <asm/debugreg.h> |
53 | 53 | ||
54 | #include <trace/events/power.h> | ||
55 | |||
54 | asmlinkage extern void ret_from_fork(void); | 56 | asmlinkage extern void ret_from_fork(void); |
55 | 57 | ||
56 | DEFINE_PER_CPU(unsigned long, old_rsp); | 58 | DEFINE_PER_CPU(unsigned long, old_rsp); |
@@ -138,6 +140,9 @@ void cpu_idle(void) | |||
138 | stop_critical_timings(); | 140 | stop_critical_timings(); |
139 | pm_idle(); | 141 | pm_idle(); |
140 | start_critical_timings(); | 142 | start_critical_timings(); |
143 | |||
144 | trace_power_end(smp_processor_id()); | ||
145 | |||
141 | /* In many cases the interrupt that ended idle | 146 | /* In many cases the interrupt that ended idle |
142 | has already called exit_idle. But some idle | 147 | has already called exit_idle. But some idle |
143 | loops can be woken up without interrupt. */ | 148 | loops can be woken up without interrupt. */ |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b4ae4acbd031..b008e7883207 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -102,6 +102,7 @@ | |||
102 | 102 | ||
103 | #include <asm/paravirt.h> | 103 | #include <asm/paravirt.h> |
104 | #include <asm/hypervisor.h> | 104 | #include <asm/hypervisor.h> |
105 | #include <asm/olpc_ofw.h> | ||
105 | 106 | ||
106 | #include <asm/percpu.h> | 107 | #include <asm/percpu.h> |
107 | #include <asm/topology.h> | 108 | #include <asm/topology.h> |
@@ -736,10 +737,15 @@ void __init setup_arch(char **cmdline_p) | |||
736 | /* VMI may relocate the fixmap; do this before touching ioremap area */ | 737 | /* VMI may relocate the fixmap; do this before touching ioremap area */ |
737 | vmi_init(); | 738 | vmi_init(); |
738 | 739 | ||
740 | /* OFW also may relocate the fixmap */ | ||
741 | olpc_ofw_detect(); | ||
742 | |||
739 | early_trap_init(); | 743 | early_trap_init(); |
740 | early_cpu_init(); | 744 | early_cpu_init(); |
741 | early_ioremap_init(); | 745 | early_ioremap_init(); |
742 | 746 | ||
747 | setup_olpc_ofw_pgd(); | ||
748 | |||
743 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); | 749 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); |
744 | screen_info = boot_params.screen_info; | 750 | screen_info = boot_params.screen_info; |
745 | edid_info = boot_params.edid_info; | 751 | edid_info = boot_params.edid_info; |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c4f33b2e77d6..a5e928b0cb5f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -735,12 +735,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
735 | goto do_rest; | 735 | goto do_rest; |
736 | } | 736 | } |
737 | 737 | ||
738 | if (!keventd_up() || current_is_keventd()) | 738 | schedule_work(&c_idle.work); |
739 | c_idle.work.func(&c_idle.work); | 739 | wait_for_completion(&c_idle.done); |
740 | else { | ||
741 | schedule_work(&c_idle.work); | ||
742 | wait_for_completion(&c_idle.done); | ||
743 | } | ||
744 | 740 | ||
745 | if (IS_ERR(c_idle.idle)) { | 741 | if (IS_ERR(c_idle.idle)) { |
746 | printk("failed fork for CPU %d\n", cpu); | 742 | printk("failed fork for CPU %d\n", cpu); |
@@ -816,6 +812,13 @@ do_rest: | |||
816 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) | 812 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) |
817 | break; /* It has booted */ | 813 | break; /* It has booted */ |
818 | udelay(100); | 814 | udelay(100); |
815 | /* | ||
816 | * Allow other tasks to run while we wait for the | ||
817 | * AP to come online. This also gives a chance | ||
818 | * for the MTRR work (triggered by the AP coming online) | ||
819 | * to be completed in the stop machine context. | ||
820 | */ | ||
821 | schedule(); | ||
819 | } | 822 | } |
820 | 823 | ||
821 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) | 824 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) |
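
Editorial note: the new schedule() call above lets the boot CPU yield while it polls cpu_callin_mask, so work queued by the AP's onlining (such as the MTRR sync run under stop_machine) can actually make progress. The same wait-with-yield shape in a simplified standalone sketch, with a pthread flag in place of the callin mask; all names below are illustrative.

    #include <stdio.h>
    #include <stdbool.h>
    #include <unistd.h>
    #include <sched.h>
    #include <pthread.h>

    static volatile bool ap_called_in;

    /* stands in for the AP announcing itself in cpu_callin_mask */
    static void *secondary(void *arg)
    {
            usleep(10000);          /* pretend startup takes a while */
            ap_called_in = true;
            return NULL;
    }

    int main(void)
    {
            pthread_t ap;
            int i;

            pthread_create(&ap, NULL, secondary, NULL);

            for (i = 0; i < 100000; i++) {
                    if (ap_called_in)
                            break;          /* it has booted */
                    usleep(100);
                    /* yield so other work (e.g. deferred MTRR sync) can run */
                    sched_yield();
            }

            printf(ap_called_in ? "AP online after %d polls\n"
                                : "AP failed to call in (%d polls)\n", i);
            pthread_join(ap, NULL);
            return 0;
    }
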
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 922eefbb3f6c..b53c525368a7 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name) | |||
23 | return 0; | 23 | return 0; |
24 | } | 24 | } |
25 | 25 | ||
26 | static void save_stack_address(void *data, unsigned long addr, int reliable) | 26 | static void |
27 | __save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched) | ||
27 | { | 28 | { |
28 | struct stack_trace *trace = data; | 29 | struct stack_trace *trace = data; |
30 | #ifdef CONFIG_FRAME_POINTER | ||
29 | if (!reliable) | 31 | if (!reliable) |
30 | return; | 32 | return; |
33 | #endif | ||
34 | if (nosched && in_sched_functions(addr)) | ||
35 | return; | ||
31 | if (trace->skip > 0) { | 36 | if (trace->skip > 0) { |
32 | trace->skip--; | 37 | trace->skip--; |
33 | return; | 38 | return; |
@@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable) | |||
36 | trace->entries[trace->nr_entries++] = addr; | 41 | trace->entries[trace->nr_entries++] = addr; |
37 | } | 42 | } |
38 | 43 | ||
44 | static void save_stack_address(void *data, unsigned long addr, int reliable) | ||
45 | { | ||
46 | return __save_stack_address(data, addr, reliable, false); | ||
47 | } | ||
48 | |||
39 | static void | 49 | static void |
40 | save_stack_address_nosched(void *data, unsigned long addr, int reliable) | 50 | save_stack_address_nosched(void *data, unsigned long addr, int reliable) |
41 | { | 51 | { |
42 | struct stack_trace *trace = (struct stack_trace *)data; | 52 | return __save_stack_address(data, addr, reliable, true); |
43 | if (!reliable) | ||
44 | return; | ||
45 | if (in_sched_functions(addr)) | ||
46 | return; | ||
47 | if (trace->skip > 0) { | ||
48 | trace->skip--; | ||
49 | return; | ||
50 | } | ||
51 | if (trace->nr_entries < trace->max_entries) | ||
52 | trace->entries[trace->nr_entries++] = addr; | ||
53 | } | 53 | } |
54 | 54 | ||
55 | static const struct stacktrace_ops save_stack_ops = { | 55 | static const struct stacktrace_ops save_stack_ops = { |
@@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk); | |||
96 | 96 | ||
97 | /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ | 97 | /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ |
98 | 98 | ||
99 | struct stack_frame { | 99 | struct stack_frame_user { |
100 | const void __user *next_fp; | 100 | const void __user *next_fp; |
101 | unsigned long ret_addr; | 101 | unsigned long ret_addr; |
102 | }; | 102 | }; |
103 | 103 | ||
104 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | 104 | static int |
105 | copy_stack_frame(const void __user *fp, struct stack_frame_user *frame) | ||
105 | { | 106 | { |
106 | int ret; | 107 | int ret; |
107 | 108 | ||
@@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace) | |||
126 | trace->entries[trace->nr_entries++] = regs->ip; | 127 | trace->entries[trace->nr_entries++] = regs->ip; |
127 | 128 | ||
128 | while (trace->nr_entries < trace->max_entries) { | 129 | while (trace->nr_entries < trace->max_entries) { |
129 | struct stack_frame frame; | 130 | struct stack_frame_user frame; |
130 | 131 | ||
131 | frame.next_fp = NULL; | 132 | frame.next_fp = NULL; |
132 | frame.ret_addr = 0; | 133 | frame.ret_addr = 0; |
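
Editorial note: the stacktrace.c cleanup above folds two nearly identical callbacks into one __save_stack_address() helper taking a nosched flag, and only honours the "reliable" hint when frame pointers are enabled. A standalone sketch of that pattern, one parameterised worker and two thin wrappers, with a simplified trace structure and an in_sched_functions() stand-in.

    #include <stdio.h>
    #include <stdbool.h>

    #define MAX_ENTRIES 16

    struct stack_trace {
            unsigned int nr_entries, max_entries;
            unsigned long entries[MAX_ENTRIES];
            int skip;
    };

    /* stand-in: pretend addresses below 0x1000 belong to the scheduler */
    static bool in_sched_functions(unsigned long addr) { return addr < 0x1000; }

    static void __save_stack_address(void *data, unsigned long addr,
                                     bool reliable, bool nosched)
    {
            struct stack_trace *trace = data;

            if (!reliable)
                    return;         /* only checked with frame pointers in the kernel */
            if (nosched && in_sched_functions(addr))
                    return;
            if (trace->skip > 0) {
                    trace->skip--;
                    return;
            }
            if (trace->nr_entries < trace->max_entries)
                    trace->entries[trace->nr_entries++] = addr;
    }

    static void save_stack_address(void *data, unsigned long addr, int reliable)
    {
            __save_stack_address(data, addr, reliable, false);
    }

    static void save_stack_address_nosched(void *data, unsigned long addr, int reliable)
    {
            __save_stack_address(data, addr, reliable, true);
    }

    int main(void)
    {
            struct stack_trace t = { .max_entries = MAX_ENTRIES };

            save_stack_address_nosched(&t, 0x0800, 1);   /* filtered out */
            save_stack_address(&t, 0x0800, 1);           /* kept */
            save_stack_address(&t, 0xbeef, 1);           /* kept */
            printf("%u entries saved\n", t.nr_entries);
            return 0;
    }
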
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 8b3729341216..b35786dc9b8f 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -337,3 +337,6 @@ ENTRY(sys_call_table) | |||
337 | .long sys_rt_tgsigqueueinfo /* 335 */ | 337 | .long sys_rt_tgsigqueueinfo /* 335 */ |
338 | .long sys_perf_event_open | 338 | .long sys_perf_event_open |
339 | .long sys_recvmmsg | 339 | .long sys_recvmmsg |
340 | .long sys_fanotify_init | ||
341 | .long sys_fanotify_mark | ||
342 | .long sys_prlimit64 /* 340 */ | ||
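
Editorial note: the three table entries appended above give the 32-bit numbers 338-340 to fanotify_init, fanotify_mark and prlimit64. As a hedged illustration of the last one, here is how userspace could invoke prlimit64 through syscall(2) before a libc wrapper exists; the locally defined struct mirrors the kernel's rlimit64 layout (two 64-bit fields), and __NR_prlimit64 is assumed to be provided by sufficiently new kernel headers.

    #include <stdio.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    /* mirrors the kernel's struct rlimit64: two 64-bit limits */
    struct my_rlimit64 {
            uint64_t rlim_cur;
            uint64_t rlim_max;
    };

    int main(void)
    {
    #ifdef __NR_prlimit64
            struct my_rlimit64 old = { 0, 0 };

            /* pid 0 = calling process; NULL new limit = read-only query.
             * RLIMIT_NOFILE is resource 7 on x86. */
            long ret = syscall(__NR_prlimit64, 0, 7, NULL, &old);

            if (ret == 0)
                    printf("RLIMIT_NOFILE: cur=%llu max=%llu\n",
                           (unsigned long long)old.rlim_cur,
                           (unsigned long long)old.rlim_max);
            else
                    perror("prlimit64");
    #else
            puts("__NR_prlimit64 not defined by these headers");
    #endif
            return 0;
    }
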
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 7fea555929e2..312ef0292815 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -8,6 +8,7 @@ | |||
8 | */ | 8 | */ |
9 | #include <linux/seq_file.h> | 9 | #include <linux/seq_file.h> |
10 | #include <linux/proc_fs.h> | 10 | #include <linux/proc_fs.h> |
11 | #include <linux/debugfs.h> | ||
11 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
12 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
13 | 14 | ||
@@ -22,19 +23,37 @@ | |||
22 | #include <asm/irq_vectors.h> | 23 | #include <asm/irq_vectors.h> |
23 | #include <asm/timer.h> | 24 | #include <asm/timer.h> |
24 | 25 | ||
25 | struct msg_desc { | 26 | /* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */ |
26 | struct bau_payload_queue_entry *msg; | 27 | static int timeout_base_ns[] = { |
27 | int msg_slot; | 28 | 20, |
28 | int sw_ack_slot; | 29 | 160, |
29 | struct bau_payload_queue_entry *va_queue_first; | 30 | 1280, |
30 | struct bau_payload_queue_entry *va_queue_last; | 31 | 10240, |
32 | 81920, | ||
33 | 655360, | ||
34 | 5242880, | ||
35 | 167772160 | ||
31 | }; | 36 | }; |
32 | 37 | static int timeout_us; | |
33 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL | ||
34 | |||
35 | static int uv_bau_max_concurrent __read_mostly; | ||
36 | |||
37 | static int nobau; | 38 | static int nobau; |
39 | static int baudisabled; | ||
40 | static spinlock_t disable_lock; | ||
41 | static cycles_t congested_cycles; | ||
42 | |||
43 | /* tunables: */ | ||
44 | static int max_bau_concurrent = MAX_BAU_CONCURRENT; | ||
45 | static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT; | ||
46 | static int plugged_delay = PLUGGED_DELAY; | ||
47 | static int plugsb4reset = PLUGSB4RESET; | ||
48 | static int timeoutsb4reset = TIMEOUTSB4RESET; | ||
49 | static int ipi_reset_limit = IPI_RESET_LIMIT; | ||
50 | static int complete_threshold = COMPLETE_THRESHOLD; | ||
51 | static int congested_response_us = CONGESTED_RESPONSE_US; | ||
52 | static int congested_reps = CONGESTED_REPS; | ||
53 | static int congested_period = CONGESTED_PERIOD; | ||
54 | static struct dentry *tunables_dir; | ||
55 | static struct dentry *tunables_file; | ||
56 | |||
38 | static int __init setup_nobau(char *arg) | 57 | static int __init setup_nobau(char *arg) |
39 | { | 58 | { |
40 | nobau = 1; | 59 | nobau = 1; |
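
Editorial note: the new timeout_base_ns[] table above is indexed by the three-bit urgency7 field (bits 30:28 of UVH_AGING_PRESCALE_SEL), replacing the old hard-coded softack timeout period. A small sketch of how such a field selects a base timeout; the register value below is invented, and only the bit positions and the table come from the hunk (the kernel additionally applies prescale multipliers not shown here).

    #include <stdio.h>

    /* nanosecond base timeouts, indexed by urgency7 (bits 30:28) */
    static const long timeout_base_ns[] = {
            20, 160, 1280, 10240, 81920, 655360, 5242880, 167772160
    };

    int main(void)
    {
            unsigned long mmr = 0x30000000UL;       /* made-up register value */
            unsigned int index = (mmr >> 28) & 0x7; /* extract urgency7 */

            printf("urgency7=%u -> base timeout %ld ns (%ld us)\n",
                   index, timeout_base_ns[index], timeout_base_ns[index] / 1000);
            return 0;
    }
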
@@ -52,10 +71,6 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats); | |||
52 | static DEFINE_PER_CPU(struct bau_control, bau_control); | 71 | static DEFINE_PER_CPU(struct bau_control, bau_control); |
53 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | 72 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); |
54 | 73 | ||
55 | struct reset_args { | ||
56 | int sender; | ||
57 | }; | ||
58 | |||
59 | /* | 74 | /* |
60 | * Determine the first node on a uvhub. 'Nodes' are used for kernel | 75 | * Determine the first node on a uvhub. 'Nodes' are used for kernel |
61 | * memory allocation. | 76 | * memory allocation. |
@@ -126,7 +141,7 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, | |||
126 | struct ptc_stats *stat; | 141 | struct ptc_stats *stat; |
127 | 142 | ||
128 | msg = mdp->msg; | 143 | msg = mdp->msg; |
129 | stat = &per_cpu(ptcstats, bcp->cpu); | 144 | stat = bcp->statp; |
130 | stat->d_retries++; | 145 | stat->d_retries++; |
131 | /* | 146 | /* |
132 | * cancel any message from msg+1 to the retry itself | 147 | * cancel any message from msg+1 to the retry itself |
@@ -146,15 +161,14 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, | |||
146 | slot2 = msg2 - mdp->va_queue_first; | 161 | slot2 = msg2 - mdp->va_queue_first; |
147 | mmr = uv_read_local_mmr | 162 | mmr = uv_read_local_mmr |
148 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | 163 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); |
149 | msg_res = ((msg2->sw_ack_vector << 8) | | 164 | msg_res = msg2->sw_ack_vector; |
150 | msg2->sw_ack_vector); | ||
151 | /* | 165 | /* |
152 | * This is a message retry; clear the resources held | 166 | * This is a message retry; clear the resources held |
153 | * by the previous message only if they timed out. | 167 | * by the previous message only if they timed out. |
154 | * If it has not timed out we have an unexpected | 168 | * If it has not timed out we have an unexpected |
155 | * situation to report. | 169 | * situation to report. |
156 | */ | 170 | */ |
157 | if (mmr & (msg_res << 8)) { | 171 | if (mmr & (msg_res << UV_SW_ACK_NPENDING)) { |
158 | /* | 172 | /* |
159 | * is the resource timed out? | 173 | * is the resource timed out? |
160 | * make everyone ignore the cancelled message. | 174 | * make everyone ignore the cancelled message. |
@@ -164,9 +178,9 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, | |||
164 | cancel_count++; | 178 | cancel_count++; |
165 | uv_write_local_mmr( | 179 | uv_write_local_mmr( |
166 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, | 180 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, |
167 | (msg_res << 8) | msg_res); | 181 | (msg_res << UV_SW_ACK_NPENDING) | |
168 | } else | 182 | msg_res); |
169 | printk(KERN_INFO "note bau retry: no effect\n"); | 183 | } |
170 | } | 184 | } |
171 | } | 185 | } |
172 | if (!cancel_count) | 186 | if (!cancel_count) |
@@ -190,7 +204,7 @@ static void uv_bau_process_message(struct msg_desc *mdp, | |||
190 | * This must be a normal message, or retry of a normal message | 204 | * This must be a normal message, or retry of a normal message |
191 | */ | 205 | */ |
192 | msg = mdp->msg; | 206 | msg = mdp->msg; |
193 | stat = &per_cpu(ptcstats, bcp->cpu); | 207 | stat = bcp->statp; |
194 | if (msg->address == TLB_FLUSH_ALL) { | 208 | if (msg->address == TLB_FLUSH_ALL) { |
195 | local_flush_tlb(); | 209 | local_flush_tlb(); |
196 | stat->d_alltlb++; | 210 | stat->d_alltlb++; |
@@ -274,7 +288,7 @@ uv_do_reset(void *ptr) | |||
274 | 288 | ||
275 | bcp = &per_cpu(bau_control, smp_processor_id()); | 289 | bcp = &per_cpu(bau_control, smp_processor_id()); |
276 | rap = (struct reset_args *)ptr; | 290 | rap = (struct reset_args *)ptr; |
277 | stat = &per_cpu(ptcstats, bcp->cpu); | 291 | stat = bcp->statp; |
278 | stat->d_resets++; | 292 | stat->d_resets++; |
279 | 293 | ||
280 | /* | 294 | /* |
@@ -302,13 +316,13 @@ uv_do_reset(void *ptr) | |||
302 | */ | 316 | */ |
303 | mmr = uv_read_local_mmr | 317 | mmr = uv_read_local_mmr |
304 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | 318 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); |
305 | msg_res = ((msg->sw_ack_vector << 8) | | 319 | msg_res = msg->sw_ack_vector; |
306 | msg->sw_ack_vector); | ||
307 | if (mmr & msg_res) { | 320 | if (mmr & msg_res) { |
308 | stat->d_rcanceled++; | 321 | stat->d_rcanceled++; |
309 | uv_write_local_mmr( | 322 | uv_write_local_mmr( |
310 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, | 323 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, |
311 | msg_res); | 324 | (msg_res << UV_SW_ACK_NPENDING) | |
325 | msg_res); | ||
312 | } | 326 | } |
313 | } | 327 | } |
314 | } | 328 | } |
@@ -386,17 +400,12 @@ static int uv_wait_completion(struct bau_desc *bau_desc, | |||
386 | unsigned long mmr_offset, int right_shift, int this_cpu, | 400 | unsigned long mmr_offset, int right_shift, int this_cpu, |
387 | struct bau_control *bcp, struct bau_control *smaster, long try) | 401 | struct bau_control *bcp, struct bau_control *smaster, long try) |
388 | { | 402 | { |
389 | int relaxes = 0; | ||
390 | unsigned long descriptor_status; | 403 | unsigned long descriptor_status; |
391 | unsigned long mmr; | ||
392 | unsigned long mask; | ||
393 | cycles_t ttime; | 404 | cycles_t ttime; |
394 | cycles_t timeout_time; | 405 | struct ptc_stats *stat = bcp->statp; |
395 | struct ptc_stats *stat = &per_cpu(ptcstats, this_cpu); | ||
396 | struct bau_control *hmaster; | 406 | struct bau_control *hmaster; |
397 | 407 | ||
398 | hmaster = bcp->uvhub_master; | 408 | hmaster = bcp->uvhub_master; |
399 | timeout_time = get_cycles() + bcp->timeout_interval; | ||
400 | 409 | ||
401 | /* spin on the status MMR, waiting for it to go idle */ | 410 | /* spin on the status MMR, waiting for it to go idle */ |
402 | while ((descriptor_status = (((unsigned long) | 411 | while ((descriptor_status = (((unsigned long) |
@@ -423,7 +432,8 @@ static int uv_wait_completion(struct bau_desc *bau_desc, | |||
423 | * pending. In that case hardware returns the | 432 | * pending. In that case hardware returns the |
424 | * ERROR that looks like a destination timeout. | 433 | * ERROR that looks like a destination timeout. |
425 | */ | 434 | */ |
426 | if (cycles_2_us(ttime - bcp->send_message) < BIOS_TO) { | 435 | if (cycles_2_us(ttime - bcp->send_message) < |
436 | timeout_us) { | ||
427 | bcp->conseccompletes = 0; | 437 | bcp->conseccompletes = 0; |
428 | return FLUSH_RETRY_PLUGGED; | 438 | return FLUSH_RETRY_PLUGGED; |
429 | } | 439 | } |
@@ -435,26 +445,6 @@ static int uv_wait_completion(struct bau_desc *bau_desc, | |||
435 | * descriptor_status is still BUSY | 445 | * descriptor_status is still BUSY |
436 | */ | 446 | */ |
437 | cpu_relax(); | 447 | cpu_relax(); |
438 | relaxes++; | ||
439 | if (relaxes >= 10000) { | ||
440 | relaxes = 0; | ||
441 | if (get_cycles() > timeout_time) { | ||
442 | quiesce_local_uvhub(hmaster); | ||
443 | |||
444 | /* single-thread the register change */ | ||
445 | spin_lock(&hmaster->masks_lock); | ||
446 | mmr = uv_read_local_mmr(mmr_offset); | ||
447 | mask = 0UL; | ||
448 | mask |= (3UL < right_shift); | ||
449 | mask = ~mask; | ||
450 | mmr &= mask; | ||
451 | uv_write_local_mmr(mmr_offset, mmr); | ||
452 | spin_unlock(&hmaster->masks_lock); | ||
453 | end_uvhub_quiesce(hmaster); | ||
454 | stat->s_busy++; | ||
455 | return FLUSH_GIVEUP; | ||
456 | } | ||
457 | } | ||
458 | } | 448 | } |
459 | } | 449 | } |
460 | bcp->conseccompletes++; | 450 | bcp->conseccompletes++; |
@@ -494,56 +484,116 @@ static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) | |||
494 | return 1; | 484 | return 1; |
495 | } | 485 | } |
496 | 486 | ||
487 | /* | ||
488 | * Our retries are blocked by all destination swack resources being | ||
489 | * in use, and a timeout is pending. In that case hardware immediately | ||
490 | * returns the ERROR that looks like a destination timeout. | ||
491 | */ | ||
492 | static void | ||
493 | destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp, | ||
494 | struct bau_control *hmaster, struct ptc_stats *stat) | ||
495 | { | ||
496 | udelay(bcp->plugged_delay); | ||
497 | bcp->plugged_tries++; | ||
498 | if (bcp->plugged_tries >= bcp->plugsb4reset) { | ||
499 | bcp->plugged_tries = 0; | ||
500 | quiesce_local_uvhub(hmaster); | ||
501 | spin_lock(&hmaster->queue_lock); | ||
502 | uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); | ||
503 | spin_unlock(&hmaster->queue_lock); | ||
504 | end_uvhub_quiesce(hmaster); | ||
505 | bcp->ipi_attempts++; | ||
506 | stat->s_resets_plug++; | ||
507 | } | ||
508 | } | ||
509 | |||
510 | static void | ||
511 | destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp, | ||
512 | struct bau_control *hmaster, struct ptc_stats *stat) | ||
513 | { | ||
514 | hmaster->max_bau_concurrent = 1; | ||
515 | bcp->timeout_tries++; | ||
516 | if (bcp->timeout_tries >= bcp->timeoutsb4reset) { | ||
517 | bcp->timeout_tries = 0; | ||
518 | quiesce_local_uvhub(hmaster); | ||
519 | spin_lock(&hmaster->queue_lock); | ||
520 | uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); | ||
521 | spin_unlock(&hmaster->queue_lock); | ||
522 | end_uvhub_quiesce(hmaster); | ||
523 | bcp->ipi_attempts++; | ||
524 | stat->s_resets_timeout++; | ||
525 | } | ||
526 | } | ||
527 | |||
528 | /* | ||
529 | * Completions are taking a very long time due to a congested numalink | ||
530 | * network. | ||
531 | */ | ||
532 | static void | ||
533 | disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat) | ||
534 | { | ||
535 | int tcpu; | ||
536 | struct bau_control *tbcp; | ||
537 | |||
538 | /* let only one cpu do this disabling */ | ||
539 | spin_lock(&disable_lock); | ||
540 | if (!baudisabled && bcp->period_requests && | ||
541 | ((bcp->period_time / bcp->period_requests) > congested_cycles)) { | ||
542 | /* it becomes this cpu's job to turn on the use of the | ||
543 | BAU again */ | ||
544 | baudisabled = 1; | ||
545 | bcp->set_bau_off = 1; | ||
546 | bcp->set_bau_on_time = get_cycles() + | ||
547 | sec_2_cycles(bcp->congested_period); | ||
548 | stat->s_bau_disabled++; | ||
549 | for_each_present_cpu(tcpu) { | ||
550 | tbcp = &per_cpu(bau_control, tcpu); | ||
551 | tbcp->baudisabled = 1; | ||
552 | } | ||
553 | } | ||
554 | spin_unlock(&disable_lock); | ||
555 | } | ||
556 | |||
497 | /** | 557 | /** |
498 | * uv_flush_send_and_wait | 558 | * uv_flush_send_and_wait |
499 | * | 559 | * |
500 | * Send a broadcast and wait for it to complete. | 560 | * Send a broadcast and wait for it to complete. |
501 | * | 561 | * |
502 | * The flush_mask contains the cpus the broadcast is to be sent to, plus | 562 | * The flush_mask contains the cpus the broadcast is to be sent to, including |
503 | * cpus that are on the local uvhub. | 563 | * cpus that are on the local uvhub. |
504 | * | 564 | * |
505 | * Returns NULL if all flushing represented in the mask was done. The mask | 565 | * Returns 0 if all flushing represented in the mask was done. |
506 | * is zeroed. | 566 | * Returns 1 if it gives up entirely and the original cpu mask is to be |
507 | * Returns @flush_mask if some remote flushing remains to be done. The | 567 | * returned to the kernel. |
508 | * mask will have some bits still set, representing any cpus on the local | ||
509 | * uvhub (not current cpu) and any on remote uvhubs if the broadcast failed. | ||
510 | */ | 568 | */ |
511 | const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc, | 569 | int uv_flush_send_and_wait(struct bau_desc *bau_desc, |
512 | struct cpumask *flush_mask, | 570 | struct cpumask *flush_mask, struct bau_control *bcp) |
513 | struct bau_control *bcp) | ||
514 | { | 571 | { |
515 | int right_shift; | 572 | int right_shift; |
516 | int uvhub; | ||
517 | int bit; | ||
518 | int completion_status = 0; | 573 | int completion_status = 0; |
519 | int seq_number = 0; | 574 | int seq_number = 0; |
520 | long try = 0; | 575 | long try = 0; |
521 | int cpu = bcp->uvhub_cpu; | 576 | int cpu = bcp->uvhub_cpu; |
522 | int this_cpu = bcp->cpu; | 577 | int this_cpu = bcp->cpu; |
523 | int this_uvhub = bcp->uvhub; | ||
524 | unsigned long mmr_offset; | 578 | unsigned long mmr_offset; |
525 | unsigned long index; | 579 | unsigned long index; |
526 | cycles_t time1; | 580 | cycles_t time1; |
527 | cycles_t time2; | 581 | cycles_t time2; |
528 | struct ptc_stats *stat = &per_cpu(ptcstats, bcp->cpu); | 582 | cycles_t elapsed; |
583 | struct ptc_stats *stat = bcp->statp; | ||
529 | struct bau_control *smaster = bcp->socket_master; | 584 | struct bau_control *smaster = bcp->socket_master; |
530 | struct bau_control *hmaster = bcp->uvhub_master; | 585 | struct bau_control *hmaster = bcp->uvhub_master; |
531 | 586 | ||
532 | /* | ||
533 | * Spin here while there are hmaster->max_concurrent or more active | ||
534 | * descriptors. This is the per-uvhub 'throttle'. | ||
535 | */ | ||
536 | if (!atomic_inc_unless_ge(&hmaster->uvhub_lock, | 587 | if (!atomic_inc_unless_ge(&hmaster->uvhub_lock, |
537 | &hmaster->active_descriptor_count, | 588 | &hmaster->active_descriptor_count, |
538 | hmaster->max_concurrent)) { | 589 | hmaster->max_bau_concurrent)) { |
539 | stat->s_throttles++; | 590 | stat->s_throttles++; |
540 | do { | 591 | do { |
541 | cpu_relax(); | 592 | cpu_relax(); |
542 | } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock, | 593 | } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock, |
543 | &hmaster->active_descriptor_count, | 594 | &hmaster->active_descriptor_count, |
544 | hmaster->max_concurrent)); | 595 | hmaster->max_bau_concurrent)); |
545 | } | 596 | } |
546 | |||
547 | while (hmaster->uvhub_quiesce) | 597 | while (hmaster->uvhub_quiesce) |
548 | cpu_relax(); | 598 | cpu_relax(); |
549 | 599 | ||
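
Editorial note: disable_for_congestion() above turns the BAU off once the running average cost of a broadcast (period_time / period_requests) exceeds congested_cycles, after enough slow requests have accumulated; the cpu that disabled it re-enables it when the congested_period timer expires. The averaging test in isolation, as a hypothetical sketch; the thresholds below are placeholders, not the kernel defaults, which are derived from congested_response_us and the TSC rate.

    #include <stdio.h>
    #include <stdbool.h>

    /* placeholder thresholds */
    static long congested_cycles = 1000;
    static int  congested_reps   = 10;

    struct bau_stats {
            long period_requests;   /* broadcasts in the current period */
            long period_time;       /* cycles spent on them */
            bool disabled;
    };

    /* record one completed broadcast and decide whether to disable the BAU */
    static void account_broadcast(struct bau_stats *s, long elapsed)
    {
            s->period_requests++;
            s->period_time += elapsed;

            if (elapsed > congested_cycles &&
                s->period_requests > congested_reps &&
                (s->period_time / s->period_requests) > congested_cycles) {
                    s->disabled = true;
                    printf("average %ld cycles/broadcast - disabling\n",
                           s->period_time / s->period_requests);
            }
    }

    int main(void)
    {
            struct bau_stats s = { 0, 0, false };
            int i;

            for (i = 0; i < 12 && !s.disabled; i++)
                    account_broadcast(&s, 1500);    /* consistently slow */
            return 0;
    }
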
@@ -557,23 +607,10 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc, | |||
557 | } | 607 | } |
558 | time1 = get_cycles(); | 608 | time1 = get_cycles(); |
559 | do { | 609 | do { |
560 | /* | ||
561 | * Every message from any given cpu gets a unique message | ||
562 | * sequence number. But retries use that same number. | ||
563 | * Our message may have timed out at the destination because | ||
564 | * all sw-ack resources are in use and there is a timeout | ||
565 | * pending there. In that case, our last send never got | ||
566 | * placed into the queue and we need to persist until it | ||
567 | * does. | ||
568 | * | ||
569 | * Make any retry a type MSG_RETRY so that the destination will | ||
570 | * free any resource held by a previous message from this cpu. | ||
571 | */ | ||
572 | if (try == 0) { | 610 | if (try == 0) { |
573 | /* use message type set by the caller the first time */ | 611 | bau_desc->header.msg_type = MSG_REGULAR; |
574 | seq_number = bcp->message_number++; | 612 | seq_number = bcp->message_number++; |
575 | } else { | 613 | } else { |
576 | /* use RETRY type on all the rest; same sequence */ | ||
577 | bau_desc->header.msg_type = MSG_RETRY; | 614 | bau_desc->header.msg_type = MSG_RETRY; |
578 | stat->s_retry_messages++; | 615 | stat->s_retry_messages++; |
579 | } | 616 | } |
@@ -581,50 +618,17 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc, | |||
581 | index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | | 618 | index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | |
582 | bcp->uvhub_cpu; | 619 | bcp->uvhub_cpu; |
583 | bcp->send_message = get_cycles(); | 620 | bcp->send_message = get_cycles(); |
584 | |||
585 | uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); | 621 | uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); |
586 | |||
587 | try++; | 622 | try++; |
588 | completion_status = uv_wait_completion(bau_desc, mmr_offset, | 623 | completion_status = uv_wait_completion(bau_desc, mmr_offset, |
589 | right_shift, this_cpu, bcp, smaster, try); | 624 | right_shift, this_cpu, bcp, smaster, try); |
590 | 625 | ||
591 | if (completion_status == FLUSH_RETRY_PLUGGED) { | 626 | if (completion_status == FLUSH_RETRY_PLUGGED) { |
592 | /* | 627 | destination_plugged(bau_desc, bcp, hmaster, stat); |
593 | * Our retries may be blocked by all destination swack | ||
594 | * resources being consumed, and a timeout pending. In | ||
595 | * that case hardware immediately returns the ERROR | ||
596 | * that looks like a destination timeout. | ||
597 | */ | ||
598 | udelay(TIMEOUT_DELAY); | ||
599 | bcp->plugged_tries++; | ||
600 | if (bcp->plugged_tries >= PLUGSB4RESET) { | ||
601 | bcp->plugged_tries = 0; | ||
602 | quiesce_local_uvhub(hmaster); | ||
603 | spin_lock(&hmaster->queue_lock); | ||
604 | uv_reset_with_ipi(&bau_desc->distribution, | ||
605 | this_cpu); | ||
606 | spin_unlock(&hmaster->queue_lock); | ||
607 | end_uvhub_quiesce(hmaster); | ||
608 | bcp->ipi_attempts++; | ||
609 | stat->s_resets_plug++; | ||
610 | } | ||
611 | } else if (completion_status == FLUSH_RETRY_TIMEOUT) { | 628 | } else if (completion_status == FLUSH_RETRY_TIMEOUT) { |
612 | hmaster->max_concurrent = 1; | 629 | destination_timeout(bau_desc, bcp, hmaster, stat); |
613 | bcp->timeout_tries++; | ||
614 | udelay(TIMEOUT_DELAY); | ||
615 | if (bcp->timeout_tries >= TIMEOUTSB4RESET) { | ||
616 | bcp->timeout_tries = 0; | ||
617 | quiesce_local_uvhub(hmaster); | ||
618 | spin_lock(&hmaster->queue_lock); | ||
619 | uv_reset_with_ipi(&bau_desc->distribution, | ||
620 | this_cpu); | ||
621 | spin_unlock(&hmaster->queue_lock); | ||
622 | end_uvhub_quiesce(hmaster); | ||
623 | bcp->ipi_attempts++; | ||
624 | stat->s_resets_timeout++; | ||
625 | } | ||
626 | } | 630 | } |
627 | if (bcp->ipi_attempts >= 3) { | 631 | if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { |
628 | bcp->ipi_attempts = 0; | 632 | bcp->ipi_attempts = 0; |
629 | completion_status = FLUSH_GIVEUP; | 633 | completion_status = FLUSH_GIVEUP; |
630 | break; | 634 | break; |
@@ -633,49 +637,36 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc, | |||
633 | } while ((completion_status == FLUSH_RETRY_PLUGGED) || | 637 | } while ((completion_status == FLUSH_RETRY_PLUGGED) || |
634 | (completion_status == FLUSH_RETRY_TIMEOUT)); | 638 | (completion_status == FLUSH_RETRY_TIMEOUT)); |
635 | time2 = get_cycles(); | 639 | time2 = get_cycles(); |
636 | 640 | bcp->plugged_tries = 0; | |
637 | if ((completion_status == FLUSH_COMPLETE) && (bcp->conseccompletes > 5) | 641 | bcp->timeout_tries = 0; |
638 | && (hmaster->max_concurrent < hmaster->max_concurrent_constant)) | 642 | if ((completion_status == FLUSH_COMPLETE) && |
639 | hmaster->max_concurrent++; | 643 | (bcp->conseccompletes > bcp->complete_threshold) && |
640 | 644 | (hmaster->max_bau_concurrent < | |
641 | /* | 645 | hmaster->max_bau_concurrent_constant)) |
642 | * hold any cpu not timing out here; no other cpu currently held by | 646 | hmaster->max_bau_concurrent++; |
643 | * the 'throttle' should enter the activation code | ||
644 | */ | ||
645 | while (hmaster->uvhub_quiesce) | 647 | while (hmaster->uvhub_quiesce) |
646 | cpu_relax(); | 648 | cpu_relax(); |
647 | atomic_dec(&hmaster->active_descriptor_count); | 649 | atomic_dec(&hmaster->active_descriptor_count); |
648 | 650 | if (time2 > time1) { | |
649 | /* guard against cycles wrap */ | 651 | elapsed = time2 - time1; |
650 | if (time2 > time1) | 652 | stat->s_time += elapsed; |
651 | stat->s_time += (time2 - time1); | 653 | if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { |
652 | else | 654 | bcp->period_requests++; |
653 | stat->s_requestor--; /* don't count this one */ | 655 | bcp->period_time += elapsed; |
656 | if ((elapsed > congested_cycles) && | ||
657 | (bcp->period_requests > bcp->congested_reps)) { | ||
658 | disable_for_congestion(bcp, stat); | ||
659 | } | ||
660 | } | ||
661 | } else | ||
662 | stat->s_requestor--; | ||
654 | if (completion_status == FLUSH_COMPLETE && try > 1) | 663 | if (completion_status == FLUSH_COMPLETE && try > 1) |
655 | stat->s_retriesok++; | 664 | stat->s_retriesok++; |
656 | else if (completion_status == FLUSH_GIVEUP) { | 665 | else if (completion_status == FLUSH_GIVEUP) { |
657 | /* | ||
658 | * Cause the caller to do an IPI-style TLB shootdown on | ||
659 | * the target cpu's, all of which are still in the mask. | ||
660 | */ | ||
661 | stat->s_giveup++; | 666 | stat->s_giveup++; |
662 | return flush_mask; | 667 | return 1; |
663 | } | ||
664 | |||
665 | /* | ||
666 | * Success, so clear the remote cpu's from the mask so we don't | ||
667 | * use the IPI method of shootdown on them. | ||
668 | */ | ||
669 | for_each_cpu(bit, flush_mask) { | ||
670 | uvhub = uv_cpu_to_blade_id(bit); | ||
671 | if (uvhub == this_uvhub) | ||
672 | continue; | ||
673 | cpumask_clear_cpu(bit, flush_mask); | ||
674 | } | 668 | } |
675 | if (!cpumask_empty(flush_mask)) | 669 | return 0; |
676 | return flush_mask; | ||
677 | |||
678 | return NULL; | ||
679 | } | 670 | } |
680 | 671 | ||
681 | /** | 672 | /** |
@@ -707,70 +698,89 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
707 | struct mm_struct *mm, | 698 | struct mm_struct *mm, |
708 | unsigned long va, unsigned int cpu) | 699 | unsigned long va, unsigned int cpu) |
709 | { | 700 | { |
710 | int remotes; | ||
711 | int tcpu; | 701 | int tcpu; |
712 | int uvhub; | 702 | int uvhub; |
713 | int locals = 0; | 703 | int locals = 0; |
704 | int remotes = 0; | ||
705 | int hubs = 0; | ||
714 | struct bau_desc *bau_desc; | 706 | struct bau_desc *bau_desc; |
715 | struct cpumask *flush_mask; | 707 | struct cpumask *flush_mask; |
716 | struct ptc_stats *stat; | 708 | struct ptc_stats *stat; |
717 | struct bau_control *bcp; | 709 | struct bau_control *bcp; |
710 | struct bau_control *tbcp; | ||
718 | 711 | ||
712 | /* kernel was booted 'nobau' */ | ||
719 | if (nobau) | 713 | if (nobau) |
720 | return cpumask; | 714 | return cpumask; |
721 | 715 | ||
722 | bcp = &per_cpu(bau_control, cpu); | 716 | bcp = &per_cpu(bau_control, cpu); |
717 | stat = bcp->statp; | ||
718 | |||
719 | /* bau was disabled due to slow response */ | ||
720 | if (bcp->baudisabled) { | ||
721 | /* the cpu that disabled it must re-enable it */ | ||
722 | if (bcp->set_bau_off) { | ||
723 | if (get_cycles() >= bcp->set_bau_on_time) { | ||
724 | stat->s_bau_reenabled++; | ||
725 | baudisabled = 0; | ||
726 | for_each_present_cpu(tcpu) { | ||
727 | tbcp = &per_cpu(bau_control, tcpu); | ||
728 | tbcp->baudisabled = 0; | ||
729 | tbcp->period_requests = 0; | ||
730 | tbcp->period_time = 0; | ||
731 | } | ||
732 | } | ||
733 | } | ||
734 | return cpumask; | ||
735 | } | ||
736 | |||
723 | /* | 737 | /* |
724 | * Each sending cpu has a per-cpu mask which it fills from the caller's | 738 | * Each sending cpu has a per-cpu mask which it fills from the caller's |
725 | * cpu mask. Only remote cpus are converted to uvhubs and copied. | 739 | * cpu mask. All cpus are converted to uvhubs and copied to the |
740 | * activation descriptor. | ||
726 | */ | 741 | */ |
727 | flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); | 742 | flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); |
728 | /* | 743 | /* don't actually do a shootdown of the local cpu */ |
729 | * copy cpumask to flush_mask, removing current cpu | ||
730 | * (current cpu should already have been flushed by the caller and | ||
731 | * should never be returned if we return flush_mask) | ||
732 | */ | ||
733 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); | 744 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); |
734 | if (cpu_isset(cpu, *cpumask)) | 745 | if (cpu_isset(cpu, *cpumask)) |
735 | locals++; /* current cpu was targeted */ | 746 | stat->s_ntargself++; |
736 | 747 | ||
737 | bau_desc = bcp->descriptor_base; | 748 | bau_desc = bcp->descriptor_base; |
738 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; | 749 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; |
739 | |||
740 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | 750 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); |
741 | remotes = 0; | 751 | |
752 | /* cpu statistics */ | ||
742 | for_each_cpu(tcpu, flush_mask) { | 753 | for_each_cpu(tcpu, flush_mask) { |
743 | uvhub = uv_cpu_to_blade_id(tcpu); | 754 | uvhub = uv_cpu_to_blade_id(tcpu); |
744 | if (uvhub == bcp->uvhub) { | ||
745 | locals++; | ||
746 | continue; | ||
747 | } | ||
748 | bau_uvhub_set(uvhub, &bau_desc->distribution); | 755 | bau_uvhub_set(uvhub, &bau_desc->distribution); |
749 | remotes++; | 756 | if (uvhub == bcp->uvhub) |
750 | } | 757 | locals++; |
751 | if (remotes == 0) { | ||
752 | /* | ||
753 | * No off_hub flushing; return status for local hub. | ||
754 | * Return the caller's mask if all were local (the current | ||
755 | * cpu may be in that mask). | ||
756 | */ | ||
757 | if (locals) | ||
758 | return cpumask; | ||
759 | else | 758 | else |
760 | return NULL; | 759 | remotes++; |
761 | } | 760 | } |
762 | stat = &per_cpu(ptcstats, cpu); | 761 | if ((locals + remotes) == 0) |
762 | return NULL; | ||
763 | stat->s_requestor++; | 763 | stat->s_requestor++; |
764 | stat->s_ntargcpu += remotes; | 764 | stat->s_ntargcpu += remotes + locals; |
765 | stat->s_ntargremotes += remotes; | ||
766 | stat->s_ntarglocals += locals; | ||
765 | remotes = bau_uvhub_weight(&bau_desc->distribution); | 767 | remotes = bau_uvhub_weight(&bau_desc->distribution); |
766 | stat->s_ntarguvhub += remotes; | 768 | |
767 | if (remotes >= 16) | 769 | /* uvhub statistics */ |
770 | hubs = bau_uvhub_weight(&bau_desc->distribution); | ||
771 | if (locals) { | ||
772 | stat->s_ntarglocaluvhub++; | ||
773 | stat->s_ntargremoteuvhub += (hubs - 1); | ||
774 | } else | ||
775 | stat->s_ntargremoteuvhub += hubs; | ||
776 | stat->s_ntarguvhub += hubs; | ||
777 | if (hubs >= 16) | ||
768 | stat->s_ntarguvhub16++; | 778 | stat->s_ntarguvhub16++; |
769 | else if (remotes >= 8) | 779 | else if (hubs >= 8) |
770 | stat->s_ntarguvhub8++; | 780 | stat->s_ntarguvhub8++; |
771 | else if (remotes >= 4) | 781 | else if (hubs >= 4) |
772 | stat->s_ntarguvhub4++; | 782 | stat->s_ntarguvhub4++; |
773 | else if (remotes >= 2) | 783 | else if (hubs >= 2) |
774 | stat->s_ntarguvhub2++; | 784 | stat->s_ntarguvhub2++; |
775 | else | 785 | else |
776 | stat->s_ntarguvhub1++; | 786 | stat->s_ntarguvhub1++; |
@@ -779,10 +789,13 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
779 | bau_desc->payload.sending_cpu = cpu; | 789 | bau_desc->payload.sending_cpu = cpu; |
780 | 790 | ||
781 | /* | 791 | /* |
782 | * uv_flush_send_and_wait returns null if all cpu's were messaged, or | 792 | * uv_flush_send_and_wait returns 0 if all cpu's were messaged, |
783 | * the adjusted flush_mask if any cpu's were not messaged. | 793 | * or 1 if it gave up and the original cpumask should be returned. |
784 | */ | 794 | */ |
785 | return uv_flush_send_and_wait(bau_desc, flush_mask, bcp); | 795 | if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp)) |
796 | return NULL; | ||
797 | else | ||
798 | return cpumask; | ||
786 | } | 799 | } |
787 | 800 | ||
788 | /* | 801 | /* |
@@ -810,7 +823,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs) | |||
810 | 823 | ||
811 | time_start = get_cycles(); | 824 | time_start = get_cycles(); |
812 | bcp = &per_cpu(bau_control, smp_processor_id()); | 825 | bcp = &per_cpu(bau_control, smp_processor_id()); |
813 | stat = &per_cpu(ptcstats, smp_processor_id()); | 826 | stat = bcp->statp; |
814 | msgdesc.va_queue_first = bcp->va_queue_first; | 827 | msgdesc.va_queue_first = bcp->va_queue_first; |
815 | msgdesc.va_queue_last = bcp->va_queue_last; | 828 | msgdesc.va_queue_last = bcp->va_queue_last; |
816 | msg = bcp->bau_msg_head; | 829 | msg = bcp->bau_msg_head; |
@@ -908,12 +921,12 @@ static void uv_ptc_seq_stop(struct seq_file *file, void *data) | |||
908 | } | 921 | } |
909 | 922 | ||
910 | static inline unsigned long long | 923 | static inline unsigned long long |
911 | millisec_2_cycles(unsigned long millisec) | 924 | microsec_2_cycles(unsigned long microsec) |
912 | { | 925 | { |
913 | unsigned long ns; | 926 | unsigned long ns; |
914 | unsigned long long cyc; | 927 | unsigned long long cyc; |
915 | 928 | ||
916 | ns = millisec * 1000; | 929 | ns = microsec * 1000; |
917 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | 930 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); |
918 | return cyc; | 931 | return cyc; |
919 | } | 932 | } |
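Editor's note: the renamed microsec_2_cycles() above is the inverse of the kernel's fixed-point cycles-to-nanoseconds scaling. A minimal standalone sketch of the two directions, assuming the same CYC2NS_SCALE_FACTOR convention; the factor value of 10 and the helper names are illustrative assumptions, not taken from this patch:

        /* fixed-point scale factor; 10 is only assumed here for illustration */
        #define CYC2NS_SCALE_FACTOR 10

        static unsigned long long cycles_to_ns(unsigned long long cyc,
                                               unsigned long cyc2ns_scale)
        {
                /* forward direction: ns = cycles * scale, in fixed point */
                return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
        }

        static unsigned long long ns_to_cycles(unsigned long long ns,
                                               unsigned long cyc2ns_scale)
        {
                /* same formula microsec_2_cycles() applies after ns = microsec * 1000 */
                return (ns << CYC2NS_SCALE_FACTOR) / cyc2ns_scale;
        }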
@@ -931,15 +944,19 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) | |||
931 | 944 | ||
932 | if (!cpu) { | 945 | if (!cpu) { |
933 | seq_printf(file, | 946 | seq_printf(file, |
934 | "# cpu sent stime numuvhubs numuvhubs16 numuvhubs8 "); | 947 | "# cpu sent stime self locals remotes ncpus localhub "); |
948 | seq_printf(file, | ||
949 | "remotehub numuvhubs numuvhubs16 numuvhubs8 "); | ||
935 | seq_printf(file, | 950 | seq_printf(file, |
936 | "numuvhubs4 numuvhubs2 numuvhubs1 numcpus dto "); | 951 | "numuvhubs4 numuvhubs2 numuvhubs1 dto "); |
937 | seq_printf(file, | 952 | seq_printf(file, |
938 | "retries rok resetp resett giveup sto bz throt "); | 953 | "retries rok resetp resett giveup sto bz throt "); |
939 | seq_printf(file, | 954 | seq_printf(file, |
940 | "sw_ack recv rtime all "); | 955 | "sw_ack recv rtime all "); |
941 | seq_printf(file, | 956 | seq_printf(file, |
942 | "one mult none retry canc nocan reset rcan\n"); | 957 | "one mult none retry canc nocan reset rcan "); |
958 | seq_printf(file, | ||
959 | "disable enable\n"); | ||
943 | } | 960 | } |
944 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { | 961 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { |
945 | stat = &per_cpu(ptcstats, cpu); | 962 | stat = &per_cpu(ptcstats, cpu); |
@@ -947,18 +964,23 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) | |||
947 | seq_printf(file, | 964 | seq_printf(file, |
948 | "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", | 965 | "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", |
949 | cpu, stat->s_requestor, cycles_2_us(stat->s_time), | 966 | cpu, stat->s_requestor, cycles_2_us(stat->s_time), |
950 | stat->s_ntarguvhub, stat->s_ntarguvhub16, | 967 | stat->s_ntargself, stat->s_ntarglocals, |
968 | stat->s_ntargremotes, stat->s_ntargcpu, | ||
969 | stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, | ||
970 | stat->s_ntarguvhub, stat->s_ntarguvhub16); | ||
971 | seq_printf(file, "%ld %ld %ld %ld %ld ", | ||
951 | stat->s_ntarguvhub8, stat->s_ntarguvhub4, | 972 | stat->s_ntarguvhub8, stat->s_ntarguvhub4, |
952 | stat->s_ntarguvhub2, stat->s_ntarguvhub1, | 973 | stat->s_ntarguvhub2, stat->s_ntarguvhub1, |
953 | stat->s_ntargcpu, stat->s_dtimeout); | 974 | stat->s_dtimeout); |
954 | seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", | 975 | seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", |
955 | stat->s_retry_messages, stat->s_retriesok, | 976 | stat->s_retry_messages, stat->s_retriesok, |
956 | stat->s_resets_plug, stat->s_resets_timeout, | 977 | stat->s_resets_plug, stat->s_resets_timeout, |
957 | stat->s_giveup, stat->s_stimeout, | 978 | stat->s_giveup, stat->s_stimeout, |
958 | stat->s_busy, stat->s_throttles); | 979 | stat->s_busy, stat->s_throttles); |
980 | |||
959 | /* destination side statistics */ | 981 | /* destination side statistics */ |
960 | seq_printf(file, | 982 | seq_printf(file, |
961 | "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", | 983 | "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", |
962 | uv_read_global_mmr64(uv_cpu_to_pnode(cpu), | 984 | uv_read_global_mmr64(uv_cpu_to_pnode(cpu), |
963 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), | 985 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), |
964 | stat->d_requestee, cycles_2_us(stat->d_time), | 986 | stat->d_requestee, cycles_2_us(stat->d_time), |
@@ -966,15 +988,36 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) | |||
966 | stat->d_nomsg, stat->d_retries, stat->d_canceled, | 988 | stat->d_nomsg, stat->d_retries, stat->d_canceled, |
967 | stat->d_nocanceled, stat->d_resets, | 989 | stat->d_nocanceled, stat->d_resets, |
968 | stat->d_rcanceled); | 990 | stat->d_rcanceled); |
991 | seq_printf(file, "%ld %ld\n", | ||
992 | stat->s_bau_disabled, stat->s_bau_reenabled); | ||
969 | } | 993 | } |
970 | 994 | ||
971 | return 0; | 995 | return 0; |
972 | } | 996 | } |
973 | 997 | ||
974 | /* | 998 | /* |
999 | * Display the tunables through debugfs | ||
1000 | */ | ||
1001 | static ssize_t tunables_read(struct file *file, char __user *userbuf, | ||
1002 | size_t count, loff_t *ppos) | ||
1003 | { | ||
1004 | char buf[300]; | ||
1005 | int ret; | ||
1006 | |||
1007 | ret = snprintf(buf, 300, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", | ||
1008 | "max_bau_concurrent plugged_delay plugsb4reset", | ||
1009 | "timeoutsb4reset ipi_reset_limit complete_threshold", | ||
1010 | "congested_response_us congested_reps congested_period", | ||
1011 | max_bau_concurrent, plugged_delay, plugsb4reset, | ||
1012 | timeoutsb4reset, ipi_reset_limit, complete_threshold, | ||
1013 | congested_response_us, congested_reps, congested_period); | ||
1014 | |||
1015 | return simple_read_from_buffer(userbuf, count, ppos, buf, ret); | ||
1016 | } | ||
1017 | |||
1018 | /* | ||
975 | * -1: reset the statistics | 1019 | * -1: reset the statistics |
976 | * 0: display meaning of the statistics | 1020 | * 0: display meaning of the statistics |
977 | * >0: maximum concurrent active descriptors per uvhub (throttle) | ||
978 | */ | 1021 | */ |
979 | static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, | 1022 | static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, |
980 | size_t count, loff_t *data) | 1023 | size_t count, loff_t *data) |
@@ -983,7 +1026,6 @@ static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, | |||
983 | long input_arg; | 1026 | long input_arg; |
984 | char optstr[64]; | 1027 | char optstr[64]; |
985 | struct ptc_stats *stat; | 1028 | struct ptc_stats *stat; |
986 | struct bau_control *bcp; | ||
987 | 1029 | ||
988 | if (count == 0 || count > sizeof(optstr)) | 1030 | if (count == 0 || count > sizeof(optstr)) |
989 | return -EINVAL; | 1031 | return -EINVAL; |
@@ -1059,29 +1101,158 @@ static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, | |||
1059 | "reset: number of ipi-style reset requests processed\n"); | 1101 | "reset: number of ipi-style reset requests processed\n"); |
1060 | printk(KERN_DEBUG | 1102 | printk(KERN_DEBUG |
1061 | "rcan: number messages canceled by reset requests\n"); | 1103 | "rcan: number messages canceled by reset requests\n"); |
1104 | printk(KERN_DEBUG | ||
1105 | "disable: number times use of the BAU was disabled\n"); | ||
1106 | printk(KERN_DEBUG | ||
1107 | "enable: number times use of the BAU was re-enabled\n"); | ||
1062 | } else if (input_arg == -1) { | 1108 | } else if (input_arg == -1) { |
1063 | for_each_present_cpu(cpu) { | 1109 | for_each_present_cpu(cpu) { |
1064 | stat = &per_cpu(ptcstats, cpu); | 1110 | stat = &per_cpu(ptcstats, cpu); |
1065 | memset(stat, 0, sizeof(struct ptc_stats)); | 1111 | memset(stat, 0, sizeof(struct ptc_stats)); |
1066 | } | 1112 | } |
1067 | } else { | 1113 | } |
1068 | uv_bau_max_concurrent = input_arg; | 1114 | |
1069 | bcp = &per_cpu(bau_control, smp_processor_id()); | 1115 | return count; |
1070 | if (uv_bau_max_concurrent < 1 || | 1116 | } |
1071 | uv_bau_max_concurrent > bcp->cpus_in_uvhub) { | 1117 | |
1072 | printk(KERN_DEBUG | 1118 | static int local_atoi(const char *name) |
1073 | "Error: BAU max concurrent %d; %d is invalid\n", | 1119 | { |
1074 | bcp->max_concurrent, uv_bau_max_concurrent); | 1120 | int val = 0; |
1075 | return -EINVAL; | 1121 | |
1076 | } | 1122 | for (;; name++) { |
1077 | printk(KERN_DEBUG "Set BAU max concurrent:%d\n", | 1123 | switch (*name) { |
1078 | uv_bau_max_concurrent); | 1124 | case '0' ... '9': |
1079 | for_each_present_cpu(cpu) { | 1125 | val = 10*val+(*name-'0'); |
1080 | bcp = &per_cpu(bau_control, cpu); | 1126 | break; |
1081 | bcp->max_concurrent = uv_bau_max_concurrent; | 1127 | default: |
1128 | return val; | ||
1082 | } | 1129 | } |
1083 | } | 1130 | } |
1131 | } | ||
1132 | |||
1133 | /* | ||
1134 | * set the tunables | ||
1135 | * 0 values reset them to defaults | ||
1136 | */ | ||
1137 | static ssize_t tunables_write(struct file *file, const char __user *user, | ||
1138 | size_t count, loff_t *data) | ||
1139 | { | ||
1140 | int cpu; | ||
1141 | int cnt = 0; | ||
1142 | int val; | ||
1143 | char *p; | ||
1144 | char *q; | ||
1145 | char instr[64]; | ||
1146 | struct bau_control *bcp; | ||
1147 | |||
1148 | if (count == 0 || count > sizeof(instr)-1) | ||
1149 | return -EINVAL; | ||
1150 | if (copy_from_user(instr, user, count)) | ||
1151 | return -EFAULT; | ||
1084 | 1152 | ||
1153 | instr[count] = '\0'; | ||
1154 | /* count the fields */ | ||
1155 | p = instr + strspn(instr, WHITESPACE); | ||
1156 | q = p; | ||
1157 | for (; *p; p = q + strspn(q, WHITESPACE)) { | ||
1158 | q = p + strcspn(p, WHITESPACE); | ||
1159 | cnt++; | ||
1160 | if (q == p) | ||
1161 | break; | ||
1162 | } | ||
1163 | if (cnt != 9) { | ||
1164 | printk(KERN_INFO "bau tunable error: should be 9 numbers\n"); | ||
1165 | return -EINVAL; | ||
1166 | } | ||
1167 | |||
1168 | p = instr + strspn(instr, WHITESPACE); | ||
1169 | q = p; | ||
1170 | for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) { | ||
1171 | q = p + strcspn(p, WHITESPACE); | ||
1172 | val = local_atoi(p); | ||
1173 | switch (cnt) { | ||
1174 | case 0: | ||
1175 | if (val == 0) { | ||
1176 | max_bau_concurrent = MAX_BAU_CONCURRENT; | ||
1177 | max_bau_concurrent_constant = | ||
1178 | MAX_BAU_CONCURRENT; | ||
1179 | continue; | ||
1180 | } | ||
1181 | bcp = &per_cpu(bau_control, smp_processor_id()); | ||
1182 | if (val < 1 || val > bcp->cpus_in_uvhub) { | ||
1183 | printk(KERN_DEBUG | ||
1184 | "Error: BAU max concurrent %d is invalid\n", | ||
1185 | val); | ||
1186 | return -EINVAL; | ||
1187 | } | ||
1188 | max_bau_concurrent = val; | ||
1189 | max_bau_concurrent_constant = val; | ||
1190 | continue; | ||
1191 | case 1: | ||
1192 | if (val == 0) | ||
1193 | plugged_delay = PLUGGED_DELAY; | ||
1194 | else | ||
1195 | plugged_delay = val; | ||
1196 | continue; | ||
1197 | case 2: | ||
1198 | if (val == 0) | ||
1199 | plugsb4reset = PLUGSB4RESET; | ||
1200 | else | ||
1201 | plugsb4reset = val; | ||
1202 | continue; | ||
1203 | case 3: | ||
1204 | if (val == 0) | ||
1205 | timeoutsb4reset = TIMEOUTSB4RESET; | ||
1206 | else | ||
1207 | timeoutsb4reset = val; | ||
1208 | continue; | ||
1209 | case 4: | ||
1210 | if (val == 0) | ||
1211 | ipi_reset_limit = IPI_RESET_LIMIT; | ||
1212 | else | ||
1213 | ipi_reset_limit = val; | ||
1214 | continue; | ||
1215 | case 5: | ||
1216 | if (val == 0) | ||
1217 | complete_threshold = COMPLETE_THRESHOLD; | ||
1218 | else | ||
1219 | complete_threshold = val; | ||
1220 | continue; | ||
1221 | case 6: | ||
1222 | if (val == 0) | ||
1223 | congested_response_us = CONGESTED_RESPONSE_US; | ||
1224 | else | ||
1225 | congested_response_us = val; | ||
1226 | continue; | ||
1227 | case 7: | ||
1228 | if (val == 0) | ||
1229 | congested_reps = CONGESTED_REPS; | ||
1230 | else | ||
1231 | congested_reps = val; | ||
1232 | continue; | ||
1233 | case 8: | ||
1234 | if (val == 0) | ||
1235 | congested_period = CONGESTED_PERIOD; | ||
1236 | else | ||
1237 | congested_period = val; | ||
1238 | continue; | ||
1239 | } | ||
1240 | if (q == p) | ||
1241 | break; | ||
1242 | } | ||
1243 | for_each_present_cpu(cpu) { | ||
1244 | bcp = &per_cpu(bau_control, cpu); | ||
1245 | bcp->max_bau_concurrent = max_bau_concurrent; | ||
1246 | bcp->max_bau_concurrent_constant = max_bau_concurrent; | ||
1247 | bcp->plugged_delay = plugged_delay; | ||
1248 | bcp->plugsb4reset = plugsb4reset; | ||
1249 | bcp->timeoutsb4reset = timeoutsb4reset; | ||
1250 | bcp->ipi_reset_limit = ipi_reset_limit; | ||
1251 | bcp->complete_threshold = complete_threshold; | ||
1252 | bcp->congested_response_us = congested_response_us; | ||
1253 | bcp->congested_reps = congested_reps; | ||
1254 | bcp->congested_period = congested_period; | ||
1255 | } | ||
1085 | return count; | 1256 | return count; |
1086 | } | 1257 | } |
1087 | 1258 | ||
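Editor's note: the new tunables_write() above splits the input on whitespace twice, once to verify that exactly nine fields are present and once to parse each field with local_atoi(). A small userspace sketch of that splitting logic, assuming WHITESPACE expands to a string of blank characters such as " \t\n" (the macro itself is not shown in this hunk); the sample input values are arbitrary:

        #include <stdio.h>
        #include <string.h>

        #define WHITESPACE " \t\n"      /* assumed definition, for illustration */

        static int local_atoi(const char *name)
        {
                int val = 0;

                for (;; name++) {
                        switch (*name) {
                        case '0' ... '9':       /* gcc case-range extension, as in the patch */
                                val = 10 * val + (*name - '0');
                                break;
                        default:
                                return val;
                        }
                }
        }

        int main(void)
        {
                char instr[] = " 1 2 3 4 5 6 7 8 9\n";  /* arbitrary sample input */
                char *p, *q;
                int cnt = 0;

                p = instr + strspn(instr, WHITESPACE);  /* skip leading blanks */
                q = p;
                for (; *p; p = q + strspn(q, WHITESPACE)) {
                        q = p + strcspn(p, WHITESPACE); /* end of this token */
                        printf("field %d = %d\n", cnt++, local_atoi(p));
                        if (q == p)
                                break;
                }
                return 0;
        }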
@@ -1097,6 +1268,11 @@ static int uv_ptc_proc_open(struct inode *inode, struct file *file) | |||
1097 | return seq_open(file, &uv_ptc_seq_ops); | 1268 | return seq_open(file, &uv_ptc_seq_ops); |
1098 | } | 1269 | } |
1099 | 1270 | ||
1271 | static int tunables_open(struct inode *inode, struct file *file) | ||
1272 | { | ||
1273 | return 0; | ||
1274 | } | ||
1275 | |||
1100 | static const struct file_operations proc_uv_ptc_operations = { | 1276 | static const struct file_operations proc_uv_ptc_operations = { |
1101 | .open = uv_ptc_proc_open, | 1277 | .open = uv_ptc_proc_open, |
1102 | .read = seq_read, | 1278 | .read = seq_read, |
@@ -1105,6 +1281,12 @@ static const struct file_operations proc_uv_ptc_operations = { | |||
1105 | .release = seq_release, | 1281 | .release = seq_release, |
1106 | }; | 1282 | }; |
1107 | 1283 | ||
1284 | static const struct file_operations tunables_fops = { | ||
1285 | .open = tunables_open, | ||
1286 | .read = tunables_read, | ||
1287 | .write = tunables_write, | ||
1288 | }; | ||
1289 | |||
1108 | static int __init uv_ptc_init(void) | 1290 | static int __init uv_ptc_init(void) |
1109 | { | 1291 | { |
1110 | struct proc_dir_entry *proc_uv_ptc; | 1292 | struct proc_dir_entry *proc_uv_ptc; |
@@ -1119,6 +1301,20 @@ static int __init uv_ptc_init(void) | |||
1119 | UV_PTC_BASENAME); | 1301 | UV_PTC_BASENAME); |
1120 | return -EINVAL; | 1302 | return -EINVAL; |
1121 | } | 1303 | } |
1304 | |||
1305 | tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL); | ||
1306 | if (!tunables_dir) { | ||
1307 | printk(KERN_ERR "unable to create debugfs directory %s\n", | ||
1308 | UV_BAU_TUNABLES_DIR); | ||
1309 | return -EINVAL; | ||
1310 | } | ||
1311 | tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, | ||
1312 | tunables_dir, NULL, &tunables_fops); | ||
1313 | if (!tunables_file) { | ||
1314 | printk(KERN_ERR "unable to create debugfs file %s\n", | ||
1315 | UV_BAU_TUNABLES_FILE); | ||
1316 | return -EINVAL; | ||
1317 | } | ||
1122 | return 0; | 1318 | return 0; |
1123 | } | 1319 | } |
1124 | 1320 | ||
@@ -1259,15 +1455,45 @@ static void __init uv_init_uvhub(int uvhub, int vector) | |||
1259 | } | 1455 | } |
1260 | 1456 | ||
1261 | /* | 1457 | /* |
1458 | * We will set BAU_MISC_CONTROL with a timeout period. | ||
1459 | * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT. | ||
1460 | * So the destination timeout period has to be calculated from them. | ||
1461 | */ | ||
1462 | static int | ||
1463 | calculate_destination_timeout(void) | ||
1464 | { | ||
1465 | unsigned long mmr_image; | ||
1466 | int mult1; | ||
1467 | int mult2; | ||
1468 | int index; | ||
1469 | int base; | ||
1470 | int ret; | ||
1471 | unsigned long ts_ns; | ||
1472 | |||
1473 | mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; | ||
1474 | mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); | ||
1475 | index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; | ||
1476 | mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); | ||
1477 | mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; | ||
1478 | base = timeout_base_ns[index]; | ||
1479 | ts_ns = base * mult1 * mult2; | ||
1480 | ret = ts_ns / 1000; | ||
1481 | return ret; | ||
1482 | } | ||
1483 | |||
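Editor's note: a worked example of the derivation performed by calculate_destination_timeout() above; the register-derived values below are purely hypothetical and only illustrate the arithmetic:

        int mult1 = 10;         /* UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK (assumed) */
        int mult2 = 16;         /* field read from UVH_TRANSACTION_TIMEOUT (assumed) */
        int base  = 800;        /* timeout_base_ns[index] selected via UVH_AGING_PRESCALE_SEL (assumed) */
        unsigned long ts_ns = base * mult1 * mult2;     /* 128000 ns */
        int timeout_us = ts_ns / 1000;                  /* 128 us; later doubled for bcp->timeout_interval */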
1484 | /* | ||
1262 | * initialize the bau_control structure for each cpu | 1485 | * initialize the bau_control structure for each cpu |
1263 | */ | 1486 | */ |
1264 | static void uv_init_per_cpu(int nuvhubs) | 1487 | static void __init uv_init_per_cpu(int nuvhubs) |
1265 | { | 1488 | { |
1266 | int i, j, k; | 1489 | int i; |
1267 | int cpu; | 1490 | int cpu; |
1268 | int pnode; | 1491 | int pnode; |
1269 | int uvhub; | 1492 | int uvhub; |
1493 | int have_hmaster; | ||
1270 | short socket = 0; | 1494 | short socket = 0; |
1495 | unsigned short socket_mask; | ||
1496 | unsigned char *uvhub_mask; | ||
1271 | struct bau_control *bcp; | 1497 | struct bau_control *bcp; |
1272 | struct uvhub_desc *bdp; | 1498 | struct uvhub_desc *bdp; |
1273 | struct socket_desc *sdp; | 1499 | struct socket_desc *sdp; |
@@ -1278,7 +1504,7 @@ static void uv_init_per_cpu(int nuvhubs) | |||
1278 | short cpu_number[16]; | 1504 | short cpu_number[16]; |
1279 | }; | 1505 | }; |
1280 | struct uvhub_desc { | 1506 | struct uvhub_desc { |
1281 | short num_sockets; | 1507 | unsigned short socket_mask; |
1282 | short num_cpus; | 1508 | short num_cpus; |
1283 | short uvhub; | 1509 | short uvhub; |
1284 | short pnode; | 1510 | short pnode; |
@@ -1286,57 +1512,84 @@ static void uv_init_per_cpu(int nuvhubs) | |||
1286 | }; | 1512 | }; |
1287 | struct uvhub_desc *uvhub_descs; | 1513 | struct uvhub_desc *uvhub_descs; |
1288 | 1514 | ||
1515 | timeout_us = calculate_destination_timeout(); | ||
1516 | |||
1289 | uvhub_descs = (struct uvhub_desc *) | 1517 | uvhub_descs = (struct uvhub_desc *) |
1290 | kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); | 1518 | kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); |
1291 | memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); | 1519 | memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); |
1520 | uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); | ||
1292 | for_each_present_cpu(cpu) { | 1521 | for_each_present_cpu(cpu) { |
1293 | bcp = &per_cpu(bau_control, cpu); | 1522 | bcp = &per_cpu(bau_control, cpu); |
1294 | memset(bcp, 0, sizeof(struct bau_control)); | 1523 | memset(bcp, 0, sizeof(struct bau_control)); |
1295 | spin_lock_init(&bcp->masks_lock); | ||
1296 | bcp->max_concurrent = uv_bau_max_concurrent; | ||
1297 | pnode = uv_cpu_hub_info(cpu)->pnode; | 1524 | pnode = uv_cpu_hub_info(cpu)->pnode; |
1298 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; | 1525 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; |
1526 | *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); | ||
1299 | bdp = &uvhub_descs[uvhub]; | 1527 | bdp = &uvhub_descs[uvhub]; |
1300 | bdp->num_cpus++; | 1528 | bdp->num_cpus++; |
1301 | bdp->uvhub = uvhub; | 1529 | bdp->uvhub = uvhub; |
1302 | bdp->pnode = pnode; | 1530 | bdp->pnode = pnode; |
1303 | /* time interval to catch a hardware stay-busy bug */ | 1531 | /* kludge: 'assuming' one node per socket, and assuming that |
1304 | bcp->timeout_interval = millisec_2_cycles(3); | 1532 | disabling a socket just leaves a gap in node numbers */ |
1305 | /* kludge: assume uv_hub.h is constant */ | 1533 | socket = (cpu_to_node(cpu) & 1); |
1306 | socket = (cpu_physical_id(cpu)>>5)&1; | 1534 | bdp->socket_mask |= (1 << socket); |
1307 | if (socket >= bdp->num_sockets) | ||
1308 | bdp->num_sockets = socket+1; | ||
1309 | sdp = &bdp->socket[socket]; | 1535 | sdp = &bdp->socket[socket]; |
1310 | sdp->cpu_number[sdp->num_cpus] = cpu; | 1536 | sdp->cpu_number[sdp->num_cpus] = cpu; |
1311 | sdp->num_cpus++; | 1537 | sdp->num_cpus++; |
1312 | } | 1538 | } |
1313 | socket = 0; | 1539 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { |
1314 | for_each_possible_blade(uvhub) { | 1540 | if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) |
1541 | continue; | ||
1542 | have_hmaster = 0; | ||
1315 | bdp = &uvhub_descs[uvhub]; | 1543 | bdp = &uvhub_descs[uvhub]; |
1316 | for (i = 0; i < bdp->num_sockets; i++) { | 1544 | socket_mask = bdp->socket_mask; |
1317 | sdp = &bdp->socket[i]; | 1545 | socket = 0; |
1318 | for (j = 0; j < sdp->num_cpus; j++) { | 1546 | while (socket_mask) { |
1319 | cpu = sdp->cpu_number[j]; | 1547 | if (!(socket_mask & 1)) |
1548 | goto nextsocket; | ||
1549 | sdp = &bdp->socket[socket]; | ||
1550 | for (i = 0; i < sdp->num_cpus; i++) { | ||
1551 | cpu = sdp->cpu_number[i]; | ||
1320 | bcp = &per_cpu(bau_control, cpu); | 1552 | bcp = &per_cpu(bau_control, cpu); |
1321 | bcp->cpu = cpu; | 1553 | bcp->cpu = cpu; |
1322 | if (j == 0) { | 1554 | if (i == 0) { |
1323 | smaster = bcp; | 1555 | smaster = bcp; |
1324 | if (i == 0) | 1556 | if (!have_hmaster) { |
1557 | have_hmaster++; | ||
1325 | hmaster = bcp; | 1558 | hmaster = bcp; |
1559 | } | ||
1326 | } | 1560 | } |
1327 | bcp->cpus_in_uvhub = bdp->num_cpus; | 1561 | bcp->cpus_in_uvhub = bdp->num_cpus; |
1328 | bcp->cpus_in_socket = sdp->num_cpus; | 1562 | bcp->cpus_in_socket = sdp->num_cpus; |
1329 | bcp->socket_master = smaster; | 1563 | bcp->socket_master = smaster; |
1564 | bcp->uvhub = bdp->uvhub; | ||
1330 | bcp->uvhub_master = hmaster; | 1565 | bcp->uvhub_master = hmaster; |
1331 | for (k = 0; k < DEST_Q_SIZE; k++) | 1566 | bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> |
1332 | bcp->socket_acknowledge_count[k] = 0; | 1567 | blade_processor_id; |
1333 | bcp->uvhub_cpu = | ||
1334 | uv_cpu_hub_info(cpu)->blade_processor_id; | ||
1335 | } | 1568 | } |
1569 | nextsocket: | ||
1336 | socket++; | 1570 | socket++; |
1571 | socket_mask = (socket_mask >> 1); | ||
1337 | } | 1572 | } |
1338 | } | 1573 | } |
1339 | kfree(uvhub_descs); | 1574 | kfree(uvhub_descs); |
1575 | kfree(uvhub_mask); | ||
1576 | for_each_present_cpu(cpu) { | ||
1577 | bcp = &per_cpu(bau_control, cpu); | ||
1578 | bcp->baudisabled = 0; | ||
1579 | bcp->statp = &per_cpu(ptcstats, cpu); | ||
1580 | /* time interval to catch a hardware stay-busy bug */ | ||
1581 | bcp->timeout_interval = microsec_2_cycles(2*timeout_us); | ||
1582 | bcp->max_bau_concurrent = max_bau_concurrent; | ||
1583 | bcp->max_bau_concurrent_constant = max_bau_concurrent; | ||
1584 | bcp->plugged_delay = plugged_delay; | ||
1585 | bcp->plugsb4reset = plugsb4reset; | ||
1586 | bcp->timeoutsb4reset = timeoutsb4reset; | ||
1587 | bcp->ipi_reset_limit = ipi_reset_limit; | ||
1588 | bcp->complete_threshold = complete_threshold; | ||
1589 | bcp->congested_response_us = congested_response_us; | ||
1590 | bcp->congested_reps = congested_reps; | ||
1591 | bcp->congested_period = congested_period; | ||
1592 | } | ||
1340 | } | 1593 | } |
1341 | 1594 | ||
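Editor's note: uv_init_per_cpu() above tracks which uvhubs actually have CPUs using a byte-array bitmap, uvhub_mask, sized (nuvhubs+7)/8. The set and test operations it open-codes are equivalent to the helpers below (helper names are illustrative, not part of the patch):

        static inline void uvhub_mask_set(unsigned char *mask, int uvhub)
        {
                mask[uvhub / 8] |= 1 << (uvhub % 8);
        }

        static inline int uvhub_mask_test(const unsigned char *mask, int uvhub)
        {
                return mask[uvhub / 8] & (1 << (uvhub % 8));
        }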
1342 | /* | 1595 | /* |
@@ -1361,10 +1614,11 @@ static int __init uv_bau_init(void) | |||
1361 | zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), | 1614 | zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), |
1362 | GFP_KERNEL, cpu_to_node(cur_cpu)); | 1615 | GFP_KERNEL, cpu_to_node(cur_cpu)); |
1363 | 1616 | ||
1364 | uv_bau_max_concurrent = MAX_BAU_CONCURRENT; | ||
1365 | uv_nshift = uv_hub_info->m_val; | 1617 | uv_nshift = uv_hub_info->m_val; |
1366 | uv_mmask = (1UL << uv_hub_info->m_val) - 1; | 1618 | uv_mmask = (1UL << uv_hub_info->m_val) - 1; |
1367 | nuvhubs = uv_num_possible_blades(); | 1619 | nuvhubs = uv_num_possible_blades(); |
1620 | spin_lock_init(&disable_lock); | ||
1621 | congested_cycles = microsec_2_cycles(congested_response_us); | ||
1368 | 1622 | ||
1369 | uv_init_per_cpu(nuvhubs); | 1623 | uv_init_per_cpu(nuvhubs); |
1370 | 1624 | ||
@@ -1383,15 +1637,19 @@ static int __init uv_bau_init(void) | |||
1383 | alloc_intr_gate(vector, uv_bau_message_intr1); | 1637 | alloc_intr_gate(vector, uv_bau_message_intr1); |
1384 | 1638 | ||
1385 | for_each_possible_blade(uvhub) { | 1639 | for_each_possible_blade(uvhub) { |
1386 | pnode = uv_blade_to_pnode(uvhub); | 1640 | if (uv_blade_nr_possible_cpus(uvhub)) { |
1387 | /* INIT the bau */ | 1641 | pnode = uv_blade_to_pnode(uvhub); |
1388 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, | 1642 | /* INIT the bau */ |
1389 | ((unsigned long)1 << 63)); | 1643 | uv_write_global_mmr64(pnode, |
1390 | mmr = 1; /* should be 1 to broadcast to both sockets */ | 1644 | UVH_LB_BAU_SB_ACTIVATION_CONTROL, |
1391 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, mmr); | 1645 | ((unsigned long)1 << 63)); |
1646 | mmr = 1; /* should be 1 to broadcast to both sockets */ | ||
1647 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, | ||
1648 | mmr); | ||
1649 | } | ||
1392 | } | 1650 | } |
1393 | 1651 | ||
1394 | return 0; | 1652 | return 0; |
1395 | } | 1653 | } |
1396 | core_initcall(uv_bau_init); | 1654 | core_initcall(uv_bau_init); |
1397 | core_initcall(uv_ptc_init); | 1655 | fs_initcall(uv_ptc_init); |
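Editor's note: once uv_ptc_init() has created the debugfs entry, the nine tunables can be read and written from userspace. A hedged sketch; the path below is only an assumption, since it depends on the UV_BAU_TUNABLES_DIR and UV_BAU_TUNABLES_FILE macros and on where debugfs is mounted:

        #include <stdio.h>

        int main(void)
        {
                /* assumed path; adjust to the actual debugfs mount and file names */
                const char *path = "/sys/kernel/debug/sgi_uv/bau_tunables";
                char line[300];
                FILE *f;

                f = fopen(path, "r");
                if (!f)
                        return 1;
                while (fgets(line, sizeof(line), f))    /* header line plus current values */
                        fputs(line, stdout);
                fclose(f);

                f = fopen(path, "w");
                if (!f)
                        return 1;
                /* nine fields, in the order printed by tunables_read();
                 * per tunables_write(), a 0 restores that tunable's default */
                fprintf(f, "0 0 0 0 0 0 0 0 0\n");
                fclose(f);
                return 0;
        }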
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 725ef4d17cd5..60788dee0f8a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -392,7 +392,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
392 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) | 392 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) |
393 | == NOTIFY_STOP) | 393 | == NOTIFY_STOP) |
394 | return; | 394 | return; |
395 | |||
395 | #ifdef CONFIG_X86_LOCAL_APIC | 396 | #ifdef CONFIG_X86_LOCAL_APIC |
397 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | ||
398 | == NOTIFY_STOP) | ||
399 | return; | ||
400 | |||
401 | #ifndef CONFIG_LOCKUP_DETECTOR | ||
396 | /* | 402 | /* |
397 | * Ok, so this is none of the documented NMI sources, | 403 | * Ok, so this is none of the documented NMI sources, |
398 | * so it must be the NMI watchdog. | 404 | * so it must be the NMI watchdog. |
@@ -400,6 +406,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
400 | if (nmi_watchdog_tick(regs, reason)) | 406 | if (nmi_watchdog_tick(regs, reason)) |
401 | return; | 407 | return; |
402 | if (!do_nmi_callback(regs, cpu)) | 408 | if (!do_nmi_callback(regs, cpu)) |
409 | #endif /* !CONFIG_LOCKUP_DETECTOR */ | ||
403 | unknown_nmi_error(reason, regs); | 410 | unknown_nmi_error(reason, regs); |
404 | #else | 411 | #else |
405 | unknown_nmi_error(reason, regs); | 412 | unknown_nmi_error(reason, regs); |
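Editor's note: with the DIE_NMI notification added above, a subsystem that wants to see these NMIs can hook the die chain. A minimal sketch using the existing register_die_notifier() and struct die_args interfaces; the handler and variable names are illustrative only:

        #include <linux/kdebug.h>
        #include <linux/notifier.h>

        static int example_nmi_notify(struct notifier_block *nb,
                                      unsigned long cmd, void *data)
        {
                struct die_args *args = data;

                if (cmd != DIE_NMI)
                        return NOTIFY_DONE;

                /* args->regs and args->err describe the NMI context */
                (void)args;

                return NOTIFY_DONE;     /* NOTIFY_STOP would claim the NMI */
        }

        static struct notifier_block example_nmi_nb = {
                .notifier_call = example_nmi_notify,
        };

        /* at init time: register_die_notifier(&example_nmi_nb); */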
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 9faf91ae1841..ce8e50239332 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -751,7 +751,6 @@ static struct clocksource clocksource_tsc = { | |||
751 | .read = read_tsc, | 751 | .read = read_tsc, |
752 | .resume = resume_tsc, | 752 | .resume = resume_tsc, |
753 | .mask = CLOCKSOURCE_MASK(64), | 753 | .mask = CLOCKSOURCE_MASK(64), |
754 | .shift = 22, | ||
755 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | | 754 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | |
756 | CLOCK_SOURCE_MUST_VERIFY, | 755 | CLOCK_SOURCE_MUST_VERIFY, |
757 | #ifdef CONFIG_X86_64 | 756 | #ifdef CONFIG_X86_64 |
@@ -845,8 +844,6 @@ __cpuinit int unsynchronized_tsc(void) | |||
845 | 844 | ||
846 | static void __init init_tsc_clocksource(void) | 845 | static void __init init_tsc_clocksource(void) |
847 | { | 846 | { |
848 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, | ||
849 | clocksource_tsc.shift); | ||
850 | if (tsc_clocksource_reliable) | 847 | if (tsc_clocksource_reliable) |
851 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | 848 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; |
852 | /* lower the rating if we already know its unstable: */ | 849 | /* lower the rating if we already know its unstable: */ |
@@ -854,7 +851,7 @@ static void __init init_tsc_clocksource(void) | |||
854 | clocksource_tsc.rating = 0; | 851 | clocksource_tsc.rating = 0; |
855 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; | 852 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; |
856 | } | 853 | } |
857 | clocksource_register(&clocksource_tsc); | 854 | clocksource_register_khz(&clocksource_tsc, tsc_khz); |
858 | } | 855 | } |
859 | 856 | ||
860 | #ifdef CONFIG_X86_64 | 857 | #ifdef CONFIG_X86_64 |
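Editor's note: the tsc.c change above drops the hand-computed mult/shift pair and lets clocksource_register_khz() derive them from the counter frequency. A sketch of the same pattern for a generic driver; the names are illustrative and read_hw_counter() is a hypothetical hardware access:

        #include <linux/clocksource.h>

        static cycle_t example_cs_read(struct clocksource *cs)
        {
                return (cycle_t)read_hw_counter();      /* hypothetical counter read */
        }

        static struct clocksource example_cs = {
                .name   = "example",
                .rating = 200,
                .read   = example_cs_read,
                .mask   = CLOCKSOURCE_MASK(32),
                .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
        };

        /* at init time, with the counter frequency in kHz: */
        /* clocksource_register_khz(&example_cs, counter_khz); */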
diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu_64.S index 45b6f8a975a1..56a8c2a867d9 100644 --- a/arch/x86/kernel/verify_cpu_64.S +++ b/arch/x86/kernel/verify_cpu_64.S | |||
@@ -31,6 +31,7 @@ | |||
31 | */ | 31 | */ |
32 | 32 | ||
33 | #include <asm/cpufeature.h> | 33 | #include <asm/cpufeature.h> |
34 | #include <asm/msr-index.h> | ||
34 | 35 | ||
35 | verify_cpu: | 36 | verify_cpu: |
36 | pushfl # Save caller passed flags | 37 | pushfl # Save caller passed flags |
@@ -88,7 +89,7 @@ verify_cpu_sse_test: | |||
88 | je verify_cpu_sse_ok | 89 | je verify_cpu_sse_ok |
89 | test %di,%di | 90 | test %di,%di |
90 | jz verify_cpu_no_longmode # only try to force SSE on AMD | 91 | jz verify_cpu_no_longmode # only try to force SSE on AMD |
91 | movl $0xc0010015,%ecx # HWCR | 92 | movl $MSR_K7_HWCR,%ecx |
92 | rdmsr | 93 | rdmsr |
93 | btr $15,%eax # enable SSE | 94 | btr $15,%eax # enable SSE |
94 | wrmsr | 95 | wrmsr |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 1c0c6ab9c60f..dcbb28c4b694 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -73,8 +73,8 @@ void update_vsyscall_tz(void) | |||
73 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 73 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
74 | } | 74 | } |
75 | 75 | ||
76 | void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, | 76 | void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, |
77 | u32 mult) | 77 | struct clocksource *clock, u32 mult) |
78 | { | 78 | { |
79 | unsigned long flags; | 79 | unsigned long flags; |
80 | 80 | ||
@@ -87,7 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, | |||
87 | vsyscall_gtod_data.clock.shift = clock->shift; | 87 | vsyscall_gtod_data.clock.shift = clock->shift; |
88 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | 88 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; |
89 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | 89 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; |
90 | vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; | 90 | vsyscall_gtod_data.wall_to_monotonic = *wtm; |
91 | vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); | 91 | vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); |
92 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 92 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
93 | } | 93 | } |
@@ -169,13 +169,18 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) | |||
169 | * unlikely */ | 169 | * unlikely */ |
170 | time_t __vsyscall(1) vtime(time_t *t) | 170 | time_t __vsyscall(1) vtime(time_t *t) |
171 | { | 171 | { |
172 | struct timeval tv; | 172 | unsigned seq; |
173 | time_t result; | 173 | time_t result; |
174 | if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) | 174 | if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) |
175 | return time_syscall(t); | 175 | return time_syscall(t); |
176 | 176 | ||
177 | vgettimeofday(&tv, NULL); | 177 | do { |
178 | result = tv.tv_sec; | 178 | seq = read_seqbegin(&__vsyscall_gtod_data.lock); |
179 | |||
180 | result = __vsyscall_gtod_data.wall_time_sec; | ||
181 | |||
182 | } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); | ||
183 | |||
179 | if (t) | 184 | if (t) |
180 | *t = result; | 185 | *t = result; |
181 | return result; | 186 | return result; |
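Editor's note: the new vtime() above replaces a full vgettimeofday() call with the standard seqlock read-retry loop over vsyscall_gtod_data. The general shape of that pattern, with illustrative names for the lock and data:

        #include <linux/seqlock.h>

        static DEFINE_SEQLOCK(example_lock);
        static time_t example_seconds;

        static time_t example_read(void)
        {
                unsigned seq;
                time_t t;

                do {
                        seq = read_seqbegin(&example_lock);
                        t = example_seconds;
                } while (read_seqretry(&example_lock, seq));    /* retry if a writer intervened */

                return t;
        }

        static void example_write(time_t t)
        {
                write_seqlock(&example_lock);
                example_seconds = t;
                write_sequnlock(&example_lock);
        }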
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 37e68fc5e24a..9c253bd65e24 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -16,11 +16,88 @@ | |||
16 | */ | 16 | */ |
17 | u64 pcntxt_mask; | 17 | u64 pcntxt_mask; |
18 | 18 | ||
19 | /* | ||
20 | * Represents init state for the supported extended state. | ||
21 | */ | ||
22 | static struct xsave_struct *init_xstate_buf; | ||
23 | |||
19 | struct _fpx_sw_bytes fx_sw_reserved; | 24 | struct _fpx_sw_bytes fx_sw_reserved; |
20 | #ifdef CONFIG_IA32_EMULATION | 25 | #ifdef CONFIG_IA32_EMULATION |
21 | struct _fpx_sw_bytes fx_sw_reserved_ia32; | 26 | struct _fpx_sw_bytes fx_sw_reserved_ia32; |
22 | #endif | 27 | #endif |
23 | 28 | ||
29 | static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; | ||
30 | |||
31 | /* | ||
32 | * If a processor implementation discerns that a processor state component is | ||
33 | * in its initialized state, it may set the corresponding bit in | ||
34 | * xsave_hdr.xstate_bv to '0', without modifying the corresponding memory | ||
35 | * layout in the case of xsaveopt. While presenting the xstate information to | ||
36 | * the user, we always ensure that the memory layout of a feature will be in | ||
37 | * the init state if the corresponding header bit is zero. This is to ensure | ||
38 | * that the user doesn't see some stale state in the memory layout during | ||
39 | * signal handling, debugging etc. | ||
40 | */ | ||
41 | void __sanitize_i387_state(struct task_struct *tsk) | ||
42 | { | ||
43 | u64 xstate_bv; | ||
44 | int feature_bit = 0x2; | ||
45 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; | ||
46 | |||
47 | if (!fx) | ||
48 | return; | ||
49 | |||
50 | BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU); | ||
51 | |||
52 | xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; | ||
53 | |||
54 | /* | ||
55 | * None of the feature bits are in init state. So nothing else | ||
56 | * to do for us, as the memory layout is upto date. | ||
57 | */ | ||
58 | if ((xstate_bv & pcntxt_mask) == pcntxt_mask) | ||
59 | return; | ||
60 | |||
61 | /* | ||
62 | * FP is in init state | ||
63 | */ | ||
64 | if (!(xstate_bv & XSTATE_FP)) { | ||
65 | fx->cwd = 0x37f; | ||
66 | fx->swd = 0; | ||
67 | fx->twd = 0; | ||
68 | fx->fop = 0; | ||
69 | fx->rip = 0; | ||
70 | fx->rdp = 0; | ||
71 | memset(&fx->st_space[0], 0, 128); | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * SSE is in init state | ||
76 | */ | ||
77 | if (!(xstate_bv & XSTATE_SSE)) | ||
78 | memset(&fx->xmm_space[0], 0, 256); | ||
79 | |||
80 | xstate_bv = (pcntxt_mask & ~xstate_bv) >> 2; | ||
81 | |||
82 | /* | ||
83 | * Update all the other memory layouts for which the corresponding | ||
84 | * header bit is in the init state. | ||
85 | */ | ||
86 | while (xstate_bv) { | ||
87 | if (xstate_bv & 0x1) { | ||
88 | int offset = xstate_offsets[feature_bit]; | ||
89 | int size = xstate_sizes[feature_bit]; | ||
90 | |||
91 | memcpy(((void *) fx) + offset, | ||
92 | ((void *) init_xstate_buf) + offset, | ||
93 | size); | ||
94 | } | ||
95 | |||
96 | xstate_bv >>= 1; | ||
97 | feature_bit++; | ||
98 | } | ||
99 | } | ||
100 | |||
24 | /* | 101 | /* |
25 | * Check for the presence of extended state information in the | 102 | * Check for the presence of extended state information in the |
26 | * user fpstate pointer in the sigcontext. | 103 | * user fpstate pointer in the sigcontext. |
@@ -36,15 +113,14 @@ int check_for_xstate(struct i387_fxsave_struct __user *buf, | |||
36 | 113 | ||
37 | err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], | 114 | err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], |
38 | sizeof(struct _fpx_sw_bytes)); | 115 | sizeof(struct _fpx_sw_bytes)); |
39 | |||
40 | if (err) | 116 | if (err) |
41 | return err; | 117 | return -EFAULT; |
42 | 118 | ||
43 | /* | 119 | /* |
44 | * First Magic check failed. | 120 | * First Magic check failed. |
45 | */ | 121 | */ |
46 | if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) | 122 | if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) |
47 | return -1; | 123 | return -EINVAL; |
48 | 124 | ||
49 | /* | 125 | /* |
50 | * Check for error scenarios. | 126 | * Check for error scenarios. |
@@ -52,19 +128,21 @@ int check_for_xstate(struct i387_fxsave_struct __user *buf, | |||
52 | if (fx_sw_user->xstate_size < min_xstate_size || | 128 | if (fx_sw_user->xstate_size < min_xstate_size || |
53 | fx_sw_user->xstate_size > xstate_size || | 129 | fx_sw_user->xstate_size > xstate_size || |
54 | fx_sw_user->xstate_size > fx_sw_user->extended_size) | 130 | fx_sw_user->xstate_size > fx_sw_user->extended_size) |
55 | return -1; | 131 | return -EINVAL; |
56 | 132 | ||
57 | err = __get_user(magic2, (__u32 *) (((void *)fpstate) + | 133 | err = __get_user(magic2, (__u32 *) (((void *)fpstate) + |
58 | fx_sw_user->extended_size - | 134 | fx_sw_user->extended_size - |
59 | FP_XSTATE_MAGIC2_SIZE)); | 135 | FP_XSTATE_MAGIC2_SIZE)); |
136 | if (err) | ||
137 | return err; | ||
60 | /* | 138 | /* |
61 | * Check for the presence of second magic word at the end of memory | 139 | * Check for the presence of second magic word at the end of memory |
62 | * layout. This detects the case where the user just copied the legacy | 140 | * layout. This detects the case where the user just copied the legacy |
63 | * fpstate layout without copying the extended state information | 141 | * fpstate layout without copying the extended state information |
64 | * in the memory layout. | 142 | * in the memory layout. |
65 | */ | 143 | */ |
66 | if (err || magic2 != FP_XSTATE_MAGIC2) | 144 | if (magic2 != FP_XSTATE_MAGIC2) |
67 | return -1; | 145 | return -EFAULT; |
68 | 146 | ||
69 | return 0; | 147 | return 0; |
70 | } | 148 | } |
@@ -91,14 +169,6 @@ int save_i387_xstate(void __user *buf) | |||
91 | return 0; | 169 | return 0; |
92 | 170 | ||
93 | if (task_thread_info(tsk)->status & TS_USEDFPU) { | 171 | if (task_thread_info(tsk)->status & TS_USEDFPU) { |
94 | /* | ||
95 | * Start with clearing the user buffer. This will present a | ||
96 | * clean context for the bytes not touched by the fxsave/xsave. | ||
97 | */ | ||
98 | err = __clear_user(buf, sig_xstate_size); | ||
99 | if (err) | ||
100 | return err; | ||
101 | |||
102 | if (use_xsave()) | 172 | if (use_xsave()) |
103 | err = xsave_user(buf); | 173 | err = xsave_user(buf); |
104 | else | 174 | else |
@@ -109,6 +179,7 @@ int save_i387_xstate(void __user *buf) | |||
109 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 179 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
110 | stts(); | 180 | stts(); |
111 | } else { | 181 | } else { |
182 | sanitize_i387_state(tsk); | ||
112 | if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, | 183 | if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, |
113 | xstate_size)) | 184 | xstate_size)) |
114 | return -1; | 185 | return -1; |
@@ -184,8 +255,8 @@ static int restore_user_xstate(void __user *buf) | |||
184 | * init the state skipped by the user. | 255 | * init the state skipped by the user. |
185 | */ | 256 | */ |
186 | mask = pcntxt_mask & ~mask; | 257 | mask = pcntxt_mask & ~mask; |
187 | 258 | if (unlikely(mask)) | |
188 | xrstor_state(init_xstate_buf, mask); | 259 | xrstor_state(init_xstate_buf, mask); |
189 | 260 | ||
190 | return 0; | 261 | return 0; |
191 | 262 | ||
@@ -274,11 +345,6 @@ static void prepare_fx_sw_frame(void) | |||
274 | #endif | 345 | #endif |
275 | } | 346 | } |
276 | 347 | ||
277 | /* | ||
278 | * Represents init state for the supported extended state. | ||
279 | */ | ||
280 | struct xsave_struct *init_xstate_buf; | ||
281 | |||
282 | #ifdef CONFIG_X86_64 | 348 | #ifdef CONFIG_X86_64 |
283 | unsigned int sig_xstate_size = sizeof(struct _fpstate); | 349 | unsigned int sig_xstate_size = sizeof(struct _fpstate); |
284 | #endif | 350 | #endif |
@@ -286,37 +352,77 @@ unsigned int sig_xstate_size = sizeof(struct _fpstate); | |||
286 | /* | 352 | /* |
287 | * Enable the extended processor state save/restore feature | 353 | * Enable the extended processor state save/restore feature |
288 | */ | 354 | */ |
289 | void __cpuinit xsave_init(void) | 355 | static inline void xstate_enable(void) |
290 | { | 356 | { |
291 | if (!cpu_has_xsave) | ||
292 | return; | ||
293 | |||
294 | set_in_cr4(X86_CR4_OSXSAVE); | 357 | set_in_cr4(X86_CR4_OSXSAVE); |
295 | |||
296 | /* | ||
297 | * Enable all the features that the HW is capable of | ||
298 | * and the Linux kernel is aware of. | ||
299 | */ | ||
300 | xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); | 358 | xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); |
301 | } | 359 | } |
302 | 360 | ||
303 | /* | 361 | /* |
362 | * Record the offsets and sizes of different state managed by the xsave | ||
363 | * memory layout. | ||
364 | */ | ||
365 | static void __init setup_xstate_features(void) | ||
366 | { | ||
367 | int eax, ebx, ecx, edx, leaf = 0x2; | ||
368 | |||
369 | xstate_features = fls64(pcntxt_mask); | ||
370 | xstate_offsets = alloc_bootmem(xstate_features * sizeof(int)); | ||
371 | xstate_sizes = alloc_bootmem(xstate_features * sizeof(int)); | ||
372 | |||
373 | do { | ||
374 | cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); | ||
375 | |||
376 | if (eax == 0) | ||
377 | break; | ||
378 | |||
379 | xstate_offsets[leaf] = ebx; | ||
380 | xstate_sizes[leaf] = eax; | ||
381 | |||
382 | leaf++; | ||
383 | } while (1); | ||
384 | } | ||
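Editor's note: setup_xstate_features() above walks CPUID leaf 0xd from sub-leaf 2 upward, recording each component's size (EAX) and offset (EBX). The same enumeration can be done from userspace; the sketch below assumes GCC's <cpuid.h> __cpuid_count() macro is available:

        #include <stdio.h>
        #include <cpuid.h>

        int main(void)
        {
                unsigned int eax, ebx, ecx, edx, leaf;

                for (leaf = 2; ; leaf++) {
                        __cpuid_count(0xd, leaf, eax, ebx, ecx, edx);
                        if (eax == 0)           /* no further components reported */
                                break;
                        printf("xstate component %u: size %u, offset %u\n",
                               leaf, eax, ebx);
                }
                return 0;
        }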
385 | |||
386 | /* | ||
304 | * setup the xstate image representing the init state | 387 | * setup the xstate image representing the init state |
305 | */ | 388 | */ |
306 | static void __init setup_xstate_init(void) | 389 | static void __init setup_xstate_init(void) |
307 | { | 390 | { |
391 | setup_xstate_features(); | ||
392 | |||
393 | /* | ||
394 | * Setup init_xstate_buf to represent the init state of | ||
395 | * all the features managed by the xsave | ||
396 | */ | ||
308 | init_xstate_buf = alloc_bootmem(xstate_size); | 397 | init_xstate_buf = alloc_bootmem(xstate_size); |
309 | init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; | 398 | init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; |
399 | |||
400 | clts(); | ||
401 | /* | ||
402 | * Init all the features' state with header_bv being 0x0 | ||
403 | */ | ||
404 | xrstor_state(init_xstate_buf, -1); | ||
405 | /* | ||
406 | * Dump the init state again. This is to identify the init state | ||
407 | * of any feature which is not represented by all zero's. | ||
408 | */ | ||
409 | xsave_state(init_xstate_buf, -1); | ||
410 | stts(); | ||
310 | } | 411 | } |
311 | 412 | ||
312 | /* | 413 | /* |
313 | * Enable and initialize the xsave feature. | 414 | * Enable and initialize the xsave feature. |
314 | */ | 415 | */ |
315 | void __ref xsave_cntxt_init(void) | 416 | static void __init xstate_enable_boot_cpu(void) |
316 | { | 417 | { |
317 | unsigned int eax, ebx, ecx, edx; | 418 | unsigned int eax, ebx, ecx, edx; |
318 | 419 | ||
319 | cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); | 420 | if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { |
421 | WARN(1, KERN_ERR "XSTATE_CPUID missing\n"); | ||
422 | return; | ||
423 | } | ||
424 | |||
425 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); | ||
320 | pcntxt_mask = eax + ((u64)edx << 32); | 426 | pcntxt_mask = eax + ((u64)edx << 32); |
321 | 427 | ||
322 | if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { | 428 | if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { |
@@ -329,12 +435,13 @@ void __ref xsave_cntxt_init(void) | |||
329 | * Support only the state known to OS. | 435 | * Support only the state known to OS. |
330 | */ | 436 | */ |
331 | pcntxt_mask = pcntxt_mask & XCNTXT_MASK; | 437 | pcntxt_mask = pcntxt_mask & XCNTXT_MASK; |
332 | xsave_init(); | 438 | |
439 | xstate_enable(); | ||
333 | 440 | ||
334 | /* | 441 | /* |
335 | * Recompute the context size for enabled features | 442 | * Recompute the context size for enabled features |
336 | */ | 443 | */ |
337 | cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); | 444 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); |
338 | xstate_size = ebx; | 445 | xstate_size = ebx; |
339 | 446 | ||
340 | update_regset_xstate_info(xstate_size, pcntxt_mask); | 447 | update_regset_xstate_info(xstate_size, pcntxt_mask); |
@@ -346,3 +453,23 @@ void __ref xsave_cntxt_init(void) | |||
346 | "cntxt size 0x%x\n", | 453 | "cntxt size 0x%x\n", |
347 | pcntxt_mask, xstate_size); | 454 | pcntxt_mask, xstate_size); |
348 | } | 455 | } |
456 | |||
457 | /* | ||
458 | * For the very first instance, this calls xstate_enable_boot_cpu(); | ||
459 | * for all subsequent instances, this calls xstate_enable(). | ||
460 | * | ||
461 | * This is somewhat obfuscated due to the lack of powerful enough | ||
462 | * overrides for the section checks. | ||
463 | */ | ||
464 | void __cpuinit xsave_init(void) | ||
465 | { | ||
466 | static __refdata void (*next_func)(void) = xstate_enable_boot_cpu; | ||
467 | void (*this_func)(void); | ||
468 | |||
469 | if (!cpu_has_xsave) | ||
470 | return; | ||
471 | |||
472 | this_func = next_func; | ||
473 | next_func = xstate_enable; | ||
474 | this_func(); | ||
475 | } | ||
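Editor's note: the reworked xsave_init() above uses a small function-pointer handoff so that the first caller runs the boot-time setup and every later CPU only re-enables the feature. Reduced to its essentials, with illustrative names:

        static void enable_on_this_cpu(void)
        {
                /* per-CPU enabling only */
        }

        static void boot_time_setup(void)
        {
                /* one-time discovery and allocation, then enable this CPU too */
                enable_on_this_cpu();
        }

        static void (*next_func)(void) = boot_time_setup;

        void feature_init(void)
        {
                void (*this_func)(void) = next_func;

                next_func = enable_on_this_cpu;
                this_func();
        }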