path: root/arch/x86/kernel
author     Jiri Kosina <jkosina@suse.cz>  2010-10-30 16:16:56 -0400
committer  Jiri Kosina <jkosina@suse.cz>  2010-10-30 16:16:56 -0400
commit     f1e095f1d206b81b44160f41278ce5c78641e9b7 (patch)
tree       bd293d46d2d3e4cdf435a22ddb2877c6ba1b8acc /arch/x86/kernel
parent     b0438a1b71955c425c304a2a483765ef24841766 (diff)
parent     1792f17b7210280a3d7ff29da9614ba779cfcedb (diff)
Merge branch 'master' into for-next
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile | 12
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 60
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 1
-rw-r--r--  arch/x86/kernel/alternative.c | 71
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 11
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 24
-rw-r--r--  arch/x86/kernel/bios_uv.c | 215
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 1
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longrun.c | 4
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 1
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 26
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 216
-rw-r--r--  arch/x86/kernel/crash_dump_32.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 6
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 8
-rw-r--r--  arch/x86/kernel/efi.c | 613
-rw-r--r--  arch/x86/kernel/efi_32.c | 112
-rw-r--r--  arch/x86/kernel/efi_64.c | 114
-rw-r--r--  arch/x86/kernel/efi_stub_32.S | 123
-rw-r--r--  arch/x86/kernel/efi_stub_64.S | 116
-rw-r--r--  arch/x86/kernel/hpet.c | 2
-rw-r--r--  arch/x86/kernel/irq_32.c | 17
-rw-r--r--  arch/x86/kernel/kgdb.c | 3
-rw-r--r--  arch/x86/kernel/mrst.c | 311
-rw-r--r--  arch/x86/kernel/olpc-xo1.c | 140
-rw-r--r--  arch/x86/kernel/olpc.c | 281
-rw-r--r--  arch/x86/kernel/olpc_ofw.c | 112
-rw-r--r--  arch/x86/kernel/ptrace.c | 17
-rw-r--r--  arch/x86/kernel/reboot.c | 2
-rw-r--r--  arch/x86/kernel/scx200_32.c | 131
-rw-r--r--  arch/x86/kernel/setup.c | 2
-rw-r--r--  arch/x86/kernel/sfi.c | 120
-rw-r--r--  arch/x86/kernel/smp.c | 15
-rw-r--r--  arch/x86/kernel/smpboot.c | 3
-rw-r--r--  arch/x86/kernel/tlb_uv.c | 1661
-rw-r--r--  arch/x86/kernel/uv_irq.c | 285
-rw-r--r--  arch/x86/kernel/uv_sysfs.c | 76
-rw-r--r--  arch/x86/kernel/uv_time.c | 423
-rw-r--r--  arch/x86/kernel/visws_quirks.c | 614
-rw-r--r--  arch/x86/kernel/x86_init.c | 7
41 files changed, 302 insertions(+), 5658 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2c833d8c4141..9e13763b6092 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,7 +36,6 @@ obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y += time.o ioport.o ldt.o dumpstack.o
 obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
-obj-$(CONFIG_X86_VISWS) += visws_quirks.o
 obj-$(CONFIG_X86_32) += probe_roms_32.o
 obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
@@ -58,7 +57,6 @@ obj-$(CONFIG_INTEL_TXT) += tboot.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += cpu/
 obj-y += acpi/
-obj-$(CONFIG_SFI) += sfi.o
 obj-y += reboot.o
 obj-$(CONFIG_MCA) += mca_32.o
 obj-$(CONFIG_X86_MSR) += msr.o
@@ -82,7 +80,6 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
 obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
 obj-$(CONFIG_KGDB) += kgdb.o
 obj-$(CONFIG_VM86) += vm86_32.o
@@ -104,14 +101,6 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
 
-obj-$(CONFIG_SCx200) += scx200.o
-scx200-y += scx200_32.o
-
-obj-$(CONFIG_OLPC) += olpc.o
-obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
-obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o
-obj-$(CONFIG_X86_MRST) += mrst.o
-
 microcode-y := microcode_core.o
 microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
 microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
@@ -124,7 +113,6 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-	obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
 	obj-$(CONFIG_AUDIT) += audit_64.o
 
 	obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c05872aa3ce0..71232b941b6c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -513,35 +513,62 @@ int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
 	return 0;
 }
 
-/*
- * success: return IRQ number (>=0)
- * failure: return < 0
- */
-int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
+static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
+				 int trigger, int polarity)
 {
-	unsigned int irq;
-	unsigned int plat_gsi = gsi;
-
 #ifdef CONFIG_PCI
 	/*
 	 * Make sure all (legacy) PCI IRQs are set as level-triggered.
 	 */
-	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
-		if (trigger == ACPI_LEVEL_SENSITIVE)
-			eisa_set_level_irq(gsi);
-	}
+	if (trigger == ACPI_LEVEL_SENSITIVE)
+		eisa_set_level_irq(gsi);
 #endif
 
+	return gsi;
+}
+
+static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
+				    int trigger, int polarity)
+{
 #ifdef CONFIG_X86_IO_APIC
-	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
-		plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
-	}
+	gsi = mp_register_gsi(dev, gsi, trigger, polarity);
 #endif
+
+	return gsi;
+}
+
+int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
+			   int trigger, int polarity) = acpi_register_gsi_pic;
+
+/*
+ * success: return IRQ number (>=0)
+ * failure: return < 0
+ */
+int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
+{
+	unsigned int irq;
+	unsigned int plat_gsi = gsi;
+
+	plat_gsi = (*__acpi_register_gsi)(dev, gsi, trigger, polarity);
 	irq = gsi_to_irq(plat_gsi);
 
 	return irq;
 }
 
+void __init acpi_set_irq_model_pic(void)
+{
+	acpi_irq_model = ACPI_IRQ_MODEL_PIC;
+	__acpi_register_gsi = acpi_register_gsi_pic;
+	acpi_ioapic = 0;
+}
+
+void __init acpi_set_irq_model_ioapic(void)
+{
+	acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
+	__acpi_register_gsi = acpi_register_gsi_ioapic;
+	acpi_ioapic = 1;
+}
+
 /*
  * ACPI based hotplug support for CPU
  */
@@ -1259,8 +1286,7 @@ static void __init acpi_process_madt(void)
 		 */
 		error = acpi_parse_madt_ioapic_entries();
 		if (!error) {
-			acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
-			acpi_ioapic = 1;
+			acpi_set_irq_model_ioapic();
 
 			smp_found_config = 1;
 		}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 74a847835bab..69fd72aa5594 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -15,7 +15,6 @@
 
 #ifdef CONFIG_X86_32
 #include <asm/pgtable.h>
-#include <asm/pgtable_32.h>
 #endif
 
 #include "realmode/wakeup.h"
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a36bb90aef53..5079f24c955a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -638,71 +638,32 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 	atomic_set(&stop_machine_first, 1);
 	wrote_text = 0;
 	/* Use __stop_machine() because the caller already got online_cpus. */
-	__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+	__stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
 	return addr;
 }
 
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 
-unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+#ifdef CONFIG_X86_64
+unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
+#else
+unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
+#endif
 
 void __init arch_init_ideal_nop5(void)
 {
-	extern const unsigned char ftrace_test_p6nop[];
-	extern const unsigned char ftrace_test_nop5[];
-	extern const unsigned char ftrace_test_jmp[];
-	int faulted = 0;
-
 	/*
-	 * There is no good nop for all x86 archs.
-	 * We will default to using the P6_NOP5, but first we
-	 * will test to make sure that the nop will actually
-	 * work on this CPU. If it faults, we will then
-	 * go to a lesser efficient 5 byte nop. If that fails
-	 * we then just use a jmp as our nop. This isn't the most
-	 * efficient nop, but we can not use a multi part nop
-	 * since we would then risk being preempted in the middle
-	 * of that nop, and if we enabled tracing then, it might
-	 * cause a system crash.
+	 * There is no good nop for all x86 archs. This selection
+	 * algorithm should be unified with the one in find_nop_table(),
+	 * but this should be good enough for now.
 	 *
-	 * TODO: check the cpuid to determine the best nop.
+	 * For cases other than the ones below, use the safe (as in
+	 * always functional) defaults above.
 	 */
-	asm volatile (
-		"ftrace_test_jmp:"
-		"jmp ftrace_test_p6nop\n"
-		"nop\n"
-		"nop\n"
-		"nop\n" /* 2 byte jmp + 3 bytes */
-		"ftrace_test_p6nop:"
-		P6_NOP5
-		"jmp 1f\n"
-		"ftrace_test_nop5:"
-		".byte 0x66,0x66,0x66,0x66,0x90\n"
-		"1:"
-		".section .fixup, \"ax\"\n"
-		"2:	movl $1, %0\n"
-		"	jmp ftrace_test_nop5\n"
-		"3:	movl $2, %0\n"
-		"	jmp 1b\n"
-		".previous\n"
-		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
-		_ASM_EXTABLE(ftrace_test_nop5, 3b)
-		: "=r"(faulted) : "0" (faulted));
-
-	switch (faulted) {
-	case 0:
-		pr_info("converting mcount calls to 0f 1f 44 00 00\n");
-		memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
-		break;
-	case 1:
-		pr_info("converting mcount calls to 66 66 66 66 90\n");
-		memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
-		break;
-	case 2:
-		pr_info("converting mcount calls to jmp . + 5\n");
-		memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
-		break;
-	}
-
+#ifdef CONFIG_X86_64
+	/* Don't use these on 32 bits due to broken virtualizers */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		memcpy(ideal_nop5, p6_nops[5], 5);
+#endif
 }
 #endif
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8ae808d110f4..7cc0a721f628 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3109,7 +3109,7 @@ void destroy_irq(unsigned int irq)
 
 	irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
 
-	if (intr_remapping_enabled)
+	if (irq_remapped(cfg))
 		free_irte(irq);
 	raw_spin_lock_irqsave(&vector_lock, flags);
 	__clear_irq_vector(irq, cfg);
@@ -3331,7 +3331,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
 	return 0;
 }
 
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
 	int node, ret, sub_handle, index = 0;
 	unsigned int irq, irq_want;
@@ -3389,7 +3389,7 @@ error:
 	return ret;
 }
 
-void arch_teardown_msi_irq(unsigned int irq)
+void native_teardown_msi_irq(unsigned int irq)
 {
 	destroy_irq(irq);
 }
@@ -3650,6 +3650,11 @@ void __init probe_nr_irqs_gsi(void)
 	printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
 }
 
+int get_nr_irqs_gsi(void)
+{
+	return nr_irqs_gsi;
+}
+
 #ifdef CONFIG_SPARSE_IRQ
 int __init arch_probe_nr_irqs(void)
 {
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f744f54cb248..ed4118de249e 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@
  *
  * SGI UV APIC functions (note: not an Intel compatible APIC)
  *
- * Copyright (C) 2007-2009 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
  */
 #include <linux/cpumask.h>
 #include <linux/hardirq.h>
@@ -41,6 +41,7 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
 
 static enum uv_system_type uv_system_type;
 static u64 gru_start_paddr, gru_end_paddr;
+static union uvh_apicid uvh_apicid;
 int uv_min_hub_revision_id;
 EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
 static DEFINE_SPINLOCK(uv_nmi_lock);
@@ -70,12 +71,27 @@ static int early_get_nodeid(void)
 	return node_id.s.node_id;
 }
 
+static void __init early_get_apic_pnode_shift(void)
+{
+	unsigned long *mmr;
+
+	mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr));
+	uvh_apicid.v = *mmr;
+	early_iounmap(mmr, sizeof(*mmr));
+	if (!uvh_apicid.v)
+		/*
+		 * Old bios, use default value
+		 */
+		uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
+}
+
 static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
 	int nodeid;
 
 	if (!strcmp(oem_id, "SGI")) {
 		nodeid = early_get_nodeid();
+		early_get_apic_pnode_shift();
 		x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
 		x86_platform.nmi_init = uv_nmi_init;
 		if (!strcmp(oem_table_id, "UVL"))
@@ -84,7 +100,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 			uv_system_type = UV_X2APIC;
 		else if (!strcmp(oem_table_id, "UVH")) {
 			__get_cpu_var(x2apic_extra_bits) =
-				nodeid << (UV_APIC_PNODE_SHIFT - 1);
+				nodeid << (uvh_apicid.s.pnode_shift - 1);
 			uv_system_type = UV_NON_UNIQUE_APIC;
 			return 1;
 		}
@@ -716,6 +732,10 @@ void __init uv_system_init(void)
 		int apicid = per_cpu(x86_cpu_to_apicid, cpu);
 
 		nid = cpu_to_node(cpu);
+		/*
+		 * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
+		 */
+		uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
 		pnode = uv_apicid_to_pnode(apicid);
 		blade = boot_pnode_to_blade(pnode);
 		lcpu = uv_blade_info[blade].nr_possible_cpus;
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
deleted file mode 100644
index 8bc57baaa9ad..000000000000
--- a/arch/x86/kernel/bios_uv.c
+++ /dev/null
@@ -1,215 +0,0 @@
1/*
2 * BIOS run time interface routines.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson <rja@sgi.com>
20 */
21
22#include <linux/efi.h>
23#include <asm/efi.h>
24#include <linux/io.h>
25#include <asm/uv/bios.h>
26#include <asm/uv/uv_hub.h>
27
28static struct uv_systab uv_systab;
29
30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
31{
32 struct uv_systab *tab = &uv_systab;
33 s64 ret;
34
35 if (!tab->function)
36 /*
37 * BIOS does not support UV systab
38 */
39 return BIOS_STATUS_UNIMPLEMENTED;
40
41 ret = efi_call6((void *)__va(tab->function), (u64)which,
42 a1, a2, a3, a4, a5);
43 return ret;
44}
45EXPORT_SYMBOL_GPL(uv_bios_call);
46
47s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
48 u64 a4, u64 a5)
49{
50 unsigned long bios_flags;
51 s64 ret;
52
53 local_irq_save(bios_flags);
54 ret = uv_bios_call(which, a1, a2, a3, a4, a5);
55 local_irq_restore(bios_flags);
56
57 return ret;
58}
59
60s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
61 u64 a4, u64 a5)
62{
63 s64 ret;
64
65 preempt_disable();
66 ret = uv_bios_call(which, a1, a2, a3, a4, a5);
67 preempt_enable();
68
69 return ret;
70}
71
72
73long sn_partition_id;
74EXPORT_SYMBOL_GPL(sn_partition_id);
75long sn_coherency_id;
76EXPORT_SYMBOL_GPL(sn_coherency_id);
77long sn_region_size;
78EXPORT_SYMBOL_GPL(sn_region_size);
79long system_serial_number;
80EXPORT_SYMBOL_GPL(system_serial_number);
81int uv_type;
82EXPORT_SYMBOL_GPL(uv_type);
83
84
85s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
86 long *region, long *ssn)
87{
88 s64 ret;
89 u64 v0, v1;
90 union partition_info_u part;
91
92 ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc,
93 (u64)(&v0), (u64)(&v1), 0, 0);
94 if (ret != BIOS_STATUS_SUCCESS)
95 return ret;
96
97 part.val = v0;
98 if (uvtype)
99 *uvtype = part.hub_version;
100 if (partid)
101 *partid = part.partition_id;
102 if (coher)
103 *coher = part.coherence_id;
104 if (region)
105 *region = part.region_size;
106 if (ssn)
107 *ssn = v1;
108 return ret;
109}
110EXPORT_SYMBOL_GPL(uv_bios_get_sn_info);
111
112int
113uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size,
114 unsigned long *intr_mmr_offset)
115{
116 u64 watchlist;
117 s64 ret;
118
119 /*
120 * bios returns watchlist number or negative error number.
121 */
122 ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr,
123 mq_size, (u64)intr_mmr_offset,
124 (u64)&watchlist, 0);
125 if (ret < BIOS_STATUS_SUCCESS)
126 return ret;
127
128 return watchlist;
129}
130EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc);
131
132int
133uv_bios_mq_watchlist_free(int blade, int watchlist_num)
134{
135 return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE,
136 blade, watchlist_num, 0, 0, 0);
137}
138EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free);
139
140s64
141uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms)
142{
143 return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len,
144 perms, 0, 0);
145}
146EXPORT_SYMBOL_GPL(uv_bios_change_memprotect);
147
148s64
149uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
150{
151 s64 ret;
152
153 ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
154 (u64)addr, buf, (u64)len, 0);
155 return ret;
156}
157EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa);
158
159s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
160{
161 return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
162 (u64)ticks_per_second, 0, 0, 0);
163}
164EXPORT_SYMBOL_GPL(uv_bios_freq_base);
165
166/*
167 * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target
168 * @decode: true to enable target, false to disable target
169 * @domain: PCI domain number
170 * @bus: PCI bus number
171 *
172 * Returns:
173 * 0: Success
174 * -EINVAL: Invalid domain or bus number
175 * -ENOSYS: Capability not available
176 * -EBUSY: Legacy VGA I/O cannot be retargeted at this time
177 */
178int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
179{
180 return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET,
181 (u64)decode, (u64)domain, (u64)bus, 0, 0);
182}
183EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
184
185
186#ifdef CONFIG_EFI
187void uv_bios_init(void)
188{
189 struct uv_systab *tab;
190
191 if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
192 (efi.uv_systab == (unsigned long)NULL)) {
193 printk(KERN_CRIT "No EFI UV System Table.\n");
194 uv_systab.function = (unsigned long)NULL;
195 return;
196 }
197
198 tab = (struct uv_systab *)ioremap(efi.uv_systab,
199 sizeof(struct uv_systab));
200 if (strncmp(tab->signature, "UVST", 4) != 0)
201 printk(KERN_ERR "bad signature in UV system table!");
202
203 /*
204 * Copy table to permanent spot for later use.
205 */
206 memcpy(&uv_systab, tab, sizeof(struct uv_systab));
207 iounmap(tab);
208
209 printk(KERN_INFO "EFI UV System Table Revision %d\n",
210 uv_systab.revision);
211}
212#else /* !CONFIG_EFI */
213
214void uv_bios_init(void) { }
215#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index cd8da247dda1..a2baafb2fe6d 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -701,6 +701,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 		per_cpu(acfreq_data, policy->cpu) = NULL;
 		acpi_processor_unregister_performance(data->acpi_data,
 						      policy->cpu);
+		kfree(data->freq_table);
 		kfree(data);
 	}
 
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index 733093d60436..141abebc4516 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -393,7 +393,7 @@ static struct cpufreq_driver nforce2_driver = {
  * Detects nForce2 A2 and C1 stepping
  *
  */
-static unsigned int nforce2_detect_chipset(void)
+static int nforce2_detect_chipset(void)
 {
 	nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
 				     PCI_DEVICE_ID_NVIDIA_NFORCE2,
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index fc09f142d94d..d9f51367666b 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -35,7 +35,7 @@ static unsigned int longrun_low_freq, longrun_high_freq;
  * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
  * and MSR_TMTA_LONGRUN_CTRL
  */
-static void __init longrun_get_policy(struct cpufreq_policy *policy)
+static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy)
 {
 	u32 msr_lo, msr_hi;
 
@@ -165,7 +165,7 @@ static unsigned int longrun_get(unsigned int cpu)
  * TMTA rules:
  * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
  */
-static unsigned int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
+static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
 						      unsigned int *high_freq)
 {
 	u32 msr_lo, msr_hi;
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 12cd823c8d03..17ad03366211 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -327,6 +327,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
 	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 
 	l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
+	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 }
 
 static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fe73c1844a9a..ed6310183efb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -49,7 +49,6 @@ static unsigned long
 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 {
 	unsigned long offset, addr = (unsigned long)from;
-	int type = in_nmi() ? KM_NMI : KM_IRQ0;
 	unsigned long size, len = 0;
 	struct page *page;
 	void *map;
@@ -63,9 +62,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 		offset = addr & (PAGE_SIZE - 1);
 		size = min(PAGE_SIZE - offset, n - len);
 
-		map = kmap_atomic(page, type);
+		map = kmap_atomic(page);
 		memcpy(to, map+offset, size);
-		kunmap_atomic(map, type);
+		kunmap_atomic(map);
 		put_page(page);
 
 		len += size;
@@ -238,6 +237,7 @@ struct x86_pmu {
 	 * Intel DebugStore bits
 	 */
 	int		bts, pebs;
+	int		bts_active, pebs_active;
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
@@ -381,7 +381,7 @@ static void release_pmc_hardware(void) {}
 
 #endif
 
-static int reserve_ds_buffers(void);
+static void reserve_ds_buffers(void);
 static void release_ds_buffers(void);
 
 static void hw_perf_event_destroy(struct perf_event *event)
@@ -478,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event)
 	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
 	    (hwc->sample_period == 1)) {
 		/* BTS is not supported by this architecture. */
-		if (!x86_pmu.bts)
+		if (!x86_pmu.bts_active)
 			return -EOPNOTSUPP;
 
 		/* BTS is currently only allowed for user-mode. */
@@ -497,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
 		int precise = 0;
 
 		/* Support for constant skid */
-		if (x86_pmu.pebs)
+		if (x86_pmu.pebs_active) {
 			precise++;
 
 		/* Support for IP fixup */
 		if (x86_pmu.lbr_nr)
 			precise++;
+		}
 
 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
@@ -544,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	if (atomic_read(&active_events) == 0) {
 		if (!reserve_pmc_hardware())
 			err = -EBUSY;
-		else {
-			err = reserve_ds_buffers();
-			if (err)
-				release_pmc_hardware();
-		}
+		else
+			reserve_ds_buffers();
 	}
 	if (!err)
 		atomic_inc(&active_events);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 4977f9c400e5..b7dcd9f2b8a0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu)
 	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 }
 
+static int alloc_pebs_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	int node = cpu_to_node(cpu);
+	int max, thresh = 1; /* always use a single PEBS record */
+	void *buffer;
+
+	if (!x86_pmu.pebs)
+		return 0;
+
+	buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+	if (unlikely(!buffer))
+		return -ENOMEM;
+
+	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
+
+	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+	ds->pebs_index = ds->pebs_buffer_base;
+	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
+		max * x86_pmu.pebs_record_size;
+
+	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
+		thresh * x86_pmu.pebs_record_size;
+
+	return 0;
+}
+
+static void release_pebs_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+	if (!ds || !x86_pmu.pebs)
+		return;
+
+	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	ds->pebs_buffer_base = 0;
+}
+
+static int alloc_bts_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	int node = cpu_to_node(cpu);
+	int max, thresh;
+	void *buffer;
+
+	if (!x86_pmu.bts)
+		return 0;
+
+	buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+	if (unlikely(!buffer))
+		return -ENOMEM;
+
+	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+	thresh = max / 16;
+
+	ds->bts_buffer_base = (u64)(unsigned long)buffer;
+	ds->bts_index = ds->bts_buffer_base;
+	ds->bts_absolute_maximum = ds->bts_buffer_base +
+		max * BTS_RECORD_SIZE;
+	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+		thresh * BTS_RECORD_SIZE;
+
+	return 0;
+}
+
+static void release_bts_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+	if (!ds || !x86_pmu.bts)
+		return;
+
+	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	ds->bts_buffer_base = 0;
+}
+
+static int alloc_ds_buffer(int cpu)
+{
+	int node = cpu_to_node(cpu);
+	struct debug_store *ds;
+
+	ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
+	if (unlikely(!ds))
+		return -ENOMEM;
+
+	per_cpu(cpu_hw_events, cpu).ds = ds;
+
+	return 0;
+}
+
+static void release_ds_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+	if (!ds)
+		return;
+
+	per_cpu(cpu_hw_events, cpu).ds = NULL;
+	kfree(ds);
+}
+
 static void release_ds_buffers(void)
 {
 	int cpu;
@@ -82,93 +183,77 @@ static void release_ds_buffers(void)
 		return;
 
 	get_online_cpus();
-
 	for_each_online_cpu(cpu)
 		fini_debug_store_on_cpu(cpu);
 
 	for_each_possible_cpu(cpu) {
-		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-		if (!ds)
-			continue;
-
-		per_cpu(cpu_hw_events, cpu).ds = NULL;
-
-		kfree((void *)(unsigned long)ds->pebs_buffer_base);
-		kfree((void *)(unsigned long)ds->bts_buffer_base);
-		kfree(ds);
+		release_pebs_buffer(cpu);
+		release_bts_buffer(cpu);
+		release_ds_buffer(cpu);
 	}
-
 	put_online_cpus();
 }
 
-static int reserve_ds_buffers(void)
+static void reserve_ds_buffers(void)
 {
-	int cpu, err = 0;
+	int bts_err = 0, pebs_err = 0;
+	int cpu;
+
+	x86_pmu.bts_active = 0;
+	x86_pmu.pebs_active = 0;
 
 	if (!x86_pmu.bts && !x86_pmu.pebs)
-		return 0;
+		return;
+
+	if (!x86_pmu.bts)
+		bts_err = 1;
+
+	if (!x86_pmu.pebs)
+		pebs_err = 1;
 
 	get_online_cpus();
 
 	for_each_possible_cpu(cpu) {
-		struct debug_store *ds;
-		void *buffer;
-		int max, thresh;
+		if (alloc_ds_buffer(cpu)) {
+			bts_err = 1;
+			pebs_err = 1;
+		}
+
+		if (!bts_err && alloc_bts_buffer(cpu))
+			bts_err = 1;
 
-		err = -ENOMEM;
-		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
-		if (unlikely(!ds))
+		if (!pebs_err && alloc_pebs_buffer(cpu))
+			pebs_err = 1;
+
+		if (bts_err && pebs_err)
 			break;
-		per_cpu(cpu_hw_events, cpu).ds = ds;
-
-		if (x86_pmu.bts) {
-			buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
-			if (unlikely(!buffer))
-				break;
-
-			max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-			thresh = max / 16;
-
-			ds->bts_buffer_base = (u64)(unsigned long)buffer;
-			ds->bts_index = ds->bts_buffer_base;
-			ds->bts_absolute_maximum = ds->bts_buffer_base +
-				max * BTS_RECORD_SIZE;
-			ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-				thresh * BTS_RECORD_SIZE;
-		}
+	}
 
-		if (x86_pmu.pebs) {
-			buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
-			if (unlikely(!buffer))
-				break;
-
-			max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
-
-			ds->pebs_buffer_base = (u64)(unsigned long)buffer;
-			ds->pebs_index = ds->pebs_buffer_base;
-			ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-				max * x86_pmu.pebs_record_size;
-			/*
-			 * Always use single record PEBS
-			 */
-			ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
-				x86_pmu.pebs_record_size;
-		}
+	if (bts_err) {
+		for_each_possible_cpu(cpu)
+			release_bts_buffer(cpu);
+	}
 
-		err = 0;
+	if (pebs_err) {
+		for_each_possible_cpu(cpu)
+			release_pebs_buffer(cpu);
 	}
 
-	if (err)
-		release_ds_buffers();
-	else {
+	if (bts_err && pebs_err) {
+		for_each_possible_cpu(cpu)
+			release_ds_buffer(cpu);
+	} else {
+		if (x86_pmu.bts && !bts_err)
+			x86_pmu.bts_active = 1;
+
+		if (x86_pmu.pebs && !pebs_err)
+			x86_pmu.pebs_active = 1;
+
 		for_each_online_cpu(cpu)
 			init_debug_store_on_cpu(cpu);
 	}
 
 	put_online_cpus();
-
-	return err;
 }
 
174/* 259/*
@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void)
233 if (!event) 318 if (!event)
234 return 0; 319 return 0;
235 320
236 if (!ds) 321 if (!x86_pmu.bts_active)
237 return 0; 322 return 0;
238 323
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 324 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
503 struct pebs_record_core *at, *top; 588 struct pebs_record_core *at, *top;
504 int n; 589 int n;
505 590
506 if (!ds || !x86_pmu.pebs) 591 if (!x86_pmu.pebs_active)
507 return; 592 return;
508 593
509 at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; 594 at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
545 u64 status = 0; 630 u64 status = 0;
546 int bit, n; 631 int bit, n;
547 632
548 if (!ds || !x86_pmu.pebs) 633 if (!x86_pmu.pebs_active)
549 return; 634 return;
550 635
551 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; 636 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
@@ -630,9 +715,8 @@ static void intel_ds_init(void)
630 715
631#else /* CONFIG_CPU_SUP_INTEL */ 716#else /* CONFIG_CPU_SUP_INTEL */
632 717
633static int reserve_ds_buffers(void) 718static void reserve_ds_buffers(void)
634{ 719{
635 return 0;
636} 720}
637 721
638static void release_ds_buffers(void) 722static void release_ds_buffers(void)
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 67414550c3cc..d5cd13945d5a 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -61,7 +61,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
 	if (!is_crashed_pfn_valid(pfn))
 		return -EFAULT;
 
-	vaddr = kmap_atomic_pfn(pfn, KM_PTE0);
+	vaddr = kmap_atomic_pfn(pfn);
 
 	if (!userbuf) {
 		memcpy(buf, (vaddr + offset), csize);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 0f6376ffa2d9..1bc7f75a5bda 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -82,11 +82,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		if (kstack_end(stack))
 			break;
 		if (i && ((i % STACKSLOTS_PER_LINE) == 0))
-			printk("\n%s", log_lvl);
-		printk(" %08lx", *stack++);
+			printk(KERN_CONT "\n");
+		printk(KERN_CONT " %08lx", *stack++);
 		touch_nmi_watchdog();
 	}
-	printk("\n");
+	printk(KERN_CONT "\n");
 	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
 }
 
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 57a21f11c791..6a340485249a 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -265,20 +265,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		if (stack >= irq_stack && stack <= irq_stack_end) {
 			if (stack == irq_stack_end) {
 				stack = (unsigned long *) (irq_stack_end[-1]);
-				printk(" <EOI> ");
+				printk(KERN_CONT " <EOI> ");
 			}
 		} else {
 		if (((long) stack & (THREAD_SIZE-1)) == 0)
 			break;
 		}
 		if (i && ((i % STACKSLOTS_PER_LINE) == 0))
-			printk("\n%s", log_lvl);
-		printk(" %016lx", *stack++);
+			printk(KERN_CONT "\n");
+		printk(KERN_CONT " %016lx", *stack++);
 		touch_nmi_watchdog();
 	}
 	preempt_enable();
 
-	printk("\n");
+	printk(KERN_CONT "\n");
 	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
 }
 
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
deleted file mode 100644
index 0fe27d7c6258..000000000000
--- a/arch/x86/kernel/efi.c
+++ /dev/null
@@ -1,613 +0,0 @@
1/*
2 * Common EFI (Extensible Firmware Interface) support functions
3 * Based on Extensible Firmware Interface Specification version 1.0
4 *
5 * Copyright (C) 1999 VA Linux Systems
6 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
7 * Copyright (C) 1999-2002 Hewlett-Packard Co.
8 * David Mosberger-Tang <davidm@hpl.hp.com>
9 * Stephane Eranian <eranian@hpl.hp.com>
10 * Copyright (C) 2005-2008 Intel Co.
11 * Fenghua Yu <fenghua.yu@intel.com>
12 * Bibo Mao <bibo.mao@intel.com>
13 * Chandramouli Narayanan <mouli@linux.intel.com>
14 * Huang Ying <ying.huang@intel.com>
15 *
16 * Copied from efi_32.c to eliminate the duplicated code between EFI
17 * 32/64 support code. --ying 2007-10-26
18 *
19 * All EFI Runtime Services are not implemented yet as EFI only
20 * supports physical mode addressing on SoftSDV. This is to be fixed
21 * in a future version. --drummond 1999-07-20
22 *
23 * Implemented EFI runtime services and virtual mode calls. --davidm
24 *
25 * Goutham Rao: <goutham.rao@intel.com>
26 * Skip non-WB memory and ignore empty memory ranges.
27 */
28
29#include <linux/kernel.h>
30#include <linux/init.h>
31#include <linux/efi.h>
32#include <linux/bootmem.h>
33#include <linux/memblock.h>
34#include <linux/spinlock.h>
35#include <linux/uaccess.h>
36#include <linux/time.h>
37#include <linux/io.h>
38#include <linux/reboot.h>
39#include <linux/bcd.h>
40
41#include <asm/setup.h>
42#include <asm/efi.h>
43#include <asm/time.h>
44#include <asm/cacheflush.h>
45#include <asm/tlbflush.h>
46#include <asm/x86_init.h>
47
48#define EFI_DEBUG 1
49#define PFX "EFI: "
50
51int efi_enabled;
52EXPORT_SYMBOL(efi_enabled);
53
54struct efi efi;
55EXPORT_SYMBOL(efi);
56
57struct efi_memory_map memmap;
58
59static struct efi efi_phys __initdata;
60static efi_system_table_t efi_systab __initdata;
61
62static int __init setup_noefi(char *arg)
63{
64 efi_enabled = 0;
65 return 0;
66}
67early_param("noefi", setup_noefi);
68
69int add_efi_memmap;
70EXPORT_SYMBOL(add_efi_memmap);
71
72static int __init setup_add_efi_memmap(char *arg)
73{
74 add_efi_memmap = 1;
75 return 0;
76}
77early_param("add_efi_memmap", setup_add_efi_memmap);
78
79
80static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
81{
82 return efi_call_virt2(get_time, tm, tc);
83}
84
85static efi_status_t virt_efi_set_time(efi_time_t *tm)
86{
87 return efi_call_virt1(set_time, tm);
88}
89
90static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
91 efi_bool_t *pending,
92 efi_time_t *tm)
93{
94 return efi_call_virt3(get_wakeup_time,
95 enabled, pending, tm);
96}
97
98static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
99{
100 return efi_call_virt2(set_wakeup_time,
101 enabled, tm);
102}
103
104static efi_status_t virt_efi_get_variable(efi_char16_t *name,
105 efi_guid_t *vendor,
106 u32 *attr,
107 unsigned long *data_size,
108 void *data)
109{
110 return efi_call_virt5(get_variable,
111 name, vendor, attr,
112 data_size, data);
113}
114
115static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
116 efi_char16_t *name,
117 efi_guid_t *vendor)
118{
119 return efi_call_virt3(get_next_variable,
120 name_size, name, vendor);
121}
122
123static efi_status_t virt_efi_set_variable(efi_char16_t *name,
124 efi_guid_t *vendor,
125 unsigned long attr,
126 unsigned long data_size,
127 void *data)
128{
129 return efi_call_virt5(set_variable,
130 name, vendor, attr,
131 data_size, data);
132}
133
134static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
135{
136 return efi_call_virt1(get_next_high_mono_count, count);
137}
138
139static void virt_efi_reset_system(int reset_type,
140 efi_status_t status,
141 unsigned long data_size,
142 efi_char16_t *data)
143{
144 efi_call_virt4(reset_system, reset_type, status,
145 data_size, data);
146}
147
148static efi_status_t virt_efi_set_virtual_address_map(
149 unsigned long memory_map_size,
150 unsigned long descriptor_size,
151 u32 descriptor_version,
152 efi_memory_desc_t *virtual_map)
153{
154 return efi_call_virt4(set_virtual_address_map,
155 memory_map_size, descriptor_size,
156 descriptor_version, virtual_map);
157}
158
159static efi_status_t __init phys_efi_set_virtual_address_map(
160 unsigned long memory_map_size,
161 unsigned long descriptor_size,
162 u32 descriptor_version,
163 efi_memory_desc_t *virtual_map)
164{
165 efi_status_t status;
166
167 efi_call_phys_prelog();
168 status = efi_call_phys4(efi_phys.set_virtual_address_map,
169 memory_map_size, descriptor_size,
170 descriptor_version, virtual_map);
171 efi_call_phys_epilog();
172 return status;
173}
174
175static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
176 efi_time_cap_t *tc)
177{
178 efi_status_t status;
179
180 efi_call_phys_prelog();
181 status = efi_call_phys2(efi_phys.get_time, tm, tc);
182 efi_call_phys_epilog();
183 return status;
184}
185
186int efi_set_rtc_mmss(unsigned long nowtime)
187{
188 int real_seconds, real_minutes;
189 efi_status_t status;
190 efi_time_t eft;
191 efi_time_cap_t cap;
192
193 status = efi.get_time(&eft, &cap);
194 if (status != EFI_SUCCESS) {
195 printk(KERN_ERR "Oops: efitime: can't read time!\n");
196 return -1;
197 }
198
199 real_seconds = nowtime % 60;
200 real_minutes = nowtime / 60;
201 if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
202 real_minutes += 30;
203 real_minutes %= 60;
204 eft.minute = real_minutes;
205 eft.second = real_seconds;
206
207 status = efi.set_time(&eft);
208 if (status != EFI_SUCCESS) {
209 printk(KERN_ERR "Oops: efitime: can't write time!\n");
210 return -1;
211 }
212 return 0;
213}
214
215unsigned long efi_get_time(void)
216{
217 efi_status_t status;
218 efi_time_t eft;
219 efi_time_cap_t cap;
220
221 status = efi.get_time(&eft, &cap);
222 if (status != EFI_SUCCESS)
223 printk(KERN_ERR "Oops: efitime: can't read time!\n");
224
225 return mktime(eft.year, eft.month, eft.day, eft.hour,
226 eft.minute, eft.second);
227}
228
229/*
230 * Tell the kernel about the EFI memory map. This might include
231 * more than the max 128 entries that can fit in the e820 legacy
232 * (zeropage) memory map.
233 */
234
235static void __init do_add_efi_memmap(void)
236{
237 void *p;
238
239 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
240 efi_memory_desc_t *md = p;
241 unsigned long long start = md->phys_addr;
242 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
243 int e820_type;
244
245 switch (md->type) {
246 case EFI_LOADER_CODE:
247 case EFI_LOADER_DATA:
248 case EFI_BOOT_SERVICES_CODE:
249 case EFI_BOOT_SERVICES_DATA:
250 case EFI_CONVENTIONAL_MEMORY:
251 if (md->attribute & EFI_MEMORY_WB)
252 e820_type = E820_RAM;
253 else
254 e820_type = E820_RESERVED;
255 break;
256 case EFI_ACPI_RECLAIM_MEMORY:
257 e820_type = E820_ACPI;
258 break;
259 case EFI_ACPI_MEMORY_NVS:
260 e820_type = E820_NVS;
261 break;
262 case EFI_UNUSABLE_MEMORY:
263 e820_type = E820_UNUSABLE;
264 break;
265 default:
266 /*
267 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
268 * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
269 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
270 */
271 e820_type = E820_RESERVED;
272 break;
273 }
274 e820_add_region(start, size, e820_type);
275 }
276 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
277}
278
279void __init efi_memblock_x86_reserve_range(void)
280{
281 unsigned long pmap;
282
283#ifdef CONFIG_X86_32
284 pmap = boot_params.efi_info.efi_memmap;
285#else
286 pmap = (boot_params.efi_info.efi_memmap |
287 ((__u64)boot_params.efi_info.efi_memmap_hi<<32));
288#endif
289 memmap.phys_map = (void *)pmap;
290 memmap.nr_map = boot_params.efi_info.efi_memmap_size /
291 boot_params.efi_info.efi_memdesc_size;
292 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
293 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
294 memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size,
295 "EFI memmap");
296}
297
298#if EFI_DEBUG
299static void __init print_efi_memmap(void)
300{
301 efi_memory_desc_t *md;
302 void *p;
303 int i;
304
305 for (p = memmap.map, i = 0;
306 p < memmap.map_end;
307 p += memmap.desc_size, i++) {
308 md = p;
309 printk(KERN_INFO PFX "mem%02u: type=%u, attr=0x%llx, "
310 "range=[0x%016llx-0x%016llx) (%lluMB)\n",
311 i, md->type, md->attribute, md->phys_addr,
312 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
313 (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
314 }
315}
316#endif /* EFI_DEBUG */
317
318void __init efi_init(void)
319{
320 efi_config_table_t *config_tables;
321 efi_runtime_services_t *runtime;
322 efi_char16_t *c16;
323 char vendor[100] = "unknown";
324 int i = 0;
325 void *tmp;
326
327#ifdef CONFIG_X86_32
328 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
329#else
330 efi_phys.systab = (efi_system_table_t *)
331 (boot_params.efi_info.efi_systab |
332 ((__u64)boot_params.efi_info.efi_systab_hi<<32));
333#endif
334
335 efi.systab = early_ioremap((unsigned long)efi_phys.systab,
336 sizeof(efi_system_table_t));
337 if (efi.systab == NULL)
338 printk(KERN_ERR "Couldn't map the EFI system table!\n");
339 memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t));
340 early_iounmap(efi.systab, sizeof(efi_system_table_t));
341 efi.systab = &efi_systab;
342
343 /*
344 * Verify the EFI Table
345 */
346 if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
347 printk(KERN_ERR "EFI system table signature incorrect!\n");
348 if ((efi.systab->hdr.revision >> 16) == 0)
349 printk(KERN_ERR "Warning: EFI system table version "
350 "%d.%02d, expected 1.00 or greater!\n",
351 efi.systab->hdr.revision >> 16,
352 efi.systab->hdr.revision & 0xffff);
353
354 /*
355 * Show what we know for posterity
356 */
357 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
358 if (c16) {
359 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
360 vendor[i] = *c16++;
361 vendor[i] = '\0';
362 } else
363 printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
364 early_iounmap(tmp, 2);
365
366 printk(KERN_INFO "EFI v%u.%.02u by %s\n",
367 efi.systab->hdr.revision >> 16,
368 efi.systab->hdr.revision & 0xffff, vendor);
369
370 /*
371 * Let's see what config tables the firmware passed to us.
372 */
373 config_tables = early_ioremap(
374 efi.systab->tables,
375 efi.systab->nr_tables * sizeof(efi_config_table_t));
376 if (config_tables == NULL)
377 printk(KERN_ERR "Could not map EFI Configuration Table!\n");
378
379 printk(KERN_INFO);
380 for (i = 0; i < efi.systab->nr_tables; i++) {
381 if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) {
382 efi.mps = config_tables[i].table;
383 printk(" MPS=0x%lx ", config_tables[i].table);
384 } else if (!efi_guidcmp(config_tables[i].guid,
385 ACPI_20_TABLE_GUID)) {
386 efi.acpi20 = config_tables[i].table;
387 printk(" ACPI 2.0=0x%lx ", config_tables[i].table);
388 } else if (!efi_guidcmp(config_tables[i].guid,
389 ACPI_TABLE_GUID)) {
390 efi.acpi = config_tables[i].table;
391 printk(" ACPI=0x%lx ", config_tables[i].table);
392 } else if (!efi_guidcmp(config_tables[i].guid,
393 SMBIOS_TABLE_GUID)) {
394 efi.smbios = config_tables[i].table;
395 printk(" SMBIOS=0x%lx ", config_tables[i].table);
396#ifdef CONFIG_X86_UV
397 } else if (!efi_guidcmp(config_tables[i].guid,
398 UV_SYSTEM_TABLE_GUID)) {
399 efi.uv_systab = config_tables[i].table;
400 printk(" UVsystab=0x%lx ", config_tables[i].table);
401#endif
402 } else if (!efi_guidcmp(config_tables[i].guid,
403 HCDP_TABLE_GUID)) {
404 efi.hcdp = config_tables[i].table;
405 printk(" HCDP=0x%lx ", config_tables[i].table);
406 } else if (!efi_guidcmp(config_tables[i].guid,
407 UGA_IO_PROTOCOL_GUID)) {
408 efi.uga = config_tables[i].table;
409 printk(" UGA=0x%lx ", config_tables[i].table);
410 }
411 }
412 printk("\n");
413 early_iounmap(config_tables,
414 efi.systab->nr_tables * sizeof(efi_config_table_t));
415
416 /*
417 * Check out the runtime services table. We need to map
418 * the runtime services table so that we can grab the physical
419 * address of several of the EFI runtime functions, needed to
420 * set the firmware into virtual mode.
421 */
422 runtime = early_ioremap((unsigned long)efi.systab->runtime,
423 sizeof(efi_runtime_services_t));
424 if (runtime != NULL) {
425 /*
426 * We will only need *early* access to the following
427 * two EFI runtime services before set_virtual_address_map
428 * is invoked.
429 */
430 efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
431 efi_phys.set_virtual_address_map =
432 (efi_set_virtual_address_map_t *)
433 runtime->set_virtual_address_map;
434 /*
435 * Make efi_get_time can be called before entering
436 * virtual mode.
437 */
438 efi.get_time = phys_efi_get_time;
439 } else
440 printk(KERN_ERR "Could not map the EFI runtime service "
441 "table!\n");
442 early_iounmap(runtime, sizeof(efi_runtime_services_t));
443
444 /* Map the EFI memory map */
445 memmap.map = early_ioremap((unsigned long)memmap.phys_map,
446 memmap.nr_map * memmap.desc_size);
447 if (memmap.map == NULL)
448 printk(KERN_ERR "Could not map the EFI memory map!\n");
449 memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
450
451 if (memmap.desc_size != sizeof(efi_memory_desc_t))
452 printk(KERN_WARNING
453 "Kernel-defined memdesc doesn't match the one from EFI!\n");
454
455 if (add_efi_memmap)
456 do_add_efi_memmap();
457
458#ifdef CONFIG_X86_32
459 x86_platform.get_wallclock = efi_get_time;
460 x86_platform.set_wallclock = efi_set_rtc_mmss;
461#endif
462
463 /* Setup for EFI runtime service */
464 reboot_type = BOOT_EFI;
465
466#if EFI_DEBUG
467 print_efi_memmap();
468#endif
469}
470
471static void __init runtime_code_page_mkexec(void)
472{
473 efi_memory_desc_t *md;
474 void *p;
475 u64 addr, npages;
476
477 /* Make EFI runtime service code area executable */
478 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
479 md = p;
480
481 if (md->type != EFI_RUNTIME_SERVICES_CODE)
482 continue;
483
484 addr = md->virt_addr;
485 npages = md->num_pages;
486 memrange_efi_to_native(&addr, &npages);
487 set_memory_x(addr, npages);
488 }
489}
490
491/*
492 * This function will switch the EFI runtime services to virtual mode.
493 * Essentially, look through the EFI memmap and map every region that
494 * has the runtime attribute bit set in its memory descriptor and update
495 * that memory descriptor with the virtual address obtained from ioremap().
496 * This enables the runtime services to be called without having to
497 * thunk back into physical mode for every invocation.
498 */
499void __init efi_enter_virtual_mode(void)
500{
501 efi_memory_desc_t *md;
502 efi_status_t status;
503 unsigned long size;
504 u64 end, systab, addr, npages, end_pfn;
505 void *p, *va;
506
507 efi.systab = NULL;
508 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
509 md = p;
510 if (!(md->attribute & EFI_MEMORY_RUNTIME))
511 continue;
512
513 size = md->num_pages << EFI_PAGE_SHIFT;
514 end = md->phys_addr + size;
515
516 end_pfn = PFN_UP(end);
517 if (end_pfn <= max_low_pfn_mapped
518 || (end_pfn > (1UL << (32 - PAGE_SHIFT))
519 && end_pfn <= max_pfn_mapped))
520 va = __va(md->phys_addr);
521 else
522 va = efi_ioremap(md->phys_addr, size, md->type);
523
524 md->virt_addr = (u64) (unsigned long) va;
525
526 if (!va) {
527 printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
528 (unsigned long long)md->phys_addr);
529 continue;
530 }
531
532 if (!(md->attribute & EFI_MEMORY_WB)) {
533 addr = md->virt_addr;
534 npages = md->num_pages;
535 memrange_efi_to_native(&addr, &npages);
536 set_memory_uc(addr, npages);
537 }
538
539 systab = (u64) (unsigned long) efi_phys.systab;
540 if (md->phys_addr <= systab && systab < end) {
541 systab += md->virt_addr - md->phys_addr;
542 efi.systab = (efi_system_table_t *) (unsigned long) systab;
543 }
544 }
545
546 BUG_ON(!efi.systab);
547
548 status = phys_efi_set_virtual_address_map(
549 memmap.desc_size * memmap.nr_map,
550 memmap.desc_size,
551 memmap.desc_version,
552 memmap.phys_map);
553
554 if (status != EFI_SUCCESS) {
555 printk(KERN_ALERT "Unable to switch EFI into virtual mode "
556 "(status=%lx)!\n", status);
557 panic("EFI call to SetVirtualAddressMap() failed!");
558 }
559
560 /*
561 * Now that EFI is in virtual mode, update the function
562 * pointers in the runtime service table to the new virtual addresses.
563 *
564 * Call EFI services through wrapper functions.
565 */
566 efi.get_time = virt_efi_get_time;
567 efi.set_time = virt_efi_set_time;
568 efi.get_wakeup_time = virt_efi_get_wakeup_time;
569 efi.set_wakeup_time = virt_efi_set_wakeup_time;
570 efi.get_variable = virt_efi_get_variable;
571 efi.get_next_variable = virt_efi_get_next_variable;
572 efi.set_variable = virt_efi_set_variable;
573 efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
574 efi.reset_system = virt_efi_reset_system;
575 efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
576 if (__supported_pte_mask & _PAGE_NX)
577 runtime_code_page_mkexec();
578 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
579 memmap.map = NULL;
580}
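
The systab fixup above is a plain offset translation inside the one descriptor that covers the pointer. A minimal sketch of the same translation, assuming md has already been verified to contain phys:

	static void *efi_phys_to_virt_sketch(efi_memory_desc_t *md, u64 phys)
	{
		/* md->virt_addr was filled in by the remapping loop above */
		return (void *)(unsigned long)(phys + md->virt_addr - md->phys_addr);
	}
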
581
582/*
583 * Convenience functions to obtain memory types and attributes
584 */
585u32 efi_mem_type(unsigned long phys_addr)
586{
587 efi_memory_desc_t *md;
588 void *p;
589
590 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
591 md = p;
592 if ((md->phys_addr <= phys_addr) &&
593 (phys_addr < (md->phys_addr +
594 (md->num_pages << EFI_PAGE_SHIFT))))
595 return md->type;
596 }
597 return 0;
598}
599
600u64 efi_mem_attributes(unsigned long phys_addr)
601{
602 efi_memory_desc_t *md;
603 void *p;
604
605 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
606 md = p;
607 if ((md->phys_addr <= phys_addr) &&
608 (phys_addr < (md->phys_addr +
609 (md->num_pages << EFI_PAGE_SHIFT))))
610 return md->attribute;
611 }
612 return 0;
613}
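
A hypothetical caller of the two helpers above, for example to test whether firmware marked a physical address as cacheable write-back (EFI_MEMORY_WB is the standard <linux/efi.h> attribute bit already checked in efi_enter_virtual_mode()):

	static bool phys_addr_is_wb_sketch(unsigned long phys_addr)
	{
		return (efi_mem_attributes(phys_addr) & EFI_MEMORY_WB) != 0;
	}
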
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
deleted file mode 100644
index 5cab48ee61a4..000000000000
--- a/arch/x86/kernel/efi_32.c
+++ /dev/null
@@ -1,112 +0,0 @@
1/*
2 * Extensible Firmware Interface
3 *
4 * Based on Extensible Firmware Interface Specification version 1.0
5 *
6 * Copyright (C) 1999 VA Linux Systems
7 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
8 * Copyright (C) 1999-2002 Hewlett-Packard Co.
9 * David Mosberger-Tang <davidm@hpl.hp.com>
10 * Stephane Eranian <eranian@hpl.hp.com>
11 *
12 * All EFI Runtime Services are not implemented yet as EFI only
13 * supports physical mode addressing on SoftSDV. This is to be fixed
14 * in a future version. --drummond 1999-07-20
15 *
16 * Implemented EFI runtime services and virtual mode calls. --davidm
17 *
18 * Goutham Rao: <goutham.rao@intel.com>
19 * Skip non-WB memory and ignore empty memory ranges.
20 */
21
22#include <linux/kernel.h>
23#include <linux/types.h>
24#include <linux/ioport.h>
25#include <linux/efi.h>
26
27#include <asm/io.h>
28#include <asm/page.h>
29#include <asm/pgtable.h>
30#include <asm/tlbflush.h>
31#include <asm/efi.h>
32
33/*
34 * To call an EFI runtime service in physical addressing mode we need a
35 * prelog/epilog around the invocation to disable interrupts, claim the
36 * EFI runtime service handler exclusively, and duplicate the kernel's
37 * mapping into low memory (0 - 3G).
38 */
39
40static unsigned long efi_rt_eflags;
41static pgd_t efi_bak_pg_dir_pointer[2];
42
43void efi_call_phys_prelog(void)
44{
45 unsigned long cr4;
46 unsigned long temp;
47 struct desc_ptr gdt_descr;
48
49 local_irq_save(efi_rt_eflags);
50
51 /*
52 * If I don't have PAE, I should just duplicate two entries in page
53 * directory. If I have PAE, I just need to duplicate one entry in
54 * page directory.
55 */
56 cr4 = read_cr4_safe();
57
58 if (cr4 & X86_CR4_PAE) {
59 efi_bak_pg_dir_pointer[0].pgd =
60 swapper_pg_dir[pgd_index(0)].pgd;
61 swapper_pg_dir[0].pgd =
62 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
63 } else {
64 efi_bak_pg_dir_pointer[0].pgd =
65 swapper_pg_dir[pgd_index(0)].pgd;
66 efi_bak_pg_dir_pointer[1].pgd =
67 swapper_pg_dir[pgd_index(0x400000)].pgd;
68 swapper_pg_dir[pgd_index(0)].pgd =
69 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
70 temp = PAGE_OFFSET + 0x400000;
71 swapper_pg_dir[pgd_index(0x400000)].pgd =
72 swapper_pg_dir[pgd_index(temp)].pgd;
73 }
74
75 /*
76	 * Flush the TLB so the page-directory changes above take effect.
77 */
78 __flush_tlb_all();
79
80 gdt_descr.address = __pa(get_cpu_gdt_table(0));
81 gdt_descr.size = GDT_SIZE - 1;
82 load_gdt(&gdt_descr);
83}
84
85void efi_call_phys_epilog(void)
86{
87 unsigned long cr4;
88 struct desc_ptr gdt_descr;
89
90 gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
91 gdt_descr.size = GDT_SIZE - 1;
92 load_gdt(&gdt_descr);
93
94 cr4 = read_cr4_safe();
95
96 if (cr4 & X86_CR4_PAE) {
97 swapper_pg_dir[pgd_index(0)].pgd =
98 efi_bak_pg_dir_pointer[0].pgd;
99 } else {
100 swapper_pg_dir[pgd_index(0)].pgd =
101 efi_bak_pg_dir_pointer[0].pgd;
102 swapper_pg_dir[pgd_index(0x400000)].pgd =
103 efi_bak_pg_dir_pointer[1].pgd;
104 }
105
106 /*
107 * After the lock is released, the original page table is restored.
108 */
109 __flush_tlb_all();
110
111 local_irq_restore(efi_rt_eflags);
112}
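
The prelog/epilog pair above boils down to temporarily aliasing virtual address 0 to the kernel mapping, so EFI code reached through physical addresses stays reachable while paging remains enabled. A stripped-down sketch of that idea for the PAE case, where a single page-directory entry covers the low 1 GiB (illustrative names, not the kernel's code):

	static pgd_t saved_low_pgd_sketch;

	static void map_low_alias_sketch(void)
	{
		saved_low_pgd_sketch = swapper_pg_dir[pgd_index(0)];
		swapper_pg_dir[pgd_index(0)] =
			swapper_pg_dir[pgd_index(PAGE_OFFSET)];
		__flush_tlb_all();
	}

	static void unmap_low_alias_sketch(void)
	{
		swapper_pg_dir[pgd_index(0)] = saved_low_pgd_sketch;
		__flush_tlb_all();
	}
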
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
deleted file mode 100644
index ac0621a7ac3d..000000000000
--- a/arch/x86/kernel/efi_64.c
+++ /dev/null
@@ -1,114 +0,0 @@
1/*
2 * x86_64 specific EFI support functions
3 * Based on Extensible Firmware Interface Specification version 1.0
4 *
5 * Copyright (C) 2005-2008 Intel Co.
6 * Fenghua Yu <fenghua.yu@intel.com>
7 * Bibo Mao <bibo.mao@intel.com>
8 * Chandramouli Narayanan <mouli@linux.intel.com>
9 * Huang Ying <ying.huang@intel.com>
10 *
11 * Code to convert EFI to E820 map has been implemented in elilo bootloader
12 * based on a EFI patch by Edgar Hucek. Based on the E820 map, the page table
13 * is setup appropriately for EFI runtime code.
14 * - mouli 06/14/2007.
15 *
16 */
17
18#include <linux/kernel.h>
19#include <linux/init.h>
20#include <linux/mm.h>
21#include <linux/types.h>
22#include <linux/spinlock.h>
23#include <linux/bootmem.h>
24#include <linux/ioport.h>
25#include <linux/module.h>
26#include <linux/efi.h>
27#include <linux/uaccess.h>
28#include <linux/io.h>
29#include <linux/reboot.h>
30
31#include <asm/setup.h>
32#include <asm/page.h>
33#include <asm/e820.h>
34#include <asm/pgtable.h>
35#include <asm/tlbflush.h>
36#include <asm/proto.h>
37#include <asm/efi.h>
38#include <asm/cacheflush.h>
39#include <asm/fixmap.h>
40
41static pgd_t save_pgd __initdata;
42static unsigned long efi_flags __initdata;
43
44static void __init early_mapping_set_exec(unsigned long start,
45 unsigned long end,
46 int executable)
47{
48 unsigned long num_pages;
49
50 start &= PMD_MASK;
51 end = (end + PMD_SIZE - 1) & PMD_MASK;
52 num_pages = (end - start) >> PAGE_SHIFT;
53 if (executable)
54 set_memory_x((unsigned long)__va(start), num_pages);
55 else
56 set_memory_nx((unsigned long)__va(start), num_pages);
57}
58
59static void __init early_runtime_code_mapping_set_exec(int executable)
60{
61 efi_memory_desc_t *md;
62 void *p;
63
64 if (!(__supported_pte_mask & _PAGE_NX))
65 return;
66
67 /* Make EFI runtime service code area executable */
68 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
69 md = p;
70 if (md->type == EFI_RUNTIME_SERVICES_CODE) {
71 unsigned long end;
72 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
73 early_mapping_set_exec(md->phys_addr, end, executable);
74 }
75 }
76}
77
78void __init efi_call_phys_prelog(void)
79{
80 unsigned long vaddress;
81
82 early_runtime_code_mapping_set_exec(1);
83 local_irq_save(efi_flags);
84 vaddress = (unsigned long)__va(0x0UL);
85 save_pgd = *pgd_offset_k(0x0UL);
86 set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
87 __flush_tlb_all();
88}
89
90void __init efi_call_phys_epilog(void)
91{
92 /*
93 * After the lock is released, the original page table is restored.
94 */
95 set_pgd(pgd_offset_k(0x0UL), save_pgd);
96 __flush_tlb_all();
97 local_irq_restore(efi_flags);
98 early_runtime_code_mapping_set_exec(0);
99}
100
101void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
102 u32 type)
103{
104 unsigned long last_map_pfn;
105
106 if (type == EFI_MEMORY_MAPPED_IO)
107 return ioremap(phys_addr, size);
108
109 last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
110 if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
111 return NULL;
112
113 return (void __iomem *)__va(phys_addr);
114}
diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S
deleted file mode 100644
index fbe66e626c09..000000000000
--- a/arch/x86/kernel/efi_stub_32.S
+++ /dev/null
@@ -1,123 +0,0 @@
1/*
2 * EFI call stub for IA32.
3 *
4 * This stub allows us to make EFI calls in physical mode with interrupts
5 * turned off.
6 */
7
8#include <linux/linkage.h>
9#include <asm/page_types.h>
10
11/*
12 * efi_call_phys(void *, ...) is a function with variable parameters.
13 * All the callers of this function assure that all the parameters are 4-bytes.
14 */
15
16/*
17 * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
18 * So we'd better save all of them at the beginning of this function and restore
19 * them at the end no matter how many we use, because we cannot assume the EFI
20 * runtime service functions comply with the gcc calling convention either.
21 */
22
23.text
24ENTRY(efi_call_phys)
25 /*
26 * 0. The function can only be called in Linux kernel. So CS has been
27 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
28 * the values of these registers are the same. And, the corresponding
29 * GDT entries are identical. So I will do nothing about segment reg
30 * and GDT, but change GDT base register in prelog and epilog.
31 */
32
33 /*
34 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
35 * To make the switch from virtual mode to flat mode smooth, the
36 * mapping of the lower virtual memory has been created in the prelog
37 * and epilog.
38 */
39 movl $1f, %edx
40 subl $__PAGE_OFFSET, %edx
41 jmp *%edx
421:
43
44 /*
45 * 2. Now on the top of stack is the return
46 * address in the caller of efi_call_phys(), then parameter 1,
47 * parameter 2, ..., param n. To make things easy, we save the return
48 * address of efi_call_phys in a global variable.
49 */
50 popl %edx
51 movl %edx, saved_return_addr
52 /* get the function pointer into ECX*/
53 popl %ecx
54 movl %ecx, efi_rt_function_ptr
55 movl $2f, %edx
56 subl $__PAGE_OFFSET, %edx
57 pushl %edx
58
59 /*
60 * 3. Clear PG bit in %CR0.
61 */
62 movl %cr0, %edx
63 andl $0x7fffffff, %edx
64 movl %edx, %cr0
65 jmp 1f
661:
67
68 /*
69 * 4. Adjust stack pointer.
70 */
71 subl $__PAGE_OFFSET, %esp
72
73 /*
74 * 5. Call the physical function.
75 */
76 jmp *%ecx
77
782:
79 /*
80	 * 6. After the EFI runtime service returns, control will return to the
81	 * following instruction. We'd better readjust the stack pointer first.
82 */
83 addl $__PAGE_OFFSET, %esp
84
85 /*
86 * 7. Restore PG bit
87 */
88 movl %cr0, %edx
89 orl $0x80000000, %edx
90 movl %edx, %cr0
91 jmp 1f
921:
93 /*
94	 * 8. Now restore virtual mode from flat mode by
95	 * adding PAGE_OFFSET to EIP.
96 */
97 movl $1f, %edx
98 jmp *%edx
991:
100
101 /*
102	 * 9. Balance the stack. And because EAX contains the return value,
103 * we'd better not clobber it.
104 */
105 leal efi_rt_function_ptr, %edx
106 movl (%edx), %ecx
107 pushl %ecx
108
109 /*
110 * 10. Push the saved return address onto the stack and return.
111 */
112 leal saved_return_addr, %edx
113 movl (%edx), %ecx
114 pushl %ecx
115 ret
116ENDPROC(efi_call_phys)
117.previous
118
119.data
120saved_return_addr:
121 .long 0
122efi_rt_function_ptr:
123 .long 0
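
The stub above is only reached through wrappers that bracket it with efi_call_phys_prelog()/efi_call_phys_epilog(). A sketch of such a wrapper, modelled on the phys_efi_get_time() assignment earlier in efi.c (the exact prototype and cast are assumptions):

	static efi_status_t phys_efi_get_time_sketch(efi_time_t *tm,
						     efi_time_cap_t *tc)
	{
		efi_status_t status;

		efi_call_phys_prelog();
		status = efi_call_phys((void *)efi_phys.get_time, tm, tc);
		efi_call_phys_epilog();
		return status;
	}
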
diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S
deleted file mode 100644
index 4c07ccab8146..000000000000
--- a/arch/x86/kernel/efi_stub_64.S
+++ /dev/null
@@ -1,116 +0,0 @@
1/*
2 * Function calling ABI conversion from Linux to EFI for x86_64
3 *
4 * Copyright (C) 2007 Intel Corp
5 * Bibo Mao <bibo.mao@intel.com>
6 * Huang Ying <ying.huang@intel.com>
7 */
8
9#include <linux/linkage.h>
10
11#define SAVE_XMM \
12 mov %rsp, %rax; \
13 subq $0x70, %rsp; \
14 and $~0xf, %rsp; \
15 mov %rax, (%rsp); \
16 mov %cr0, %rax; \
17 clts; \
18 mov %rax, 0x8(%rsp); \
19 movaps %xmm0, 0x60(%rsp); \
20 movaps %xmm1, 0x50(%rsp); \
21 movaps %xmm2, 0x40(%rsp); \
22 movaps %xmm3, 0x30(%rsp); \
23 movaps %xmm4, 0x20(%rsp); \
24 movaps %xmm5, 0x10(%rsp)
25
26#define RESTORE_XMM \
27 movaps 0x60(%rsp), %xmm0; \
28 movaps 0x50(%rsp), %xmm1; \
29 movaps 0x40(%rsp), %xmm2; \
30 movaps 0x30(%rsp), %xmm3; \
31 movaps 0x20(%rsp), %xmm4; \
32 movaps 0x10(%rsp), %xmm5; \
33 mov 0x8(%rsp), %rsi; \
34 mov %rsi, %cr0; \
35 mov (%rsp), %rsp
36
37ENTRY(efi_call0)
38 SAVE_XMM
39 subq $32, %rsp
40 call *%rdi
41 addq $32, %rsp
42 RESTORE_XMM
43 ret
44ENDPROC(efi_call0)
45
46ENTRY(efi_call1)
47 SAVE_XMM
48 subq $32, %rsp
49 mov %rsi, %rcx
50 call *%rdi
51 addq $32, %rsp
52 RESTORE_XMM
53 ret
54ENDPROC(efi_call1)
55
56ENTRY(efi_call2)
57 SAVE_XMM
58 subq $32, %rsp
59 mov %rsi, %rcx
60 call *%rdi
61 addq $32, %rsp
62 RESTORE_XMM
63 ret
64ENDPROC(efi_call2)
65
66ENTRY(efi_call3)
67 SAVE_XMM
68 subq $32, %rsp
69 mov %rcx, %r8
70 mov %rsi, %rcx
71 call *%rdi
72 addq $32, %rsp
73 RESTORE_XMM
74 ret
75ENDPROC(efi_call3)
76
77ENTRY(efi_call4)
78 SAVE_XMM
79 subq $32, %rsp
80 mov %r8, %r9
81 mov %rcx, %r8
82 mov %rsi, %rcx
83 call *%rdi
84 addq $32, %rsp
85 RESTORE_XMM
86 ret
87ENDPROC(efi_call4)
88
89ENTRY(efi_call5)
90 SAVE_XMM
91 subq $48, %rsp
92 mov %r9, 32(%rsp)
93 mov %r8, %r9
94 mov %rcx, %r8
95 mov %rsi, %rcx
96 call *%rdi
97 addq $48, %rsp
98 RESTORE_XMM
99 ret
100ENDPROC(efi_call5)
101
102ENTRY(efi_call6)
103 SAVE_XMM
104 mov (%rsp), %rax
105 mov 8(%rax), %rax
106 subq $48, %rsp
107 mov %r9, 32(%rsp)
108 mov %rax, 40(%rsp)
109 mov %r8, %r9
110 mov %rcx, %r8
111 mov %rsi, %rcx
112 call *%rdi
113 addq $48, %rsp
114 RESTORE_XMM
115 ret
116ENDPROC(efi_call6)
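
On the call side, these per-arity stubs are normally selected by wrapper macros in <asm/efi.h>, which are not part of this diff. A hypothetical reconstruction of the two-argument case: the runtime-table function pointer is passed as the first argument, and the stub shuffles the remaining ones into EFI's Microsoft-style register convention before the indirect call.

	#define efi_call_virt2_sketch(f, a1, a2)			\
		efi_call2((void *)(efi.systab->runtime->f),		\
			  (u64)(a1), (u64)(a2))

	/* e.g.: status = efi_call_virt2_sketch(get_time, tm, tc); */
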
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index aff0b3c27509..ae03cab4352e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -713,7 +713,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
713 713
714 switch (action & 0xf) { 714 switch (action & 0xf) {
715 case CPU_ONLINE: 715 case CPU_ONLINE:
716 INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work); 716 INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
717 init_completion(&work.complete); 717 init_completion(&work.complete);
718 /* FIXME: add schedule_work_on() */ 718 /* FIXME: add schedule_work_on() */
719 schedule_delayed_work_on(cpu, &work.work, 0); 719 schedule_delayed_work_on(cpu, &work.work, 0);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 50fbbe60e507..96656f207751 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -17,6 +17,7 @@
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/uaccess.h> 18#include <linux/uaccess.h>
19#include <linux/percpu.h> 19#include <linux/percpu.h>
20#include <linux/mm.h>
20 21
21#include <asm/apic.h> 22#include <asm/apic.h>
22 23
@@ -60,9 +61,6 @@ union irq_ctx {
60static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); 61static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
61static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); 62static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
62 63
63static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, hardirq_stack, THREAD_SIZE);
64static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, softirq_stack, THREAD_SIZE);
65
66static void call_on_stack(void *func, void *stack) 64static void call_on_stack(void *func, void *stack)
67{ 65{
68 asm volatile("xchgl %%ebx,%%esp \n" 66 asm volatile("xchgl %%ebx,%%esp \n"
@@ -128,7 +126,9 @@ void __cpuinit irq_ctx_init(int cpu)
128 if (per_cpu(hardirq_ctx, cpu)) 126 if (per_cpu(hardirq_ctx, cpu))
129 return; 127 return;
130 128
131 irqctx = &per_cpu(hardirq_stack, cpu); 129 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
130 THREAD_FLAGS,
131 THREAD_ORDER));
132 irqctx->tinfo.task = NULL; 132 irqctx->tinfo.task = NULL;
133 irqctx->tinfo.exec_domain = NULL; 133 irqctx->tinfo.exec_domain = NULL;
134 irqctx->tinfo.cpu = cpu; 134 irqctx->tinfo.cpu = cpu;
@@ -137,7 +137,9 @@ void __cpuinit irq_ctx_init(int cpu)
137 137
138 per_cpu(hardirq_ctx, cpu) = irqctx; 138 per_cpu(hardirq_ctx, cpu) = irqctx;
139 139
140 irqctx = &per_cpu(softirq_stack, cpu); 140 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
141 THREAD_FLAGS,
142 THREAD_ORDER));
141 irqctx->tinfo.task = NULL; 143 irqctx->tinfo.task = NULL;
142 irqctx->tinfo.exec_domain = NULL; 144 irqctx->tinfo.exec_domain = NULL;
143 irqctx->tinfo.cpu = cpu; 145 irqctx->tinfo.cpu = cpu;
@@ -150,11 +152,6 @@ void __cpuinit irq_ctx_init(int cpu)
150 cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); 152 cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
151} 153}
152 154
153void irq_ctx_exit(int cpu)
154{
155 per_cpu(hardirq_ctx, cpu) = NULL;
156}
157
158asmlinkage void do_softirq(void) 155asmlinkage void do_softirq(void)
159{ 156{
160 unsigned long flags; 157 unsigned long flags;
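
The hunk above replaces the statically reserved per-cpu stacks with node-local page allocations. A minimal sketch of that allocation pattern, assuming THREAD_FLAGS and THREAD_ORDER are defined as elsewhere in this file's era (an order such that 1 << THREAD_ORDER pages equal THREAD_SIZE):

	static union irq_ctx *alloc_irq_stack_sketch(int cpu)
	{
		struct page *page;

		page = alloc_pages_node(cpu_to_node(cpu), THREAD_FLAGS,
					THREAD_ORDER);
		return page ? page_address(page) : NULL;
	}
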
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index d81cfebb848f..ec592caac4b4 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -387,7 +387,7 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
387 * disable hardware debugging while it is processing gdb packets or 387 * disable hardware debugging while it is processing gdb packets or
388 * handling exception. 388 * handling exception.
389 */ 389 */
390void kgdb_disable_hw_debug(struct pt_regs *regs) 390static void kgdb_disable_hw_debug(struct pt_regs *regs)
391{ 391{
392 int i; 392 int i;
393 int cpu = raw_smp_processor_id(); 393 int cpu = raw_smp_processor_id();
@@ -724,6 +724,7 @@ struct kgdb_arch arch_kgdb_ops = {
724 .flags = KGDB_HW_BREAKPOINT, 724 .flags = KGDB_HW_BREAKPOINT,
725 .set_hw_breakpoint = kgdb_set_hw_break, 725 .set_hw_breakpoint = kgdb_set_hw_break,
726 .remove_hw_breakpoint = kgdb_remove_hw_break, 726 .remove_hw_breakpoint = kgdb_remove_hw_break,
727 .disable_hw_break = kgdb_disable_hw_debug,
727 .remove_all_hw_break = kgdb_remove_all_hw_break, 728 .remove_all_hw_break = kgdb_remove_all_hw_break,
728 .correct_hw_break = kgdb_correct_hw_break, 729 .correct_hw_break = kgdb_correct_hw_break,
729}; 730};
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c
deleted file mode 100644
index 79ae68154e87..000000000000
--- a/arch/x86/kernel/mrst.c
+++ /dev/null
@@ -1,311 +0,0 @@
1/*
2 * mrst.c: Intel Moorestown platform specific setup code
3 *
4 * (C) Copyright 2008 Intel Corporation
5 * Author: Jacob Pan (jacob.jun.pan@intel.com)
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 */
12#include <linux/init.h>
13#include <linux/kernel.h>
14#include <linux/sfi.h>
15#include <linux/irq.h>
16#include <linux/module.h>
17
18#include <asm/setup.h>
19#include <asm/mpspec_def.h>
20#include <asm/hw_irq.h>
21#include <asm/apic.h>
22#include <asm/io_apic.h>
23#include <asm/mrst.h>
24#include <asm/io.h>
25#include <asm/i8259.h>
26#include <asm/apb_timer.h>
27
28/*
29 * The clockevent devices on Moorestown/Medfield can be the APBT or the LAPIC
30 * clock; the cmdline option x86_mrst_timer can be used to override the
31 * configuration to prefer one or the other.
32 * At runtime, there are basically three timer configurations:
33 * 1. per cpu apbt clock only
34 * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only
35 * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast.
36 *
37 * By default (without the cmdline option), platform code first detects the
38 * cpu type to see if we are on Lincroft or Penwell, then sets up the lapic
39 * or apbt clocks accordingly.
40 * I.e. by default, Medfield uses configuration #2 and Moorestown uses #1.
41 * Config #3 is supported but not recommended on Medfield.
42 *
43 * rating and feature summary:
44 * lapic (with C3STOP) --------- 100
45 * apbt (always-on) ------------ 110
46 * lapic (always-on,ARAT) ------ 150
47 */
48
49__cpuinitdata enum mrst_timer_options mrst_timer_options;
50
51static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
52static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
53enum mrst_cpu_type __mrst_cpu_chip;
54EXPORT_SYMBOL_GPL(__mrst_cpu_chip);
55
56int sfi_mtimer_num;
57
58struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
59EXPORT_SYMBOL_GPL(sfi_mrtc_array);
60int sfi_mrtc_num;
61
62static inline void assign_to_mp_irq(struct mpc_intsrc *m,
63 struct mpc_intsrc *mp_irq)
64{
65 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
66}
67
68static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq,
69 struct mpc_intsrc *m)
70{
71 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
72}
73
74static void save_mp_irq(struct mpc_intsrc *m)
75{
76 int i;
77
78 for (i = 0; i < mp_irq_entries; i++) {
79 if (!mp_irq_cmp(&mp_irqs[i], m))
80 return;
81 }
82
83 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
84 if (++mp_irq_entries == MAX_IRQ_SOURCES)
85 panic("Max # of irq sources exceeded!!\n");
86}
87
88/* parse all the mtimer info to a static mtimer array */
89static int __init sfi_parse_mtmr(struct sfi_table_header *table)
90{
91 struct sfi_table_simple *sb;
92 struct sfi_timer_table_entry *pentry;
93 struct mpc_intsrc mp_irq;
94 int totallen;
95
96 sb = (struct sfi_table_simple *)table;
97 if (!sfi_mtimer_num) {
98 sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb,
99 struct sfi_timer_table_entry);
100 pentry = (struct sfi_timer_table_entry *) sb->pentry;
101 totallen = sfi_mtimer_num * sizeof(*pentry);
102 memcpy(sfi_mtimer_array, pentry, totallen);
103 }
104
105 printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num);
106 pentry = sfi_mtimer_array;
107 for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
108 printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz,"
109 " irq = %d\n", totallen, (u32)pentry->phys_addr,
110 pentry->freq_hz, pentry->irq);
111 if (!pentry->irq)
112 continue;
113 mp_irq.type = MP_IOAPIC;
114 mp_irq.irqtype = mp_INT;
115/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
116 mp_irq.irqflag = 5;
117 mp_irq.srcbus = 0;
118 mp_irq.srcbusirq = pentry->irq; /* IRQ */
119 mp_irq.dstapic = MP_APIC_ALL;
120 mp_irq.dstirq = pentry->irq;
121 save_mp_irq(&mp_irq);
122 }
123
124 return 0;
125}
126
127struct sfi_timer_table_entry *sfi_get_mtmr(int hint)
128{
129 int i;
130 if (hint < sfi_mtimer_num) {
131 if (!sfi_mtimer_usage[hint]) {
132 pr_debug("hint taken for timer %d irq %d\n",\
133 hint, sfi_mtimer_array[hint].irq);
134 sfi_mtimer_usage[hint] = 1;
135 return &sfi_mtimer_array[hint];
136 }
137 }
138 /* take the first timer available */
139 for (i = 0; i < sfi_mtimer_num;) {
140 if (!sfi_mtimer_usage[i]) {
141 sfi_mtimer_usage[i] = 1;
142 return &sfi_mtimer_array[i];
143 }
144 i++;
145 }
146 return NULL;
147}
148
149void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr)
150{
151 int i;
152 for (i = 0; i < sfi_mtimer_num;) {
153 if (mtmr->irq == sfi_mtimer_array[i].irq) {
154 sfi_mtimer_usage[i] = 0;
155 return;
156 }
157 i++;
158 }
159}
160
161/* parse all the mrtc info to a global mrtc array */
162int __init sfi_parse_mrtc(struct sfi_table_header *table)
163{
164 struct sfi_table_simple *sb;
165 struct sfi_rtc_table_entry *pentry;
166 struct mpc_intsrc mp_irq;
167
168 int totallen;
169
170 sb = (struct sfi_table_simple *)table;
171 if (!sfi_mrtc_num) {
172 sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb,
173 struct sfi_rtc_table_entry);
174 pentry = (struct sfi_rtc_table_entry *)sb->pentry;
175 totallen = sfi_mrtc_num * sizeof(*pentry);
176 memcpy(sfi_mrtc_array, pentry, totallen);
177 }
178
179 printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num);
180 pentry = sfi_mrtc_array;
181 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
182 printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n",
183 totallen, (u32)pentry->phys_addr, pentry->irq);
184 mp_irq.type = MP_IOAPIC;
185 mp_irq.irqtype = mp_INT;
186 mp_irq.irqflag = 0;
187 mp_irq.srcbus = 0;
188 mp_irq.srcbusirq = pentry->irq; /* IRQ */
189 mp_irq.dstapic = MP_APIC_ALL;
190 mp_irq.dstirq = pentry->irq;
191 save_mp_irq(&mp_irq);
192 }
193 return 0;
194}
195
196static unsigned long __init mrst_calibrate_tsc(void)
197{
198 unsigned long flags, fast_calibrate;
199
200 local_irq_save(flags);
201 fast_calibrate = apbt_quick_calibrate();
202 local_irq_restore(flags);
203
204 if (fast_calibrate)
205 return fast_calibrate;
206
207 return 0;
208}
209
210void __init mrst_time_init(void)
211{
212 switch (mrst_timer_options) {
213 case MRST_TIMER_APBT_ONLY:
214 break;
215 case MRST_TIMER_LAPIC_APBT:
216 x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
217 x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
218 break;
219 default:
220 if (!boot_cpu_has(X86_FEATURE_ARAT))
221 break;
222 x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
223 x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
224 return;
225 }
226 /* we need at least one APB timer */
227 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
228 pre_init_apic_IRQ0();
229 apbt_time_init();
230}
231
232void __init mrst_rtc_init(void)
233{
234 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
235}
236
237void __cpuinit mrst_arch_setup(void)
238{
239 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
240 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
241 else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26)
242 __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
243 else {
244 pr_err("Unknown Moorestown CPU (%d:%d), default to Lincroft\n",
245 boot_cpu_data.x86, boot_cpu_data.x86_model);
246 __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
247 }
248 pr_debug("Moorestown CPU %s identified\n",
249 (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ?
250 "Lincroft" : "Penwell");
251}
252
253/* MID systems don't have i8042 controller */
254static int mrst_i8042_detect(void)
255{
256 return 0;
257}
258
259/*
260 * Moorestown specific x86_init function overrides and early setup
261 * calls.
262 */
263void __init x86_mrst_early_setup(void)
264{
265 x86_init.resources.probe_roms = x86_init_noop;
266 x86_init.resources.reserve_resources = x86_init_noop;
267
268 x86_init.timers.timer_init = mrst_time_init;
269 x86_init.timers.setup_percpu_clockev = x86_init_noop;
270
271 x86_init.irqs.pre_vector_init = x86_init_noop;
272
273 x86_init.oem.arch_setup = mrst_arch_setup;
274
275 x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
276
277 x86_platform.calibrate_tsc = mrst_calibrate_tsc;
278 x86_platform.i8042_detect = mrst_i8042_detect;
279 x86_init.pci.init = pci_mrst_init;
280 x86_init.pci.fixup_irqs = x86_init_noop;
281
282 legacy_pic = &null_legacy_pic;
283
284 /* Avoid searching for BIOS MP tables */
285 x86_init.mpparse.find_smp_config = x86_init_noop;
286 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
287
288}
289
290/*
291 * if user does not want to use per CPU apb timer, just give it a lower rating
292 * than local apic timer and skip the late per cpu timer init.
293 */
294static inline int __init setup_x86_mrst_timer(char *arg)
295{
296 if (!arg)
297 return -EINVAL;
298
299 if (strcmp("apbt_only", arg) == 0)
300 mrst_timer_options = MRST_TIMER_APBT_ONLY;
301 else if (strcmp("lapic_and_apbt", arg) == 0)
302 mrst_timer_options = MRST_TIMER_LAPIC_APBT;
303 else {
304 pr_warning("X86 MRST timer option %s not recognised"
305 " use x86_mrst_timer=apbt_only or lapic_and_apbt\n",
306 arg);
307 return -EINVAL;
308 }
309 return 0;
310}
311__setup("x86_mrst_timer=", setup_x86_mrst_timer);
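
For reference, the removed __setup() handler accepted exactly two values on the kernel command line:

	x86_mrst_timer=apbt_only
	x86_mrst_timer=lapic_and_apbt

Any other value makes setup_x86_mrst_timer() return -EINVAL, leaving the default auto-detection based on the CPU type (and the ARAT feature) in place.
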
diff --git a/arch/x86/kernel/olpc-xo1.c b/arch/x86/kernel/olpc-xo1.c
deleted file mode 100644
index f5442c03abc3..000000000000
--- a/arch/x86/kernel/olpc-xo1.c
+++ /dev/null
@@ -1,140 +0,0 @@
1/*
2 * Support for features of the OLPC XO-1 laptop
3 *
4 * Copyright (C) 2010 One Laptop per Child
5 * Copyright (C) 2006 Red Hat, Inc.
6 * Copyright (C) 2006 Advanced Micro Devices, Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/module.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/platform_device.h>
18#include <linux/pm.h>
19
20#include <asm/io.h>
21#include <asm/olpc.h>
22
23#define DRV_NAME "olpc-xo1"
24
25#define PMS_BAR 4
26#define ACPI_BAR 5
27
28/* PMC registers (PMS block) */
29#define PM_SCLK 0x10
30#define PM_IN_SLPCTL 0x20
31#define PM_WKXD 0x34
32#define PM_WKD 0x30
33#define PM_SSC 0x54
34
35/* PM registers (ACPI block) */
36#define PM1_CNT 0x08
37#define PM_GPE0_STS 0x18
38
39static unsigned long acpi_base;
40static unsigned long pms_base;
41
42static void xo1_power_off(void)
43{
44 printk(KERN_INFO "OLPC XO-1 power off sequence...\n");
45
46 /* Enable all of these controls with 0 delay */
47 outl(0x40000000, pms_base + PM_SCLK);
48 outl(0x40000000, pms_base + PM_IN_SLPCTL);
49 outl(0x40000000, pms_base + PM_WKXD);
50 outl(0x40000000, pms_base + PM_WKD);
51
52 /* Clear status bits (possibly unnecessary) */
53 outl(0x0002ffff, pms_base + PM_SSC);
54 outl(0xffffffff, acpi_base + PM_GPE0_STS);
55
56 /* Write SLP_EN bit to start the machinery */
57 outl(0x00002000, acpi_base + PM1_CNT);
58}
59
60/* Read the base addresses from the PCI BAR info */
61static int __devinit setup_bases(struct pci_dev *pdev)
62{
63 int r;
64
65 r = pci_enable_device_io(pdev);
66 if (r) {
67 dev_err(&pdev->dev, "can't enable device IO\n");
68 return r;
69 }
70
71 r = pci_request_region(pdev, ACPI_BAR, DRV_NAME);
72 if (r) {
73 dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", ACPI_BAR);
74 return r;
75 }
76
77 r = pci_request_region(pdev, PMS_BAR, DRV_NAME);
78 if (r) {
79 dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", PMS_BAR);
80 pci_release_region(pdev, ACPI_BAR);
81 return r;
82 }
83
84 acpi_base = pci_resource_start(pdev, ACPI_BAR);
85 pms_base = pci_resource_start(pdev, PMS_BAR);
86
87 return 0;
88}
89
90static int __devinit olpc_xo1_probe(struct platform_device *pdev)
91{
92 struct pci_dev *pcidev;
93 int r;
94
95 pcidev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA,
96 NULL);
97	if (!pcidev)
98 return -ENODEV;
99
100 r = setup_bases(pcidev);
101 if (r)
102 return r;
103
104 pm_power_off = xo1_power_off;
105
106 printk(KERN_INFO "OLPC XO-1 support registered\n");
107 return 0;
108}
109
110static int __devexit olpc_xo1_remove(struct platform_device *pdev)
111{
112 pm_power_off = NULL;
113 return 0;
114}
115
116static struct platform_driver olpc_xo1_driver = {
117 .driver = {
118 .name = DRV_NAME,
119 .owner = THIS_MODULE,
120 },
121 .probe = olpc_xo1_probe,
122 .remove = __devexit_p(olpc_xo1_remove),
123};
124
125static int __init olpc_xo1_init(void)
126{
127 return platform_driver_register(&olpc_xo1_driver);
128}
129
130static void __exit olpc_xo1_exit(void)
131{
132 platform_driver_unregister(&olpc_xo1_driver);
133}
134
135MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>");
136MODULE_LICENSE("GPL");
137MODULE_ALIAS("platform:olpc-xo1");
138
139module_init(olpc_xo1_init);
140module_exit(olpc_xo1_exit);
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
deleted file mode 100644
index edaf3fe8dc5e..000000000000
--- a/arch/x86/kernel/olpc.c
+++ /dev/null
@@ -1,281 +0,0 @@
1/*
2 * Support for the OLPC DCON and OLPC EC access
3 *
4 * Copyright © 2006 Advanced Micro Devices, Inc.
5 * Copyright © 2007-2008 Andres Salomon <dilinger@debian.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 */
12
13#include <linux/kernel.h>
14#include <linux/init.h>
15#include <linux/module.h>
16#include <linux/delay.h>
17#include <linux/spinlock.h>
18#include <linux/io.h>
19#include <linux/string.h>
20#include <linux/platform_device.h>
21
22#include <asm/geode.h>
23#include <asm/setup.h>
24#include <asm/olpc.h>
25#include <asm/olpc_ofw.h>
26
27struct olpc_platform_t olpc_platform_info;
28EXPORT_SYMBOL_GPL(olpc_platform_info);
29
30static DEFINE_SPINLOCK(ec_lock);
31
32/* what the timeout *should* be (in ms) */
33#define EC_BASE_TIMEOUT 20
34
35/* the timeout that bugs in the EC might force us to actually use */
36static int ec_timeout = EC_BASE_TIMEOUT;
37
38static int __init olpc_ec_timeout_set(char *str)
39{
40 if (get_option(&str, &ec_timeout) != 1) {
41 ec_timeout = EC_BASE_TIMEOUT;
42 printk(KERN_ERR "olpc-ec: invalid argument to "
43 "'olpc_ec_timeout=', ignoring!\n");
44 }
45 printk(KERN_DEBUG "olpc-ec: using %d ms delay for EC commands.\n",
46 ec_timeout);
47 return 1;
48}
49__setup("olpc_ec_timeout=", olpc_ec_timeout_set);
50
51/*
52 * These {i,o}bf_status functions return whether the buffers are full or not.
53 */
54
55static inline unsigned int ibf_status(unsigned int port)
56{
57 return !!(inb(port) & 0x02);
58}
59
60static inline unsigned int obf_status(unsigned int port)
61{
62 return inb(port) & 0x01;
63}
64
65#define wait_on_ibf(p, d) __wait_on_ibf(__LINE__, (p), (d))
66static int __wait_on_ibf(unsigned int line, unsigned int port, int desired)
67{
68 unsigned int timeo;
69 int state = ibf_status(port);
70
71 for (timeo = ec_timeout; state != desired && timeo; timeo--) {
72 mdelay(1);
73 state = ibf_status(port);
74 }
75
76 if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) &&
77 timeo < (ec_timeout - EC_BASE_TIMEOUT)) {
78 printk(KERN_WARNING "olpc-ec: %d: waited %u ms for IBF!\n",
79 line, ec_timeout - timeo);
80 }
81
82 return !(state == desired);
83}
84
85#define wait_on_obf(p, d) __wait_on_obf(__LINE__, (p), (d))
86static int __wait_on_obf(unsigned int line, unsigned int port, int desired)
87{
88 unsigned int timeo;
89 int state = obf_status(port);
90
91 for (timeo = ec_timeout; state != desired && timeo; timeo--) {
92 mdelay(1);
93 state = obf_status(port);
94 }
95
96 if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) &&
97 timeo < (ec_timeout - EC_BASE_TIMEOUT)) {
98 printk(KERN_WARNING "olpc-ec: %d: waited %u ms for OBF!\n",
99 line, ec_timeout - timeo);
100 }
101
102 return !(state == desired);
103}
104
105/*
106 * This allows the kernel to run Embedded Controller commands. The EC is
107 * documented at <http://wiki.laptop.org/go/Embedded_controller>, and the
108 * available EC commands are here:
109 * <http://wiki.laptop.org/go/Ec_specification>. Unfortunately, while
110 * OpenFirmware's source is available, the EC's is not.
111 */
112int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
113 unsigned char *outbuf, size_t outlen)
114{
115 unsigned long flags;
116 int ret = -EIO;
117 int i;
118 int restarts = 0;
119
120 spin_lock_irqsave(&ec_lock, flags);
121
122 /* Clear OBF */
123 for (i = 0; i < 10 && (obf_status(0x6c) == 1); i++)
124 inb(0x68);
125 if (i == 10) {
126 printk(KERN_ERR "olpc-ec: timeout while attempting to "
127 "clear OBF flag!\n");
128 goto err;
129 }
130
131 if (wait_on_ibf(0x6c, 0)) {
132 printk(KERN_ERR "olpc-ec: timeout waiting for EC to "
133 "quiesce!\n");
134 goto err;
135 }
136
137restart:
138 /*
139 * Note that if we time out during any IBF checks, that's a failure;
140 * we have to return. There's no way for the kernel to clear that.
141 *
142 * If we time out during an OBF check, we can restart the command;
143 * reissuing it will clear the OBF flag, and we should be alright.
144 * The OBF flag will sometimes misbehave due to what we believe
145 * is a hardware quirk..
146 */
147 pr_devel("olpc-ec: running cmd 0x%x\n", cmd);
148 outb(cmd, 0x6c);
149
150 if (wait_on_ibf(0x6c, 0)) {
151 printk(KERN_ERR "olpc-ec: timeout waiting for EC to read "
152 "command!\n");
153 goto err;
154 }
155
156 if (inbuf && inlen) {
157 /* write data to EC */
158 for (i = 0; i < inlen; i++) {
159 if (wait_on_ibf(0x6c, 0)) {
160 printk(KERN_ERR "olpc-ec: timeout waiting for"
161 " EC accept data!\n");
162 goto err;
163 }
164 pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]);
165 outb(inbuf[i], 0x68);
166 }
167 }
168 if (outbuf && outlen) {
169 /* read data from EC */
170 for (i = 0; i < outlen; i++) {
171 if (wait_on_obf(0x6c, 1)) {
172 printk(KERN_ERR "olpc-ec: timeout waiting for"
173 " EC to provide data!\n");
174 if (restarts++ < 10)
175 goto restart;
176 goto err;
177 }
178 outbuf[i] = inb(0x68);
179 pr_devel("olpc-ec: received 0x%x\n", outbuf[i]);
180 }
181 }
182
183 ret = 0;
184err:
185 spin_unlock_irqrestore(&ec_lock, flags);
186 return ret;
187}
188EXPORT_SYMBOL_GPL(olpc_ec_cmd);
189
190static bool __init check_ofw_architecture(void)
191{
192 size_t propsize;
193 char olpc_arch[5];
194 const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 };
195 void *res[] = { &propsize };
196
197 if (olpc_ofw("getprop", args, res)) {
198 printk(KERN_ERR "ofw: getprop call failed!\n");
199 return false;
200 }
201 return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0;
202}
203
204static u32 __init get_board_revision(void)
205{
206 size_t propsize;
207 __be32 rev;
208 const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 };
209 void *res[] = { &propsize };
210
211 if (olpc_ofw("getprop", args, res) || propsize != 4) {
212 printk(KERN_ERR "ofw: getprop call failed!\n");
213 return cpu_to_be32(0);
214 }
215 return be32_to_cpu(rev);
216}
217
218static bool __init platform_detect(void)
219{
220 if (!check_ofw_architecture())
221 return false;
222 olpc_platform_info.flags |= OLPC_F_PRESENT;
223 olpc_platform_info.boardrev = get_board_revision();
224 return true;
225}
226
227static int __init add_xo1_platform_devices(void)
228{
229 struct platform_device *pdev;
230
231 pdev = platform_device_register_simple("xo1-rfkill", -1, NULL, 0);
232 if (IS_ERR(pdev))
233 return PTR_ERR(pdev);
234
235 pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0);
236 if (IS_ERR(pdev))
237 return PTR_ERR(pdev);
238
239 return 0;
240}
241
242static int __init olpc_init(void)
243{
244 int r = 0;
245
246 if (!olpc_ofw_present() || !platform_detect())
247 return 0;
248
249 spin_lock_init(&ec_lock);
250
251 /* assume B1 and above models always have a DCON */
252 if (olpc_board_at_least(olpc_board(0xb1)))
253 olpc_platform_info.flags |= OLPC_F_DCON;
254
255 /* get the EC revision */
256 olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
257 (unsigned char *) &olpc_platform_info.ecver, 1);
258
259#ifdef CONFIG_PCI_OLPC
260	/* If the VSA exists, let it emulate PCI; if not, emulate it in the kernel.
261 * XO-1 only. */
262 if (olpc_platform_info.boardrev < olpc_board_pre(0xd0) &&
263 !cs5535_has_vsa2())
264 x86_init.pci.arch_init = pci_olpc_init;
265#endif
266
267 printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
268 ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
269 olpc_platform_info.boardrev >> 4,
270 olpc_platform_info.ecver);
271
272 if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) { /* XO-1 */
273 r = add_xo1_platform_devices();
274 if (r)
275 return r;
276 }
277
278 return 0;
279}
280
281postcore_initcall(olpc_init);
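
An illustrative caller of the removed olpc_ec_cmd() interface, modelled on the EC-version query in olpc_init() above (no input bytes, a single output byte):

	static int olpc_read_ec_version_sketch(unsigned char *ver)
	{
		return olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, ver, 1);
	}
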
diff --git a/arch/x86/kernel/olpc_ofw.c b/arch/x86/kernel/olpc_ofw.c
deleted file mode 100644
index 787320464379..000000000000
--- a/arch/x86/kernel/olpc_ofw.c
+++ /dev/null
@@ -1,112 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/module.h>
3#include <linux/init.h>
4#include <asm/page.h>
5#include <asm/setup.h>
6#include <asm/io.h>
7#include <asm/pgtable.h>
8#include <asm/olpc_ofw.h>
9
10/* address of OFW callback interface; will be NULL if OFW isn't found */
11static int (*olpc_ofw_cif)(int *);
12
13/* page dir entry containing OFW's pgdir table; filled in by head_32.S */
14u32 olpc_ofw_pgd __initdata;
15
16static DEFINE_SPINLOCK(ofw_lock);
17
18#define MAXARGS 10
19
20void __init setup_olpc_ofw_pgd(void)
21{
22 pgd_t *base, *ofw_pde;
23
24 if (!olpc_ofw_cif)
25 return;
26
27 /* fetch OFW's PDE */
28 base = early_ioremap(olpc_ofw_pgd, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD);
29 if (!base) {
30 printk(KERN_ERR "failed to remap OFW's pgd - disabling OFW!\n");
31 olpc_ofw_cif = NULL;
32 return;
33 }
34 ofw_pde = &base[OLPC_OFW_PDE_NR];
35
36 /* install OFW's PDE permanently into the kernel's pgtable */
37 set_pgd(&swapper_pg_dir[OLPC_OFW_PDE_NR], *ofw_pde);
38 /* implicit optimization barrier here due to uninline function return */
39
40 early_iounmap(base, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD);
41}
42
43int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res,
44 void **res)
45{
46 int ofw_args[MAXARGS + 3];
47 unsigned long flags;
48 int ret, i, *p;
49
50 BUG_ON(nr_args + nr_res > MAXARGS);
51
52 if (!olpc_ofw_cif)
53 return -EIO;
54
55 ofw_args[0] = (int)name;
56 ofw_args[1] = nr_args;
57 ofw_args[2] = nr_res;
58
59 p = &ofw_args[3];
60 for (i = 0; i < nr_args; i++, p++)
61 *p = (int)args[i];
62
63 /* call into ofw */
64 spin_lock_irqsave(&ofw_lock, flags);
65 ret = olpc_ofw_cif(ofw_args);
66 spin_unlock_irqrestore(&ofw_lock, flags);
67
68 if (!ret) {
69 for (i = 0; i < nr_res; i++, p++)
70 *((int *)res[i]) = *p;
71 }
72
73 return ret;
74}
75EXPORT_SYMBOL_GPL(__olpc_ofw);
76
77bool olpc_ofw_present(void)
78{
79 return olpc_ofw_cif != NULL;
80}
81EXPORT_SYMBOL_GPL(olpc_ofw_present);
82
83/* OFW cif _should_ be above this address */
84#define OFW_MIN 0xff000000
85
86/* OFW starts on a 1MB boundary */
87#define OFW_BOUND (1<<20)
88
89void __init olpc_ofw_detect(void)
90{
91 struct olpc_ofw_header *hdr = &boot_params.olpc_ofw_header;
92 unsigned long start;
93
94 /* ensure OFW booted us by checking for "OFW " string */
95 if (hdr->ofw_magic != OLPC_OFW_SIG)
96 return;
97
98 olpc_ofw_cif = (int (*)(int *))hdr->cif_handler;
99
100 if ((unsigned long)olpc_ofw_cif < OFW_MIN) {
101 printk(KERN_ERR "OFW detected, but cif has invalid address 0x%lx - disabling.\n",
102 (unsigned long)olpc_ofw_cif);
103 olpc_ofw_cif = NULL;
104 return;
105 }
106
107 /* determine where OFW starts in memory */
108 start = round_down((unsigned long)olpc_ofw_cif, OFW_BOUND);
109 printk(KERN_INFO "OFW detected in memory, cif @ 0x%lx (reserving top %ldMB)\n",
110 (unsigned long)olpc_ofw_cif, (-start) >> 20);
111 reserve_top_address(-start);
112}
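
The olpc_ofw() calls seen in olpc.c above reach __olpc_ofw() through a convenience macro defined in <asm/olpc_ofw.h>, which is outside this diff; a hypothetical reconstruction that supplies the argument and result counts:

	#define olpc_ofw_sketch(name, args, res)			\
		__olpc_ofw((name), ARRAY_SIZE(args), (args),		\
			   ARRAY_SIZE(res), (res))
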
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 70c4872cd8aa..45892dc4b72a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -801,7 +801,8 @@ void ptrace_disable(struct task_struct *child)
801static const struct user_regset_view user_x86_32_view; /* Initialized below. */ 801static const struct user_regset_view user_x86_32_view; /* Initialized below. */
802#endif 802#endif
803 803
804long arch_ptrace(struct task_struct *child, long request, long addr, long data) 804long arch_ptrace(struct task_struct *child, long request,
805 unsigned long addr, unsigned long data)
805{ 806{
806 int ret; 807 int ret;
807 unsigned long __user *datap = (unsigned long __user *)data; 808 unsigned long __user *datap = (unsigned long __user *)data;
@@ -812,8 +813,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
812 unsigned long tmp; 813 unsigned long tmp;
813 814
814 ret = -EIO; 815 ret = -EIO;
815 if ((addr & (sizeof(data) - 1)) || addr < 0 || 816 if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
816 addr >= sizeof(struct user))
817 break; 817 break;
818 818
819 tmp = 0; /* Default return condition */ 819 tmp = 0; /* Default return condition */
@@ -830,8 +830,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
830 830
831 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ 831 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
832 ret = -EIO; 832 ret = -EIO;
833 if ((addr & (sizeof(data) - 1)) || addr < 0 || 833 if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
834 addr >= sizeof(struct user))
835 break; 834 break;
836 835
837 if (addr < sizeof(struct user_regs_struct)) 836 if (addr < sizeof(struct user_regs_struct))
@@ -888,17 +887,17 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
888 887
889#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 888#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
890 case PTRACE_GET_THREAD_AREA: 889 case PTRACE_GET_THREAD_AREA:
891 if (addr < 0) 890 if ((int) addr < 0)
892 return -EIO; 891 return -EIO;
893 ret = do_get_thread_area(child, addr, 892 ret = do_get_thread_area(child, addr,
894 (struct user_desc __user *) data); 893 (struct user_desc __user *)data);
895 break; 894 break;
896 895
897 case PTRACE_SET_THREAD_AREA: 896 case PTRACE_SET_THREAD_AREA:
898 if (addr < 0) 897 if ((int) addr < 0)
899 return -EIO; 898 return -EIO;
900 ret = do_set_thread_area(child, addr, 899 ret = do_set_thread_area(child, addr,
901 (struct user_desc __user *) data, 0); 900 (struct user_desc __user *)data, 0);
902 break; 901 break;
903#endif 902#endif
904 903
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f7f53dcd3e0a..c495aa8d4815 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -635,7 +635,7 @@ void native_machine_shutdown(void)
635 /* O.K Now that I'm on the appropriate processor, 635 /* O.K Now that I'm on the appropriate processor,
636 * stop all of the others. 636 * stop all of the others.
637 */ 637 */
638 smp_send_stop(); 638 stop_other_cpus();
639#endif 639#endif
640 640
641 lapic_shutdown(); 641 lapic_shutdown();
diff --git a/arch/x86/kernel/scx200_32.c b/arch/x86/kernel/scx200_32.c
deleted file mode 100644
index 7e004acbe526..000000000000
--- a/arch/x86/kernel/scx200_32.c
+++ /dev/null
@@ -1,131 +0,0 @@
1/*
2 * Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com>
3 *
4 * National Semiconductor SCx200 support.
5 */
6
7#include <linux/module.h>
8#include <linux/errno.h>
9#include <linux/kernel.h>
10#include <linux/init.h>
11#include <linux/mutex.h>
12#include <linux/pci.h>
13
14#include <linux/scx200.h>
15#include <linux/scx200_gpio.h>
16
17/* Verify that the configuration block really is there */
18#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))
19
20#define NAME "scx200"
21
22MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>");
23MODULE_DESCRIPTION("NatSemi SCx200 Driver");
24MODULE_LICENSE("GPL");
25
26unsigned scx200_gpio_base = 0;
27unsigned long scx200_gpio_shadow[2];
28
29unsigned scx200_cb_base = 0;
30
31static struct pci_device_id scx200_tbl[] = {
32 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
33 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
34 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) },
35 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) },
36 { },
37};
38MODULE_DEVICE_TABLE(pci,scx200_tbl);
39
40static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *);
41
42static struct pci_driver scx200_pci_driver = {
43 .name = "scx200",
44 .id_table = scx200_tbl,
45 .probe = scx200_probe,
46};
47
48static DEFINE_MUTEX(scx200_gpio_config_lock);
49
50static void __devinit scx200_init_shadow(void)
51{
52 int bank;
53
54 /* read the current values driven on the GPIO signals */
55 for (bank = 0; bank < 2; ++bank)
56 scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank);
57}
58
59static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
60{
61 unsigned base;
62
63 if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE ||
64 pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) {
65 base = pci_resource_start(pdev, 0);
66 printk(KERN_INFO NAME ": GPIO base 0x%x\n", base);
67
68 if (!request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO")) {
69 printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n");
70 return -EBUSY;
71 }
72
73 scx200_gpio_base = base;
74 scx200_init_shadow();
75
76 } else {
77 /* find the base of the Configuration Block */
78 if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) {
79 scx200_cb_base = SCx200_CB_BASE_FIXED;
80 } else {
81 pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base);
82 if (scx200_cb_probe(base)) {
83 scx200_cb_base = base;
84 } else {
85 printk(KERN_WARNING NAME ": Configuration Block not found\n");
86 return -ENODEV;
87 }
88 }
89 printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base);
90 }
91
92 return 0;
93}
94
95u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits)
96{
97 u32 config, new_config;
98
99 mutex_lock(&scx200_gpio_config_lock);
100
101 outl(index, scx200_gpio_base + 0x20);
102 config = inl(scx200_gpio_base + 0x24);
103
104 new_config = (config & mask) | bits;
105 outl(new_config, scx200_gpio_base + 0x24);
106
107 mutex_unlock(&scx200_gpio_config_lock);
108
109 return config;
110}
111
112static int __init scx200_init(void)
113{
114 printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n");
115
116 return pci_register_driver(&scx200_pci_driver);
117}
118
119static void __exit scx200_cleanup(void)
120{
121 pci_unregister_driver(&scx200_pci_driver);
122 release_region(scx200_gpio_base, SCx200_GPIO_SIZE);
123}
124
125module_init(scx200_init);
126module_exit(scx200_cleanup);
127
128EXPORT_SYMBOL(scx200_gpio_base);
129EXPORT_SYMBOL(scx200_gpio_shadow);
130EXPORT_SYMBOL(scx200_gpio_configure);
131EXPORT_SYMBOL(scx200_cb_base);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 95a32746fbf9..21c6746338af 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -769,6 +769,8 @@ void __init setup_arch(char **cmdline_p)
769 769
770 x86_init.oem.arch_setup(); 770 x86_init.oem.arch_setup();
771 771
772 resource_alloc_from_bottom = 0;
773 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
772 setup_memory_map(); 774 setup_memory_map();
773 parse_setup_data(); 775 parse_setup_data();
774 /* update the e820_saved too */ 776 /* update the e820_saved too */
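
A worked example of the new iomem_resource bound added above, with an illustrative value: a CPU reporting boot_cpu_data.x86_phys_bits == 36 gives

	iomem_resource.end = (1ULL << 36) - 1 = 0xFFFFFFFFF

i.e. the last byte of a 64 GiB physical address space.
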
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c
deleted file mode 100644
index dd4c281ffe57..000000000000
--- a/arch/x86/kernel/sfi.c
+++ /dev/null
@@ -1,120 +0,0 @@
1/*
2 * sfi.c - x86 architecture SFI support.
3 *
4 * Copyright (c) 2009, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 */
20
21#define KMSG_COMPONENT "SFI"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24#include <linux/acpi.h>
25#include <linux/init.h>
26#include <linux/sfi.h>
27#include <linux/io.h>
28
29#include <asm/io_apic.h>
30#include <asm/mpspec.h>
31#include <asm/setup.h>
32#include <asm/apic.h>
33
34#ifdef CONFIG_X86_LOCAL_APIC
35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
36
37static void __init mp_sfi_register_lapic_address(unsigned long address)
38{
39 mp_lapic_addr = address;
40
41 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
42 if (boot_cpu_physical_apicid == -1U)
43 boot_cpu_physical_apicid = read_apic_id();
44
45 pr_info("Boot CPU = %d\n", boot_cpu_physical_apicid);
46}
47
48/* All CPUs enumerated by SFI must be present and enabled */
49static void __cpuinit mp_sfi_register_lapic(u8 id)
50{
51 if (MAX_APICS - id <= 0) {
52 pr_warning("Processor #%d invalid (max %d)\n",
53 id, MAX_APICS);
54 return;
55 }
56
57 pr_info("registering lapic[%d]\n", id);
58
59 generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR)));
60}
61
62static int __init sfi_parse_cpus(struct sfi_table_header *table)
63{
64 struct sfi_table_simple *sb;
65 struct sfi_cpu_table_entry *pentry;
66 int i;
67 int cpu_num;
68
69 sb = (struct sfi_table_simple *)table;
70 cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry);
71 pentry = (struct sfi_cpu_table_entry *)sb->pentry;
72
73 for (i = 0; i < cpu_num; i++) {
74 mp_sfi_register_lapic(pentry->apic_id);
75 pentry++;
76 }
77
78 smp_found_config = 1;
79 return 0;
80}
81#endif /* CONFIG_X86_LOCAL_APIC */
82
83#ifdef CONFIG_X86_IO_APIC
84
85static int __init sfi_parse_ioapic(struct sfi_table_header *table)
86{
87 struct sfi_table_simple *sb;
88 struct sfi_apic_table_entry *pentry;
89 int i, num;
90
91 sb = (struct sfi_table_simple *)table;
92 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry);
93 pentry = (struct sfi_apic_table_entry *)sb->pentry;
94
95 for (i = 0; i < num; i++) {
96 mp_register_ioapic(i, pentry->phys_addr, gsi_top);
97 pentry++;
98 }
99
100 WARN(pic_mode, KERN_WARNING
101		"SFI: pic_mode shouldn't be 1 when IOAPIC table is present\n");
102 pic_mode = 0;
103 return 0;
104}
105#endif /* CONFIG_X86_IO_APIC */
106
107/*
108 * sfi_platform_init(): register lapics & io-apics
109 */
110int __init sfi_platform_init(void)
111{
112#ifdef CONFIG_X86_LOCAL_APIC
113 mp_sfi_register_lapic_address(sfi_lapic_addr);
114 sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
115#endif
116#ifdef CONFIG_X86_IO_APIC
117 sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic);
118#endif
119 return 0;
120}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d801210945d6..513deac7228d 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(void)
159 irq_exit(); 159 irq_exit();
160} 160}
161 161
162static void native_smp_send_stop(void) 162static void native_stop_other_cpus(int wait)
163{ 163{
164 unsigned long flags; 164 unsigned long flags;
165 unsigned long wait; 165 unsigned long timeout;
166 166
167 if (reboot_force) 167 if (reboot_force)
168 return; 168 return;
@@ -179,9 +179,12 @@ static void native_smp_send_stop(void)
179 if (num_online_cpus() > 1) { 179 if (num_online_cpus() > 1) {
180 apic->send_IPI_allbutself(REBOOT_VECTOR); 180 apic->send_IPI_allbutself(REBOOT_VECTOR);
181 181
182 /* Don't wait longer than a second */ 182 /*
183 wait = USEC_PER_SEC; 183 * Don't wait longer than a second if the caller
184 while (num_online_cpus() > 1 && wait--) 184 * didn't ask us to wait.
185 */
186 timeout = USEC_PER_SEC;
187 while (num_online_cpus() > 1 && (wait || timeout--))
185 udelay(1); 188 udelay(1);
186 } 189 }
187 190
@@ -227,7 +230,7 @@ struct smp_ops smp_ops = {
227 .smp_prepare_cpus = native_smp_prepare_cpus, 230 .smp_prepare_cpus = native_smp_prepare_cpus,
228 .smp_cpus_done = native_smp_cpus_done, 231 .smp_cpus_done = native_smp_cpus_done,
229 232
230 .smp_send_stop = native_smp_send_stop, 233 .stop_other_cpus = native_stop_other_cpus,
231 .smp_send_reschedule = native_smp_send_reschedule, 234 .smp_send_reschedule = native_smp_send_reschedule,
232 235
233 .cpu_up = native_cpu_up, 236 .cpu_up = native_cpu_up,
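
Call sites such as native_machine_shutdown() in reboot.c (also in this diff) reach the renamed hook through a thin wrapper; a hypothetical reconstruction, assuming it lives in <asm/smp.h>:

	static inline void stop_other_cpus_sketch(void)
	{
		smp_ops.stop_other_cpus(0);	/* reboot path: don't wait */
	}
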
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6af118511b4a..083e99d1b7df 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -747,7 +747,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
747 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), 747 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
748 }; 748 };
749 749
750 INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle); 750 INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
751 751
752 alternatives_smp_switch(1); 752 alternatives_smp_switch(1);
753 753
@@ -1373,7 +1373,6 @@ void play_dead_common(void)
1373{ 1373{
1374 idle_task_exit(); 1374 idle_task_exit();
1375 reset_lazy_tlbstate(); 1375 reset_lazy_tlbstate();
1376 irq_ctx_exit(raw_smp_processor_id());
1377 c1e_remove_cpu(raw_smp_processor_id()); 1376 c1e_remove_cpu(raw_smp_processor_id());
1378 1377
1379 mb(); 1378 mb();
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
deleted file mode 100644
index 20ea20a39e2a..000000000000
--- a/arch/x86/kernel/tlb_uv.c
+++ /dev/null
@@ -1,1661 +0,0 @@
1/*
2 * SGI UltraViolet TLB flush routines.
3 *
4 * (c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI.
5 *
6 * This code is released under the GNU General Public License version 2 or
7 * later.
8 */
9#include <linux/seq_file.h>
10#include <linux/proc_fs.h>
11#include <linux/debugfs.h>
12#include <linux/kernel.h>
13#include <linux/slab.h>
14
15#include <asm/mmu_context.h>
16#include <asm/uv/uv.h>
17#include <asm/uv/uv_mmrs.h>
18#include <asm/uv/uv_hub.h>
19#include <asm/uv/uv_bau.h>
20#include <asm/apic.h>
21#include <asm/idle.h>
22#include <asm/tsc.h>
23#include <asm/irq_vectors.h>
24#include <asm/timer.h>
25
26/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
27static int timeout_base_ns[] = {
28 20,
29 160,
30 1280,
31 10240,
32 81920,
33 655360,
34 5242880,
35 167772160
36};
37static int timeout_us;
38static int nobau;
39static int baudisabled;
40static spinlock_t disable_lock;
41static cycles_t congested_cycles;
42
43/* tunables: */
44static int max_bau_concurrent = MAX_BAU_CONCURRENT;
45static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT;
46static int plugged_delay = PLUGGED_DELAY;
47static int plugsb4reset = PLUGSB4RESET;
48static int timeoutsb4reset = TIMEOUTSB4RESET;
49static int ipi_reset_limit = IPI_RESET_LIMIT;
50static int complete_threshold = COMPLETE_THRESHOLD;
51static int congested_response_us = CONGESTED_RESPONSE_US;
52static int congested_reps = CONGESTED_REPS;
53static int congested_period = CONGESTED_PERIOD;
54static struct dentry *tunables_dir;
55static struct dentry *tunables_file;
56
57static int __init setup_nobau(char *arg)
58{
59 nobau = 1;
60 return 0;
61}
62early_param("nobau", setup_nobau);
63
64/* base pnode in this partition */
65static int uv_partition_base_pnode __read_mostly;
66/* position of pnode (which is nasid>>1): */
67static int uv_nshift __read_mostly;
68static unsigned long uv_mmask __read_mostly;
69
70static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
71static DEFINE_PER_CPU(struct bau_control, bau_control);
72static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
73
74/*
75 * Determine the first node on a uvhub. 'Nodes' are used for kernel
76 * memory allocation.
77 */
78static int __init uvhub_to_first_node(int uvhub)
79{
80 int node, b;
81
82 for_each_online_node(node) {
83 b = uv_node_to_blade_id(node);
84 if (uvhub == b)
85 return node;
86 }
87 return -1;
88}
89
90/*
91 * Determine the apicid of the first cpu on a uvhub.
92 */
93static int __init uvhub_to_first_apicid(int uvhub)
94{
95 int cpu;
96
97 for_each_present_cpu(cpu)
98 if (uvhub == uv_cpu_to_blade_id(cpu))
99 return per_cpu(x86_cpu_to_apicid, cpu);
100 return -1;
101}
102
103/*
104 * Free a software acknowledge hardware resource by clearing its Pending
105 * bit. This will return a reply to the sender.
106 * If the message has timed out, a reply has already been sent by the
107 * hardware but the resource has not been released. In that case our
108 * clear of the Timeout bit (as well) will free the resource. No reply will
109 * be sent (the hardware will only do one reply per message).
110 */
111static inline void uv_reply_to_message(struct msg_desc *mdp,
112 struct bau_control *bcp)
113{
114 unsigned long dw;
115 struct bau_payload_queue_entry *msg;
116
117 msg = mdp->msg;
118 if (!msg->canceled) {
119 dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) |
120 msg->sw_ack_vector;
121 uv_write_local_mmr(
122 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw);
123 }
124 msg->replied_to = 1;
125 msg->sw_ack_vector = 0;
126}
127
128/*
129 * Process the receipt of a RETRY message
130 */
131static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,
132 struct bau_control *bcp)
133{
134 int i;
135 int cancel_count = 0;
136 int slot2;
137 unsigned long msg_res;
138 unsigned long mmr = 0;
139 struct bau_payload_queue_entry *msg;
140 struct bau_payload_queue_entry *msg2;
141 struct ptc_stats *stat;
142
143 msg = mdp->msg;
144 stat = bcp->statp;
145 stat->d_retries++;
146 /*
147 * cancel any message from msg+1 to the retry itself
148 */
149 for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
150 if (msg2 > mdp->va_queue_last)
151 msg2 = mdp->va_queue_first;
152 if (msg2 == msg)
153 break;
154
155 /* same conditions for cancellation as uv_do_reset */
156 if ((msg2->replied_to == 0) && (msg2->canceled == 0) &&
157 (msg2->sw_ack_vector) && ((msg2->sw_ack_vector &
158 msg->sw_ack_vector) == 0) &&
159 (msg2->sending_cpu == msg->sending_cpu) &&
160 (msg2->msg_type != MSG_NOOP)) {
161 slot2 = msg2 - mdp->va_queue_first;
162 mmr = uv_read_local_mmr
163 (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
164 msg_res = msg2->sw_ack_vector;
165 /*
166 * This is a message retry; clear the resources held
167 * by the previous message only if they timed out.
168 * If it has not timed out we have an unexpected
169 * situation to report.
170 */
171 if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
172 /*
173 * is the resource timed out?
174 * make everyone ignore the cancelled message.
175 */
176 msg2->canceled = 1;
177 stat->d_canceled++;
178 cancel_count++;
179 uv_write_local_mmr(
180 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
181 (msg_res << UV_SW_ACK_NPENDING) |
182 msg_res);
183 }
184 }
185 }
186 if (!cancel_count)
187 stat->d_nocanceled++;
188}
189
190/*
191 * Do all the things a cpu should do for a TLB shootdown message.
192 * Other cpu's may come here at the same time for this message.
193 */
194static void uv_bau_process_message(struct msg_desc *mdp,
195 struct bau_control *bcp)
196{
197 int msg_ack_count;
198 short socket_ack_count = 0;
199 struct ptc_stats *stat;
200 struct bau_payload_queue_entry *msg;
201 struct bau_control *smaster = bcp->socket_master;
202
203 /*
204 * This must be a normal message, or retry of a normal message
205 */
206 msg = mdp->msg;
207 stat = bcp->statp;
208 if (msg->address == TLB_FLUSH_ALL) {
209 local_flush_tlb();
210 stat->d_alltlb++;
211 } else {
212 __flush_tlb_one(msg->address);
213 stat->d_onetlb++;
214 }
215 stat->d_requestee++;
216
217 /*
218 * One cpu on each uvhub has the additional job on a RETRY
219 * of releasing the resource held by the message that is
220 * being retried. That message is identified by sending
221 * cpu number.
222 */
223 if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master)
224 uv_bau_process_retry_msg(mdp, bcp);
225
226 /*
227 * This is a sw_ack message, so we have to reply to it.
228 * Count each responding cpu on the socket. This avoids
229 * pinging the count's cache line back and forth between
230 * the sockets.
231 */
232 socket_ack_count = atomic_add_short_return(1, (struct atomic_short *)
233 &smaster->socket_acknowledge_count[mdp->msg_slot]);
234 if (socket_ack_count == bcp->cpus_in_socket) {
235 /*
236 * Both sockets dump their completed count total into
237 * the message's count.
238 */
239 smaster->socket_acknowledge_count[mdp->msg_slot] = 0;
240 msg_ack_count = atomic_add_short_return(socket_ack_count,
241 (struct atomic_short *)&msg->acknowledge_count);
242
243 if (msg_ack_count == bcp->cpus_in_uvhub) {
244 /*
245 * All cpus in uvhub saw it; reply
246 */
247 uv_reply_to_message(mdp, bcp);
248 }
249 }
250
251 return;
252}
253
254/*
255 * Determine the first cpu on a uvhub.
256 */
257static int uvhub_to_first_cpu(int uvhub)
258{
259 int cpu;
260 for_each_present_cpu(cpu)
261 if (uvhub == uv_cpu_to_blade_id(cpu))
262 return cpu;
263 return -1;
264}
265
266/*
267 * Last resort when we get a large number of destination timeouts is
268 * to clear resources held by a given cpu.
269 * Do this with IPI so that all messages in the BAU message queue
270 * can be identified by their nonzero sw_ack_vector field.
271 *
272 * This is entered for a single cpu on the uvhub.
273 * The sender wants this uvhub to free a specific message's
274 * sw_ack resources.
275 */
276static void
277uv_do_reset(void *ptr)
278{
279 int i;
280 int slot;
281 int count = 0;
282 unsigned long mmr;
283 unsigned long msg_res;
284 struct bau_control *bcp;
285 struct reset_args *rap;
286 struct bau_payload_queue_entry *msg;
287 struct ptc_stats *stat;
288
289 bcp = &per_cpu(bau_control, smp_processor_id());
290 rap = (struct reset_args *)ptr;
291 stat = bcp->statp;
292 stat->d_resets++;
293
294 /*
295 * We're looking for the given sender, and
296 * will free its sw_ack resource.
297 * If all cpu's finally responded after the timeout, its
298 * message 'replied_to' was set.
299 */
300 for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
301 /* uv_do_reset: same conditions for cancellation as
302 uv_bau_process_retry_msg() */
303 if ((msg->replied_to == 0) &&
304 (msg->canceled == 0) &&
305 (msg->sending_cpu == rap->sender) &&
306 (msg->sw_ack_vector) &&
307 (msg->msg_type != MSG_NOOP)) {
308 /*
309 * make everyone else ignore this message
310 */
311 msg->canceled = 1;
312 slot = msg - bcp->va_queue_first;
313 count++;
314 /*
315 * only reset the resource if it is still pending
316 */
317 mmr = uv_read_local_mmr
318 (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
319 msg_res = msg->sw_ack_vector;
320 if (mmr & msg_res) {
321 stat->d_rcanceled++;
322 uv_write_local_mmr(
323 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
324 (msg_res << UV_SW_ACK_NPENDING) |
325 msg_res);
326 }
327 }
328 }
329 return;
330}
331
332/*
333 * Use IPI to get all target uvhubs to release resources held by
334 * a given sending cpu number.
335 */
336static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution,
337 int sender)
338{
339 int uvhub;
340 int cpu;
341 cpumask_t mask;
342 struct reset_args reset_args;
343
344 reset_args.sender = sender;
345
346 cpus_clear(mask);
347 /* find a single cpu for each uvhub in this distribution mask */
348 for (uvhub = 0;
349 uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE;
350 uvhub++) {
351 if (!bau_uvhub_isset(uvhub, distribution))
352 continue;
353 /* find a cpu for this uvhub */
354 cpu = uvhub_to_first_cpu(uvhub);
355 cpu_set(cpu, mask);
356 }
357 /* IPI all cpus; Preemption is already disabled */
358 smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1);
359 return;
360}
361
362static inline unsigned long
363cycles_2_us(unsigned long long cyc)
364{
365 unsigned long long ns;
366 unsigned long us;
367 ns = (cyc * per_cpu(cyc2ns, smp_processor_id()))
368 >> CYC2NS_SCALE_FACTOR;
369 us = ns / 1000;
370 return us;
371}
372
373/*
374 * wait for all cpus on this hub to finish their sends and go quiet
375 * leaves uvhub_quiesce set so that no new broadcasts are started by
376 * bau_flush_send_and_wait()
377 */
378static inline void
379quiesce_local_uvhub(struct bau_control *hmaster)
380{
381 atomic_add_short_return(1, (struct atomic_short *)
382 &hmaster->uvhub_quiesce);
383}
384
385/*
386 * mark this quiet-requestor as done
387 */
388static inline void
389end_uvhub_quiesce(struct bau_control *hmaster)
390{
391 atomic_add_short_return(-1, (struct atomic_short *)
392 &hmaster->uvhub_quiesce);
393}
394
395/*
396 * Wait for completion of a broadcast software ack message
397 * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP
398 */
399static int uv_wait_completion(struct bau_desc *bau_desc,
400 unsigned long mmr_offset, int right_shift, int this_cpu,
401 struct bau_control *bcp, struct bau_control *smaster, long try)
402{
403 unsigned long descriptor_status;
404 cycles_t ttime;
405 struct ptc_stats *stat = bcp->statp;
406 struct bau_control *hmaster;
407
408 hmaster = bcp->uvhub_master;
409
410 /* spin on the status MMR, waiting for it to go idle */
411 while ((descriptor_status = (((unsigned long)
412 uv_read_local_mmr(mmr_offset) >>
413 right_shift) & UV_ACT_STATUS_MASK)) !=
414 DESC_STATUS_IDLE) {
415 /*
416 * Our software ack messages may be blocked because there are
417 * no swack resources available. As long as none of them
418 * has timed out hardware will NACK our message and its
419 * state will stay IDLE.
420 */
421 if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
422 stat->s_stimeout++;
423 return FLUSH_GIVEUP;
424 } else if (descriptor_status ==
425 DESC_STATUS_DESTINATION_TIMEOUT) {
426 stat->s_dtimeout++;
427 ttime = get_cycles();
428
429 /*
430 * Our retries may be blocked by all destination
431 * swack resources being consumed, and a timeout
432 * pending. In that case hardware returns the
433 * ERROR that looks like a destination timeout.
434 */
435 if (cycles_2_us(ttime - bcp->send_message) <
436 timeout_us) {
437 bcp->conseccompletes = 0;
438 return FLUSH_RETRY_PLUGGED;
439 }
440
441 bcp->conseccompletes = 0;
442 return FLUSH_RETRY_TIMEOUT;
443 } else {
444 /*
445 * descriptor_status is still BUSY
446 */
447 cpu_relax();
448 }
449 }
450 bcp->conseccompletes++;
451 return FLUSH_COMPLETE;
452}
453
454static inline cycles_t
455sec_2_cycles(unsigned long sec)
456{
457 unsigned long ns;
458 cycles_t cyc;
459
460 ns = sec * 1000000000;
461 cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
462 return cyc;
463}
464
465/*
466 * conditionally add 1 to *v, unless *v is >= u
467 * return 0 if we cannot add 1 to *v because it is >= u
468 * return 1 if we can add 1 to *v because it is < u
469 * the add is atomic
470 *
471 * This is close to atomic_add_unless(), but this allows the 'u' value
472 * to be lowered below the current 'v'. atomic_add_unless can only stop
473 * on equal.
474 */
475static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
476{
477 spin_lock(lock);
478 if (atomic_read(v) >= u) {
479 spin_unlock(lock);
480 return 0;
481 }
482 atomic_inc(v);
483 spin_unlock(lock);
484 return 1;
485}
486
487/*
488 * Our retries are blocked by all destination swack resources being
489 * in use, and a timeout is pending. In that case hardware immediately
490 * returns the ERROR that looks like a destination timeout.
491 */
492static void
493destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp,
494 struct bau_control *hmaster, struct ptc_stats *stat)
495{
496 udelay(bcp->plugged_delay);
497 bcp->plugged_tries++;
498 if (bcp->plugged_tries >= bcp->plugsb4reset) {
499 bcp->plugged_tries = 0;
500 quiesce_local_uvhub(hmaster);
501 spin_lock(&hmaster->queue_lock);
502 uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu);
503 spin_unlock(&hmaster->queue_lock);
504 end_uvhub_quiesce(hmaster);
505 bcp->ipi_attempts++;
506 stat->s_resets_plug++;
507 }
508}
509
510static void
511destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp,
512 struct bau_control *hmaster, struct ptc_stats *stat)
513{
514 hmaster->max_bau_concurrent = 1;
515 bcp->timeout_tries++;
516 if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
517 bcp->timeout_tries = 0;
518 quiesce_local_uvhub(hmaster);
519 spin_lock(&hmaster->queue_lock);
520 uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu);
521 spin_unlock(&hmaster->queue_lock);
522 end_uvhub_quiesce(hmaster);
523 bcp->ipi_attempts++;
524 stat->s_resets_timeout++;
525 }
526}
527
528/*
529 * Completions are taking a very long time due to a congested numalink
530 * network.
531 */
532static void
533disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat)
534{
535 int tcpu;
536 struct bau_control *tbcp;
537
538 /* let only one cpu do this disabling */
539 spin_lock(&disable_lock);
540 if (!baudisabled && bcp->period_requests &&
541 ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
542 /* it becomes this cpu's job to turn on the use of the
543 BAU again */
544 baudisabled = 1;
545 bcp->set_bau_off = 1;
546 bcp->set_bau_on_time = get_cycles() +
547 sec_2_cycles(bcp->congested_period);
548 stat->s_bau_disabled++;
549 for_each_present_cpu(tcpu) {
550 tbcp = &per_cpu(bau_control, tcpu);
551 tbcp->baudisabled = 1;
552 }
553 }
554 spin_unlock(&disable_lock);
555}
556
557/**
558 * uv_flush_send_and_wait
559 *
560 * Send a broadcast and wait for it to complete.
561 *
562 * The flush_mask contains the cpus the broadcast is to be sent to including
563 * cpus that are on the local uvhub.
564 *
565 * Returns 0 if all flushing represented in the mask was done.
566 * Returns 1 if it gives up entirely and the original cpu mask is to be
567 * returned to the kernel.
568 */
569int uv_flush_send_and_wait(struct bau_desc *bau_desc,
570 struct cpumask *flush_mask, struct bau_control *bcp)
571{
572 int right_shift;
573 int completion_status = 0;
574 int seq_number = 0;
575 long try = 0;
576 int cpu = bcp->uvhub_cpu;
577 int this_cpu = bcp->cpu;
578 unsigned long mmr_offset;
579 unsigned long index;
580 cycles_t time1;
581 cycles_t time2;
582 cycles_t elapsed;
583 struct ptc_stats *stat = bcp->statp;
584 struct bau_control *smaster = bcp->socket_master;
585 struct bau_control *hmaster = bcp->uvhub_master;
586
587 if (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
588 &hmaster->active_descriptor_count,
589 hmaster->max_bau_concurrent)) {
590 stat->s_throttles++;
591 do {
592 cpu_relax();
593 } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
594 &hmaster->active_descriptor_count,
595 hmaster->max_bau_concurrent));
596 }
597 while (hmaster->uvhub_quiesce)
598 cpu_relax();
599
600 if (cpu < UV_CPUS_PER_ACT_STATUS) {
601 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
602 right_shift = cpu * UV_ACT_STATUS_SIZE;
603 } else {
604 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
605 right_shift =
606 ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
607 }
608 time1 = get_cycles();
609 do {
610 if (try == 0) {
611 bau_desc->header.msg_type = MSG_REGULAR;
612 seq_number = bcp->message_number++;
613 } else {
614 bau_desc->header.msg_type = MSG_RETRY;
615 stat->s_retry_messages++;
616 }
617 bau_desc->header.sequence = seq_number;
618 index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
619 bcp->uvhub_cpu;
620 bcp->send_message = get_cycles();
621 uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
622 try++;
623 completion_status = uv_wait_completion(bau_desc, mmr_offset,
624 right_shift, this_cpu, bcp, smaster, try);
625
626 if (completion_status == FLUSH_RETRY_PLUGGED) {
627 destination_plugged(bau_desc, bcp, hmaster, stat);
628 } else if (completion_status == FLUSH_RETRY_TIMEOUT) {
629 destination_timeout(bau_desc, bcp, hmaster, stat);
630 }
631 if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
632 bcp->ipi_attempts = 0;
633 completion_status = FLUSH_GIVEUP;
634 break;
635 }
636 cpu_relax();
637 } while ((completion_status == FLUSH_RETRY_PLUGGED) ||
638 (completion_status == FLUSH_RETRY_TIMEOUT));
639 time2 = get_cycles();
640 bcp->plugged_tries = 0;
641 bcp->timeout_tries = 0;
642 if ((completion_status == FLUSH_COMPLETE) &&
643 (bcp->conseccompletes > bcp->complete_threshold) &&
644 (hmaster->max_bau_concurrent <
645 hmaster->max_bau_concurrent_constant))
646 hmaster->max_bau_concurrent++;
647 while (hmaster->uvhub_quiesce)
648 cpu_relax();
649 atomic_dec(&hmaster->active_descriptor_count);
650 if (time2 > time1) {
651 elapsed = time2 - time1;
652 stat->s_time += elapsed;
653 if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
654 bcp->period_requests++;
655 bcp->period_time += elapsed;
656 if ((elapsed > congested_cycles) &&
657 (bcp->period_requests > bcp->congested_reps)) {
658 disable_for_congestion(bcp, stat);
659 }
660 }
661 } else
662 stat->s_requestor--;
663 if (completion_status == FLUSH_COMPLETE && try > 1)
664 stat->s_retriesok++;
665 else if (completion_status == FLUSH_GIVEUP) {
666 stat->s_giveup++;
667 return 1;
668 }
669 return 0;
670}
671
672/**
673 * uv_flush_tlb_others - globally purge translation cache of a virtual
674 * address or all TLB's
675 * @cpumask: mask of all cpu's in which the address is to be removed
676 * @mm: mm_struct containing virtual address range
677 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
678 * @cpu: the current cpu
679 *
680 * This is the entry point for initiating any UV global TLB shootdown.
681 *
682 * Purges the translation caches of all specified processors of the given
683 * virtual address, or purges all TLB's on specified processors.
684 *
685 * The caller has derived the cpumask from the mm_struct. This function
686 * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
687 *
688 * The cpumask is converted into a uvhubmask of the uvhubs containing
689 * those cpus.
690 *
691 * Note that this function should be called with preemption disabled.
692 *
693 * Returns NULL if all remote flushing was done.
694 * Returns pointer to cpumask if some remote flushing remains to be
695 * done. The returned pointer is valid till preemption is re-enabled.
696 */
697const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
698 struct mm_struct *mm,
699 unsigned long va, unsigned int cpu)
700{
701 int tcpu;
702 int uvhub;
703 int locals = 0;
704 int remotes = 0;
705 int hubs = 0;
706 struct bau_desc *bau_desc;
707 struct cpumask *flush_mask;
708 struct ptc_stats *stat;
709 struct bau_control *bcp;
710 struct bau_control *tbcp;
711
712 /* kernel was booted 'nobau' */
713 if (nobau)
714 return cpumask;
715
716 bcp = &per_cpu(bau_control, cpu);
717 stat = bcp->statp;
718
719 /* bau was disabled due to slow response */
720 if (bcp->baudisabled) {
721 /* the cpu that disabled it must re-enable it */
722 if (bcp->set_bau_off) {
723 if (get_cycles() >= bcp->set_bau_on_time) {
724 stat->s_bau_reenabled++;
725 baudisabled = 0;
726 for_each_present_cpu(tcpu) {
727 tbcp = &per_cpu(bau_control, tcpu);
728 tbcp->baudisabled = 0;
729 tbcp->period_requests = 0;
730 tbcp->period_time = 0;
731 }
732 }
733 }
734 return cpumask;
735 }
736
737 /*
738 * Each sending cpu has a per-cpu mask which it fills from the caller's
739 * cpu mask. All cpus are converted to uvhubs and copied to the
740 * activation descriptor.
741 */
742 flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
743 /* don't actually do a shootdown of the local cpu */
744 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
745 if (cpu_isset(cpu, *cpumask))
746 stat->s_ntargself++;
747
748 bau_desc = bcp->descriptor_base;
749 bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
750 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
751
752 /* cpu statistics */
753 for_each_cpu(tcpu, flush_mask) {
754 uvhub = uv_cpu_to_blade_id(tcpu);
755 bau_uvhub_set(uvhub, &bau_desc->distribution);
756 if (uvhub == bcp->uvhub)
757 locals++;
758 else
759 remotes++;
760 }
761 if ((locals + remotes) == 0)
762 return NULL;
763 stat->s_requestor++;
764 stat->s_ntargcpu += remotes + locals;
765 stat->s_ntargremotes += remotes;
766 stat->s_ntarglocals += locals;
767 remotes = bau_uvhub_weight(&bau_desc->distribution);
768
769 /* uvhub statistics */
770 hubs = bau_uvhub_weight(&bau_desc->distribution);
771 if (locals) {
772 stat->s_ntarglocaluvhub++;
773 stat->s_ntargremoteuvhub += (hubs - 1);
774 } else
775 stat->s_ntargremoteuvhub += hubs;
776 stat->s_ntarguvhub += hubs;
777 if (hubs >= 16)
778 stat->s_ntarguvhub16++;
779 else if (hubs >= 8)
780 stat->s_ntarguvhub8++;
781 else if (hubs >= 4)
782 stat->s_ntarguvhub4++;
783 else if (hubs >= 2)
784 stat->s_ntarguvhub2++;
785 else
786 stat->s_ntarguvhub1++;
787
788 bau_desc->payload.address = va;
789 bau_desc->payload.sending_cpu = cpu;
790
791 /*
792 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
793 * or 1 if it gave up and the original cpumask should be returned.
794 */
795 if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp))
796 return NULL;
797 else
798 return cpumask;
799}
800
801/*
802 * The BAU message interrupt comes here. (registered by set_intr_gate)
803 * See entry_64.S
804 *
805 * We received a broadcast assist message.
806 *
807 * Interrupts are disabled; this interrupt could represent
808 * the receipt of several messages.
809 *
810 * All cores/threads on this hub get this interrupt.
811 * The last one to see it does the software ack.
812 * (the resource will not be freed until noninterruptible cpus see this
813 * interrupt; hardware may timeout the s/w ack and reply ERROR)
814 */
815void uv_bau_message_interrupt(struct pt_regs *regs)
816{
817 int count = 0;
818 cycles_t time_start;
819 struct bau_payload_queue_entry *msg;
820 struct bau_control *bcp;
821 struct ptc_stats *stat;
822 struct msg_desc msgdesc;
823
824 time_start = get_cycles();
825 bcp = &per_cpu(bau_control, smp_processor_id());
826 stat = bcp->statp;
827 msgdesc.va_queue_first = bcp->va_queue_first;
828 msgdesc.va_queue_last = bcp->va_queue_last;
829 msg = bcp->bau_msg_head;
830 while (msg->sw_ack_vector) {
831 count++;
832 msgdesc.msg_slot = msg - msgdesc.va_queue_first;
833 msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1;
834 msgdesc.msg = msg;
835 uv_bau_process_message(&msgdesc, bcp);
836 msg++;
837 if (msg > msgdesc.va_queue_last)
838 msg = msgdesc.va_queue_first;
839 bcp->bau_msg_head = msg;
840 }
841 stat->d_time += (get_cycles() - time_start);
842 if (!count)
843 stat->d_nomsg++;
844 else if (count > 1)
845 stat->d_multmsg++;
846 ack_APIC_irq();
847}
848
849/*
850 * uv_enable_timeouts
851 *
852 * Each target uvhub (i.e. a uvhub that has cpu's) needs to have
853 * shootdown message timeouts enabled. The timeout does not cause
854 * an interrupt, but causes an error message to be returned to
855 * the sender.
856 */
857static void uv_enable_timeouts(void)
858{
859 int uvhub;
860 int nuvhubs;
861 int pnode;
862 unsigned long mmr_image;
863
864 nuvhubs = uv_num_possible_blades();
865
866 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
867 if (!uv_blade_nr_possible_cpus(uvhub))
868 continue;
869
870 pnode = uv_blade_to_pnode(uvhub);
871 mmr_image =
872 uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
873 /*
874 * Set the timeout period and then lock it in; this is done in
875 * three steps below, the last of which locks the period in.
876 *
877 * To program the period, the SOFT_ACK_MODE must be off.
878 */
879 mmr_image &= ~((unsigned long)1 <<
880 UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT);
881 uv_write_global_mmr64
882 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
883 /*
884 * Set the 4-bit period.
885 */
886 mmr_image &= ~((unsigned long)0xf <<
887 UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT);
888 mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
889 UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT);
890 uv_write_global_mmr64
891 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
892 /*
893 * Subsequent reversals of the timebase bit (3) cause an
894 * immediate timeout of one or all INTD resources as
895 * indicated in bits 2:0 (7 causes all of them to timeout).
896 */
897 mmr_image |= ((unsigned long)1 <<
898 UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT);
899 uv_write_global_mmr64
900 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
901 }
902}
903
904static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
905{
906 if (*offset < num_possible_cpus())
907 return offset;
908 return NULL;
909}
910
911static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
912{
913 (*offset)++;
914 if (*offset < num_possible_cpus())
915 return offset;
916 return NULL;
917}
918
919static void uv_ptc_seq_stop(struct seq_file *file, void *data)
920{
921}
922
923static inline unsigned long long
924microsec_2_cycles(unsigned long microsec)
925{
926 unsigned long ns;
927 unsigned long long cyc;
928
929 ns = microsec * 1000;
930 cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
931 return cyc;
932}
933
934/*
935 * Display the statistics thru /proc.
936 * 'data' points to the cpu number
937 */
938static int uv_ptc_seq_show(struct seq_file *file, void *data)
939{
940 struct ptc_stats *stat;
941 int cpu;
942
943 cpu = *(loff_t *)data;
944
945 if (!cpu) {
946 seq_printf(file,
947 "# cpu sent stime self locals remotes ncpus localhub ");
948 seq_printf(file,
949 "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
950 seq_printf(file,
951 "numuvhubs4 numuvhubs2 numuvhubs1 dto ");
952 seq_printf(file,
953 "retries rok resetp resett giveup sto bz throt ");
954 seq_printf(file,
955 "sw_ack recv rtime all ");
956 seq_printf(file,
957 "one mult none retry canc nocan reset rcan ");
958 seq_printf(file,
959 "disable enable\n");
960 }
961 if (cpu < num_possible_cpus() && cpu_online(cpu)) {
962 stat = &per_cpu(ptcstats, cpu);
963 /* source side statistics */
964 seq_printf(file,
965 "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
966 cpu, stat->s_requestor, cycles_2_us(stat->s_time),
967 stat->s_ntargself, stat->s_ntarglocals,
968 stat->s_ntargremotes, stat->s_ntargcpu,
969 stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
970 stat->s_ntarguvhub, stat->s_ntarguvhub16);
971 seq_printf(file, "%ld %ld %ld %ld %ld ",
972 stat->s_ntarguvhub8, stat->s_ntarguvhub4,
973 stat->s_ntarguvhub2, stat->s_ntarguvhub1,
974 stat->s_dtimeout);
975 seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
976 stat->s_retry_messages, stat->s_retriesok,
977 stat->s_resets_plug, stat->s_resets_timeout,
978 stat->s_giveup, stat->s_stimeout,
979 stat->s_busy, stat->s_throttles);
980
981 /* destination side statistics */
982 seq_printf(file,
983 "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
984 uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
985 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
986 stat->d_requestee, cycles_2_us(stat->d_time),
987 stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
988 stat->d_nomsg, stat->d_retries, stat->d_canceled,
989 stat->d_nocanceled, stat->d_resets,
990 stat->d_rcanceled);
991 seq_printf(file, "%ld %ld\n",
992 stat->s_bau_disabled, stat->s_bau_reenabled);
993 }
994
995 return 0;
996}
997
998/*
999 * Display the tunables thru debugfs
1000 */
1001static ssize_t tunables_read(struct file *file, char __user *userbuf,
1002 size_t count, loff_t *ppos)
1003{
1004 char *buf;
1005 int ret;
1006
1007 buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n",
1008 "max_bau_concurrent plugged_delay plugsb4reset",
1009 "timeoutsb4reset ipi_reset_limit complete_threshold",
1010 "congested_response_us congested_reps congested_period",
1011 max_bau_concurrent, plugged_delay, plugsb4reset,
1012 timeoutsb4reset, ipi_reset_limit, complete_threshold,
1013 congested_response_us, congested_reps, congested_period);
1014
1015 if (!buf)
1016 return -ENOMEM;
1017
1018 ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
1019 kfree(buf);
1020 return ret;
1021}
1022
1023/*
1024 * -1: reset the statistics
1025 * 0: display meaning of the statistics
1026 */
1027static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
1028 size_t count, loff_t *data)
1029{
1030 int cpu;
1031 long input_arg;
1032 char optstr[64];
1033 struct ptc_stats *stat;
1034
1035 if (count == 0 || count > sizeof(optstr))
1036 return -EINVAL;
1037 if (copy_from_user(optstr, user, count))
1038 return -EFAULT;
1039 optstr[count - 1] = '\0';
1040 if (strict_strtol(optstr, 10, &input_arg) < 0) {
1041 printk(KERN_DEBUG "%s is invalid\n", optstr);
1042 return -EINVAL;
1043 }
1044
1045 if (input_arg == 0) {
1046 printk(KERN_DEBUG "# cpu: cpu number\n");
1047 printk(KERN_DEBUG "Sender statistics:\n");
1048 printk(KERN_DEBUG
1049 "sent: number of shootdown messages sent\n");
1050 printk(KERN_DEBUG
1051 "stime: time spent sending messages\n");
1052 printk(KERN_DEBUG
1053 "numuvhubs: number of hubs targeted with shootdown\n");
1054 printk(KERN_DEBUG
1055 "numuvhubs16: number times 16 or more hubs targeted\n");
1056 printk(KERN_DEBUG
1057 "numuvhubs8: number times 8 or more hubs targeted\n");
1058 printk(KERN_DEBUG
1059 "numuvhubs4: number times 4 or more hubs targeted\n");
1060 printk(KERN_DEBUG
1061 "numuvhubs2: number times 2 or more hubs targeted\n");
1062 printk(KERN_DEBUG
1063 "numuvhubs1: number times 1 hub targeted\n");
1064 printk(KERN_DEBUG
1065 "numcpus: number of cpus targeted with shootdown\n");
1066 printk(KERN_DEBUG
1067 "dto: number of destination timeouts\n");
1068 printk(KERN_DEBUG
1069 "retries: destination timeout retries sent\n");
1070 printk(KERN_DEBUG
1071 "rok: destination timeouts successfully retried\n");
1072 printk(KERN_DEBUG
1073 "resetp: ipi-style resource resets for plugs\n");
1074 printk(KERN_DEBUG
1075 "resett: ipi-style resource resets for timeouts\n");
1076 printk(KERN_DEBUG
1077 "giveup: fall-backs to ipi-style shootdowns\n");
1078 printk(KERN_DEBUG
1079 "sto: number of source timeouts\n");
1080 printk(KERN_DEBUG
1081 "bz: number of stay-busy's\n");
1082 printk(KERN_DEBUG
1083 "throt: number times spun in throttle\n");
1084 printk(KERN_DEBUG "Destination side statistics:\n");
1085 printk(KERN_DEBUG
1086 "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
1087 printk(KERN_DEBUG
1088 "recv: shootdown messages received\n");
1089 printk(KERN_DEBUG
1090 "rtime: time spent processing messages\n");
1091 printk(KERN_DEBUG
1092 "all: shootdown all-tlb messages\n");
1093 printk(KERN_DEBUG
1094 "one: shootdown one-tlb messages\n");
1095 printk(KERN_DEBUG
1096 "mult: interrupts that found multiple messages\n");
1097 printk(KERN_DEBUG
1098 "none: interrupts that found no messages\n");
1099 printk(KERN_DEBUG
1100 "retry: number of retry messages processed\n");
1101 printk(KERN_DEBUG
1102 "canc: number messages canceled by retries\n");
1103 printk(KERN_DEBUG
1104 "nocan: number retries that found nothing to cancel\n");
1105 printk(KERN_DEBUG
1106 "reset: number of ipi-style reset requests processed\n");
1107 printk(KERN_DEBUG
1108 "rcan: number messages canceled by reset requests\n");
1109 printk(KERN_DEBUG
1110 "disable: number times use of the BAU was disabled\n");
1111 printk(KERN_DEBUG
1112 "enable: number times use of the BAU was re-enabled\n");
1113 } else if (input_arg == -1) {
1114 for_each_present_cpu(cpu) {
1115 stat = &per_cpu(ptcstats, cpu);
1116 memset(stat, 0, sizeof(struct ptc_stats));
1117 }
1118 }
1119
1120 return count;
1121}
1122
1123static int local_atoi(const char *name)
1124{
1125 int val = 0;
1126
1127 for (;; name++) {
1128 switch (*name) {
1129 case '0' ... '9':
1130 val = 10*val+(*name-'0');
1131 break;
1132 default:
1133 return val;
1134 }
1135 }
1136}
1137
1138/*
1139 * set the tunables
1140 * 0 values reset them to defaults
1141 */
1142static ssize_t tunables_write(struct file *file, const char __user *user,
1143 size_t count, loff_t *data)
1144{
1145 int cpu;
1146 int cnt = 0;
1147 int val;
1148 char *p;
1149 char *q;
1150 char instr[64];
1151 struct bau_control *bcp;
1152
1153 if (count == 0 || count > sizeof(instr)-1)
1154 return -EINVAL;
1155 if (copy_from_user(instr, user, count))
1156 return -EFAULT;
1157
1158 instr[count] = '\0';
1159 /* count the fields */
1160 p = instr + strspn(instr, WHITESPACE);
1161 q = p;
1162 for (; *p; p = q + strspn(q, WHITESPACE)) {
1163 q = p + strcspn(p, WHITESPACE);
1164 cnt++;
1165 if (q == p)
1166 break;
1167 }
1168 if (cnt != 9) {
1169 printk(KERN_INFO "bau tunable error: should be 9 numbers\n");
1170 return -EINVAL;
1171 }
1172
1173 p = instr + strspn(instr, WHITESPACE);
1174 q = p;
1175 for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) {
1176 q = p + strcspn(p, WHITESPACE);
1177 val = local_atoi(p);
1178 switch (cnt) {
1179 case 0:
1180 if (val == 0) {
1181 max_bau_concurrent = MAX_BAU_CONCURRENT;
1182 max_bau_concurrent_constant =
1183 MAX_BAU_CONCURRENT;
1184 continue;
1185 }
1186 bcp = &per_cpu(bau_control, smp_processor_id());
1187 if (val < 1 || val > bcp->cpus_in_uvhub) {
1188 printk(KERN_DEBUG
1189 "Error: BAU max concurrent %d is invalid\n",
1190 val);
1191 return -EINVAL;
1192 }
1193 max_bau_concurrent = val;
1194 max_bau_concurrent_constant = val;
1195 continue;
1196 case 1:
1197 if (val == 0)
1198 plugged_delay = PLUGGED_DELAY;
1199 else
1200 plugged_delay = val;
1201 continue;
1202 case 2:
1203 if (val == 0)
1204 plugsb4reset = PLUGSB4RESET;
1205 else
1206 plugsb4reset = val;
1207 continue;
1208 case 3:
1209 if (val == 0)
1210 timeoutsb4reset = TIMEOUTSB4RESET;
1211 else
1212 timeoutsb4reset = val;
1213 continue;
1214 case 4:
1215 if (val == 0)
1216 ipi_reset_limit = IPI_RESET_LIMIT;
1217 else
1218 ipi_reset_limit = val;
1219 continue;
1220 case 5:
1221 if (val == 0)
1222 complete_threshold = COMPLETE_THRESHOLD;
1223 else
1224 complete_threshold = val;
1225 continue;
1226 case 6:
1227 if (val == 0)
1228 congested_response_us = CONGESTED_RESPONSE_US;
1229 else
1230 congested_response_us = val;
1231 continue;
1232 case 7:
1233 if (val == 0)
1234 congested_reps = CONGESTED_REPS;
1235 else
1236 congested_reps = val;
1237 continue;
1238 case 8:
1239 if (val == 0)
1240 congested_period = CONGESTED_PERIOD;
1241 else
1242 congested_period = val;
1243 continue;
1244 }
1245 if (q == p)
1246 break;
1247 }
1248 for_each_present_cpu(cpu) {
1249 bcp = &per_cpu(bau_control, cpu);
1250 bcp->max_bau_concurrent = max_bau_concurrent;
1251 bcp->max_bau_concurrent_constant = max_bau_concurrent;
1252 bcp->plugged_delay = plugged_delay;
1253 bcp->plugsb4reset = plugsb4reset;
1254 bcp->timeoutsb4reset = timeoutsb4reset;
1255 bcp->ipi_reset_limit = ipi_reset_limit;
1256 bcp->complete_threshold = complete_threshold;
1257 bcp->congested_response_us = congested_response_us;
1258 bcp->congested_reps = congested_reps;
1259 bcp->congested_period = congested_period;
1260 }
1261 return count;
1262}
1263
1264static const struct seq_operations uv_ptc_seq_ops = {
1265 .start = uv_ptc_seq_start,
1266 .next = uv_ptc_seq_next,
1267 .stop = uv_ptc_seq_stop,
1268 .show = uv_ptc_seq_show
1269};
1270
1271static int uv_ptc_proc_open(struct inode *inode, struct file *file)
1272{
1273 return seq_open(file, &uv_ptc_seq_ops);
1274}
1275
1276static int tunables_open(struct inode *inode, struct file *file)
1277{
1278 return 0;
1279}
1280
1281static const struct file_operations proc_uv_ptc_operations = {
1282 .open = uv_ptc_proc_open,
1283 .read = seq_read,
1284 .write = uv_ptc_proc_write,
1285 .llseek = seq_lseek,
1286 .release = seq_release,
1287};
1288
1289static const struct file_operations tunables_fops = {
1290 .open = tunables_open,
1291 .read = tunables_read,
1292 .write = tunables_write,
1293 .llseek = default_llseek,
1294};
1295
1296static int __init uv_ptc_init(void)
1297{
1298 struct proc_dir_entry *proc_uv_ptc;
1299
1300 if (!is_uv_system())
1301 return 0;
1302
1303 proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
1304 &proc_uv_ptc_operations);
1305 if (!proc_uv_ptc) {
1306 printk(KERN_ERR "unable to create %s proc entry\n",
1307 UV_PTC_BASENAME);
1308 return -EINVAL;
1309 }
1310
1311 tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL);
1312 if (!tunables_dir) {
1313 printk(KERN_ERR "unable to create debugfs directory %s\n",
1314 UV_BAU_TUNABLES_DIR);
1315 return -EINVAL;
1316 }
1317 tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
1318 tunables_dir, NULL, &tunables_fops);
1319 if (!tunables_file) {
1320 printk(KERN_ERR "unable to create debugfs file %s\n",
1321 UV_BAU_TUNABLES_FILE);
1322 return -EINVAL;
1323 }
1324 return 0;
1325}
1326
1327/*
1328 * initialize the sending side's sending buffers
1329 */
1330static void
1331uv_activation_descriptor_init(int node, int pnode)
1332{
1333 int i;
1334 int cpu;
1335 unsigned long pa;
1336 unsigned long m;
1337 unsigned long n;
1338 struct bau_desc *bau_desc;
1339 struct bau_desc *bd2;
1340 struct bau_control *bcp;
1341
1342 /*
1343 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
1344 * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub
1345 */
1346 bau_desc = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)*
1347 UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
1348 BUG_ON(!bau_desc);
1349
1350 pa = uv_gpa(bau_desc); /* need the real nasid*/
1351 n = pa >> uv_nshift;
1352 m = pa & uv_mmask;
1353
1354 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
1355 (n << UV_DESC_BASE_PNODE_SHIFT | m));
1356
1357 /*
1358 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
1359 * cpu even though we only use the first one; one descriptor can
1360 * describe a broadcast to 256 uv hubs.
1361 */
1362 for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR);
1363 i++, bd2++) {
1364 memset(bd2, 0, sizeof(struct bau_desc));
1365 bd2->header.sw_ack_flag = 1;
1366 /*
1367 * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub
1368 * in the partition. The bit map will indicate uvhub numbers,
1369 * which are 0-N in a partition. Pnodes are unique system-wide.
1370 */
1371 bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
1372 bd2->header.dest_subnodeid = 0x10; /* the LB */
1373 bd2->header.command = UV_NET_ENDPOINT_INTD;
1374 bd2->header.int_both = 1;
1375 /*
1376 * all others need to be set to zero:
1377 * fairness chaining multilevel count replied_to
1378 */
1379 }
1380 for_each_present_cpu(cpu) {
1381 if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
1382 continue;
1383 bcp = &per_cpu(bau_control, cpu);
1384 bcp->descriptor_base = bau_desc;
1385 }
1386}
1387
1388/*
1389 * initialize the destination side's receiving buffers
1390 * entered for each uvhub in the partition
1391 * - node is first node (kernel memory notion) on the uvhub
1392 * - pnode is the uvhub's physical identifier
1393 */
1394static void
1395uv_payload_queue_init(int node, int pnode)
1396{
1397 int pn;
1398 int cpu;
1399 char *cp;
1400 unsigned long pa;
1401 struct bau_payload_queue_entry *pqp;
1402 struct bau_payload_queue_entry *pqp_malloc;
1403 struct bau_control *bcp;
1404
1405 pqp = (struct bau_payload_queue_entry *) kmalloc_node(
1406 (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry),
1407 GFP_KERNEL, node);
1408 BUG_ON(!pqp);
1409 pqp_malloc = pqp;
1410
1411 cp = (char *)pqp + 31;
1412 pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
1413
1414 for_each_present_cpu(cpu) {
1415 if (pnode != uv_cpu_to_pnode(cpu))
1416 continue;
1417 /* for every cpu on this pnode: */
1418 bcp = &per_cpu(bau_control, cpu);
1419 bcp->va_queue_first = pqp;
1420 bcp->bau_msg_head = pqp;
1421 bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1);
1422 }
1423 /*
1424 * need the pnode of where the memory was really allocated
1425 */
1426 pa = uv_gpa(pqp);
1427 pn = pa >> uv_nshift;
1428 uv_write_global_mmr64(pnode,
1429 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
1430 ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
1431 uv_physnodeaddr(pqp));
1432 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
1433 uv_physnodeaddr(pqp));
1434 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
1435 (unsigned long)
1436 uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)));
1437 /* in effect, all msg_type's are set to MSG_NOOP */
1438 memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE);
1439}
1440
1441/*
1442 * Initialization of each UV hub's structures
1443 */
1444static void __init uv_init_uvhub(int uvhub, int vector)
1445{
1446 int node;
1447 int pnode;
1448 unsigned long apicid;
1449
1450 node = uvhub_to_first_node(uvhub);
1451 pnode = uv_blade_to_pnode(uvhub);
1452 uv_activation_descriptor_init(node, pnode);
1453 uv_payload_queue_init(node, pnode);
1454 /*
1455 * the below initialization can't be in firmware because the
1456 * messaging IRQ will be determined by the OS
1457 */
1458 apicid = uvhub_to_first_apicid(uvhub);
1459 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
1460 ((apicid << 32) | vector));
1461}
1462
1463/*
1464 * We will set BAU_MISC_CONTROL with a timeout period.
1465 * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT.
1466 * So the destination timeout period has to be calculated from them.
1467 */
1468static int
1469calculate_destination_timeout(void)
1470{
1471 unsigned long mmr_image;
1472 int mult1;
1473 int mult2;
1474 int index;
1475 int base;
1476 int ret;
1477 unsigned long ts_ns;
1478
1479 mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK;
1480 mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
1481 index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
1482 mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
1483 mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
1484 base = timeout_base_ns[index];
1485 ts_ns = base * mult1 * mult2;
1486 ret = ts_ns / 1000;
1487 return ret;
1488}
1489
1490/*
1491 * initialize the bau_control structure for each cpu
1492 */
1493static void __init uv_init_per_cpu(int nuvhubs)
1494{
1495 int i;
1496 int cpu;
1497 int pnode;
1498 int uvhub;
1499 int have_hmaster;
1500 short socket = 0;
1501 unsigned short socket_mask;
1502 unsigned char *uvhub_mask;
1503 struct bau_control *bcp;
1504 struct uvhub_desc *bdp;
1505 struct socket_desc *sdp;
1506 struct bau_control *hmaster = NULL;
1507 struct bau_control *smaster = NULL;
1508 struct socket_desc {
1509 short num_cpus;
1510 short cpu_number[16];
1511 };
1512 struct uvhub_desc {
1513 unsigned short socket_mask;
1514 short num_cpus;
1515 short uvhub;
1516 short pnode;
1517 struct socket_desc socket[2];
1518 };
1519 struct uvhub_desc *uvhub_descs;
1520
1521 timeout_us = calculate_destination_timeout();
1522
1523 uvhub_descs = (struct uvhub_desc *)
1524 kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
1525 memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
1526 uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
1527 for_each_present_cpu(cpu) {
1528 bcp = &per_cpu(bau_control, cpu);
1529 memset(bcp, 0, sizeof(struct bau_control));
1530 pnode = uv_cpu_hub_info(cpu)->pnode;
1531 uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
1532 *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
1533 bdp = &uvhub_descs[uvhub];
1534 bdp->num_cpus++;
1535 bdp->uvhub = uvhub;
1536 bdp->pnode = pnode;
1537 /* kludge: 'assuming' one node per socket, and assuming that
1538 disabling a socket just leaves a gap in node numbers */
1539 socket = (cpu_to_node(cpu) & 1);
1540 bdp->socket_mask |= (1 << socket);
1541 sdp = &bdp->socket[socket];
1542 sdp->cpu_number[sdp->num_cpus] = cpu;
1543 sdp->num_cpus++;
1544 }
1545 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
1546 if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
1547 continue;
1548 have_hmaster = 0;
1549 bdp = &uvhub_descs[uvhub];
1550 socket_mask = bdp->socket_mask;
1551 socket = 0;
1552 while (socket_mask) {
1553 if (!(socket_mask & 1))
1554 goto nextsocket;
1555 sdp = &bdp->socket[socket];
1556 for (i = 0; i < sdp->num_cpus; i++) {
1557 cpu = sdp->cpu_number[i];
1558 bcp = &per_cpu(bau_control, cpu);
1559 bcp->cpu = cpu;
1560 if (i == 0) {
1561 smaster = bcp;
1562 if (!have_hmaster) {
1563 have_hmaster++;
1564 hmaster = bcp;
1565 }
1566 }
1567 bcp->cpus_in_uvhub = bdp->num_cpus;
1568 bcp->cpus_in_socket = sdp->num_cpus;
1569 bcp->socket_master = smaster;
1570 bcp->uvhub = bdp->uvhub;
1571 bcp->uvhub_master = hmaster;
1572 bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
1573 blade_processor_id;
1574 }
1575nextsocket:
1576 socket++;
1577 socket_mask = (socket_mask >> 1);
1578 }
1579 }
1580 kfree(uvhub_descs);
1581 kfree(uvhub_mask);
1582 for_each_present_cpu(cpu) {
1583 bcp = &per_cpu(bau_control, cpu);
1584 bcp->baudisabled = 0;
1585 bcp->statp = &per_cpu(ptcstats, cpu);
1586 /* time interval to catch a hardware stay-busy bug */
1587 bcp->timeout_interval = microsec_2_cycles(2*timeout_us);
1588 bcp->max_bau_concurrent = max_bau_concurrent;
1589 bcp->max_bau_concurrent_constant = max_bau_concurrent;
1590 bcp->plugged_delay = plugged_delay;
1591 bcp->plugsb4reset = plugsb4reset;
1592 bcp->timeoutsb4reset = timeoutsb4reset;
1593 bcp->ipi_reset_limit = ipi_reset_limit;
1594 bcp->complete_threshold = complete_threshold;
1595 bcp->congested_response_us = congested_response_us;
1596 bcp->congested_reps = congested_reps;
1597 bcp->congested_period = congested_period;
1598 }
1599}
1600
1601/*
1602 * Initialization of BAU-related structures
1603 */
1604static int __init uv_bau_init(void)
1605{
1606 int uvhub;
1607 int pnode;
1608 int nuvhubs;
1609 int cur_cpu;
1610 int vector;
1611 unsigned long mmr;
1612
1613 if (!is_uv_system())
1614 return 0;
1615
1616 if (nobau)
1617 return 0;
1618
1619 for_each_possible_cpu(cur_cpu)
1620 zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
1621 GFP_KERNEL, cpu_to_node(cur_cpu));
1622
1623 uv_nshift = uv_hub_info->m_val;
1624 uv_mmask = (1UL << uv_hub_info->m_val) - 1;
1625 nuvhubs = uv_num_possible_blades();
1626 spin_lock_init(&disable_lock);
1627 congested_cycles = microsec_2_cycles(congested_response_us);
1628
1629 uv_init_per_cpu(nuvhubs);
1630
1631 uv_partition_base_pnode = 0x7fffffff;
1632 for (uvhub = 0; uvhub < nuvhubs; uvhub++)
1633 if (uv_blade_nr_possible_cpus(uvhub) &&
1634 (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
1635 uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
1636
1637 vector = UV_BAU_MESSAGE;
1638 for_each_possible_blade(uvhub)
1639 if (uv_blade_nr_possible_cpus(uvhub))
1640 uv_init_uvhub(uvhub, vector);
1641
1642 uv_enable_timeouts();
1643 alloc_intr_gate(vector, uv_bau_message_intr1);
1644
1645 for_each_possible_blade(uvhub) {
1646 if (uv_blade_nr_possible_cpus(uvhub)) {
1647 pnode = uv_blade_to_pnode(uvhub);
1648 /* INIT the bau */
1649 uv_write_global_mmr64(pnode,
1650 UVH_LB_BAU_SB_ACTIVATION_CONTROL,
1651 ((unsigned long)1 << 63));
1652 mmr = 1; /* should be 1 to broadcast to both sockets */
1653 uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST,
1654 mmr);
1655 }
1656 }
1657
1658 return 0;
1659}
1660core_initcall(uv_bau_init);
1661fs_initcall(uv_ptc_init);
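
One pattern worth calling out from the deleted file: the sender-side throttling in uv_flush_send_and_wait() counts in-flight descriptors with atomic_inc_unless_ge(), a lock-protected "increment only while below a ceiling" which, unlike atomic_add_unless(), still refuses once the ceiling has been lowered below the current value. A rough user-space sketch of the same idea (a pthread mutex stands in for the kernel spinlock; the names are illustrative, not kernel API):

#include <pthread.h>

/* Illustrative counterpart of atomic_inc_unless_ge(): bump *v only while
 * it is strictly below the ceiling u, and report whether it happened. */
static int inc_unless_ge(pthread_mutex_t *lock, int *v, int u)
{
	int added = 0;

	pthread_mutex_lock(lock);
	if (*v < u) {
		(*v)++;
		added = 1;
	}
	pthread_mutex_unlock(lock);
	return added;
}

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static int active_descriptors;

/* A sender spins for a slot, mirroring the throttle loop in
 * uv_flush_send_and_wait(); the kernel uses cpu_relax() in the body
 * and decrements the count when the broadcast completes. */
static void acquire_slot(int max_concurrent)
{
	while (!inc_unless_ge(&slot_lock, &active_descriptors, max_concurrent))
		;
}
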
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
deleted file mode 100644
index 7b24460917d5..000000000000
--- a/arch/x86/kernel/uv_irq.c
+++ /dev/null
@@ -1,285 +0,0 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * SGI UV IRQ functions
7 *
8 * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
9 */
10
11#include <linux/module.h>
12#include <linux/rbtree.h>
13#include <linux/slab.h>
14#include <linux/irq.h>
15
16#include <asm/apic.h>
17#include <asm/uv/uv_irq.h>
18#include <asm/uv/uv_hub.h>
19
20/* MMR offset and pnode of hub sourcing interrupts for a given irq */
21struct uv_irq_2_mmr_pnode{
22 struct rb_node list;
23 unsigned long offset;
24 int pnode;
25 int irq;
26};
27
28static spinlock_t uv_irq_lock;
29static struct rb_root uv_irq_root;
30
31static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool);
32
33static void uv_noop(struct irq_data *data) { }
34
35static void uv_ack_apic(struct irq_data *data)
36{
37 ack_APIC_irq();
38}
39
40static struct irq_chip uv_irq_chip = {
41 .name = "UV-CORE",
42 .irq_mask = uv_noop,
43 .irq_unmask = uv_noop,
44 .irq_eoi = uv_ack_apic,
45 .irq_set_affinity = uv_set_irq_affinity,
46};
47
48/*
49 * Add offset and pnode information of the hub sourcing interrupts to the
50 * rb tree for a specific irq.
51 */
52static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
53{
54 struct rb_node **link = &uv_irq_root.rb_node;
55 struct rb_node *parent = NULL;
56 struct uv_irq_2_mmr_pnode *n;
57 struct uv_irq_2_mmr_pnode *e;
58 unsigned long irqflags;
59
60 n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
61 uv_blade_to_memory_nid(blade));
62 if (!n)
63 return -ENOMEM;
64
65 n->irq = irq;
66 n->offset = offset;
67 n->pnode = uv_blade_to_pnode(blade);
68 spin_lock_irqsave(&uv_irq_lock, irqflags);
69 /* Find the right place in the rbtree: */
70 while (*link) {
71 parent = *link;
72 e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
73
74 if (unlikely(irq == e->irq)) {
75 /* irq entry exists */
76 e->pnode = uv_blade_to_pnode(blade);
77 e->offset = offset;
78 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
79 kfree(n);
80 return 0;
81 }
82
83 if (irq < e->irq)
84 link = &(*link)->rb_left;
85 else
86 link = &(*link)->rb_right;
87 }
88
89 /* Insert the node into the rbtree. */
90 rb_link_node(&n->list, parent, link);
91 rb_insert_color(&n->list, &uv_irq_root);
92
93 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
94 return 0;
95}
96
97/* Retrieve offset and pnode information from the rb tree for a specific irq */
98int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
99{
100 struct uv_irq_2_mmr_pnode *e;
101 struct rb_node *n;
102 unsigned long irqflags;
103
104 spin_lock_irqsave(&uv_irq_lock, irqflags);
105 n = uv_irq_root.rb_node;
106 while (n) {
107 e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
108
109 if (e->irq == irq) {
110 *offset = e->offset;
111 *pnode = e->pnode;
112 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
113 return 0;
114 }
115
116 if (irq < e->irq)
117 n = n->rb_left;
118 else
119 n = n->rb_right;
120 }
121 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
122 return -1;
123}
124
125/*
126 * Re-target the irq to the specified CPU and enable the specified MMR located
127 * on the specified blade to allow the sending of MSIs to the specified CPU.
128 */
129static int
130arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
131 unsigned long mmr_offset, int limit)
132{
133 const struct cpumask *eligible_cpu = cpumask_of(cpu);
134 struct irq_cfg *cfg = get_irq_chip_data(irq);
135 unsigned long mmr_value;
136 struct uv_IO_APIC_route_entry *entry;
137 int mmr_pnode, err;
138
139 BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
140 sizeof(unsigned long));
141
142 err = assign_irq_vector(irq, cfg, eligible_cpu);
143 if (err != 0)
144 return err;
145
146 if (limit == UV_AFFINITY_CPU)
147 irq_set_status_flags(irq, IRQ_NO_BALANCING);
148 else
149 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
150
151 set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
152 irq_name);
153
154 mmr_value = 0;
155 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
156 entry->vector = cfg->vector;
157 entry->delivery_mode = apic->irq_delivery_mode;
158 entry->dest_mode = apic->irq_dest_mode;
159 entry->polarity = 0;
160 entry->trigger = 0;
161 entry->mask = 0;
162 entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
163
164 mmr_pnode = uv_blade_to_pnode(mmr_blade);
165 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
166
167 if (cfg->move_in_progress)
168 send_cleanup_vector(cfg);
169
170 return irq;
171}
172
173/*
174 * Disable the specified MMR located on the specified blade so that MSIs are
175 * no longer allowed to be sent.
176 */
177static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
178{
179 unsigned long mmr_value;
180 struct uv_IO_APIC_route_entry *entry;
181
182 BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
183 sizeof(unsigned long));
184
185 mmr_value = 0;
186 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
187 entry->mask = 1;
188
189 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
190}
191
192static int
193uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
194 bool force)
195{
196 struct irq_cfg *cfg = data->chip_data;
197 unsigned int dest;
198 unsigned long mmr_value, mmr_offset;
199 struct uv_IO_APIC_route_entry *entry;
200 int mmr_pnode;
201
202 if (__ioapic_set_affinity(data, mask, &dest))
203 return -1;
204
205 mmr_value = 0;
206 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
207
208 entry->vector = cfg->vector;
209 entry->delivery_mode = apic->irq_delivery_mode;
210 entry->dest_mode = apic->irq_dest_mode;
211 entry->polarity = 0;
212 entry->trigger = 0;
213 entry->mask = 0;
214 entry->dest = dest;
215
216 /* Get previously stored MMR and pnode of hub sourcing interrupts */
217 if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode))
218 return -1;
219
220 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
221
222 if (cfg->move_in_progress)
223 send_cleanup_vector(cfg);
224
225 return 0;
226}
227
228/*
229 * Set up a mapping of an available irq and vector, and enable the specified
230 * MMR that defines the MSI that is to be sent to the specified CPU when an
231 * interrupt is raised.
232 */
233int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
234 unsigned long mmr_offset, int limit)
235{
236 int irq, ret;
237
238 irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));
239
240 if (irq <= 0)
241 return -EBUSY;
242
243 ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
244 limit);
245 if (ret == irq)
246 uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
247 else
248 destroy_irq(irq);
249
250 return ret;
251}
252EXPORT_SYMBOL_GPL(uv_setup_irq);
253
254/*
255 * Tear down a mapping of an irq and vector, and disable the specified MMR that
256 * defined the MSI that was to be sent to the specified CPU when an interrupt
257 * was raised.
258 *
259 * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
260 */
261void uv_teardown_irq(unsigned int irq)
262{
263 struct uv_irq_2_mmr_pnode *e;
264 struct rb_node *n;
265 unsigned long irqflags;
266
267 spin_lock_irqsave(&uv_irq_lock, irqflags);
268 n = uv_irq_root.rb_node;
269 while (n) {
270 e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
271 if (e->irq == irq) {
272 arch_disable_uv_irq(e->pnode, e->offset);
273 rb_erase(n, &uv_irq_root);
274 kfree(e);
275 break;
276 }
277 if (irq < e->irq)
278 n = n->rb_left;
279 else
280 n = n->rb_right;
281 }
282 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
283 destroy_irq(irq);
284}
285EXPORT_SYMBOL_GPL(uv_teardown_irq);
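
For orientation, a caller of the interface removed above pairs uv_setup_irq() with request_irq() and tears both down in reverse order. The following is a hedged sketch only; the handler, device name, and cpu/blade/MMR values are made up for illustration, while uv_setup_irq(), uv_teardown_irq() and UV_AFFINITY_CPU come from the file shown above:

#include <linux/errno.h>
#include <linux/interrupt.h>
#include <asm/uv/uv_irq.h>

/* Hypothetical handler for a UV MMR-backed interrupt. */
static irqreturn_t example_uv_handler(int irq, void *dev_id)
{
	return IRQ_HANDLED;
}

static int example_uv_irq_setup(int cpu, int blade, unsigned long mmr_offset)
{
	int irq;

	/* Allocate an irq/vector and program the hub MMR to target 'cpu'. */
	irq = uv_setup_irq("example-uv", cpu, blade, mmr_offset, UV_AFFINITY_CPU);
	if (irq <= 0)
		return irq ? irq : -EBUSY;

	if (request_irq(irq, example_uv_handler, 0, "example-uv", NULL)) {
		uv_teardown_irq(irq);
		return -EBUSY;
	}
	return irq;
}

static void example_uv_irq_teardown(int irq)
{
	free_irq(irq, NULL);
	uv_teardown_irq(irq);	/* masks the MMR and releases the irq */
}
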
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
deleted file mode 100644
index 309c70fb7759..000000000000
--- a/arch/x86/kernel/uv_sysfs.c
+++ /dev/null
@@ -1,76 +0,0 @@
1/*
2 * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson
20 */
21
22#include <linux/sysdev.h>
23#include <asm/uv/bios.h>
24#include <asm/uv/uv.h>
25
26struct kobject *sgi_uv_kobj;
27
28static ssize_t partition_id_show(struct kobject *kobj,
29 struct kobj_attribute *attr, char *buf)
30{
31 return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id);
32}
33
34static ssize_t coherence_id_show(struct kobject *kobj,
35 struct kobj_attribute *attr, char *buf)
36{
37 return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
38}
39
40static struct kobj_attribute partition_id_attr =
41 __ATTR(partition_id, S_IRUGO, partition_id_show, NULL);
42
43static struct kobj_attribute coherence_id_attr =
44 __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL);
45
46
47static int __init sgi_uv_sysfs_init(void)
48{
49 unsigned long ret;
50
51 if (!is_uv_system())
52 return -ENODEV;
53
54 if (!sgi_uv_kobj)
55 sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
56 if (!sgi_uv_kobj) {
57 printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n");
58 return -EINVAL;
59 }
60
61 ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
62 if (ret) {
63 printk(KERN_WARNING "sysfs_create_file partition_id failed\n");
64 return ret;
65 }
66
67 ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
68 if (ret) {
69 printk(KERN_WARNING "sysfs_create_file coherence_id failed\n");
70 return ret;
71 }
72
73 return 0;
74}
75
76device_initcall(sgi_uv_sysfs_init);
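The two attributes registered above appear as ordinary read-only files under /sys/firmware/sgi_uv/. A minimal userspace sketch of reading one of them (illustrative only, not part of this patch):

	#include <stdio.h>

	int main(void)
	{
		char buf[64];
		FILE *f = fopen("/sys/firmware/sgi_uv/partition_id", "r");

		if (!f)
			return 1;
		if (fgets(buf, sizeof(buf), f))
			printf("partition_id: %s", buf);
		fclose(f);
		return 0;
	}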
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
deleted file mode 100644
index 56e421bc379b..000000000000
--- a/arch/x86/kernel/uv_time.c
+++ /dev/null
@@ -1,423 +0,0 @@
1/*
2 * SGI RTC clock/timer routines.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Dimitri Sivanich
20 */
21#include <linux/clockchips.h>
22#include <linux/slab.h>
23
24#include <asm/uv/uv_mmrs.h>
25#include <asm/uv/uv_hub.h>
26#include <asm/uv/bios.h>
27#include <asm/uv/uv.h>
28#include <asm/apic.h>
29#include <asm/cpu.h>
30
31#define RTC_NAME "sgi_rtc"
32
33static cycle_t uv_read_rtc(struct clocksource *cs);
34static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
35static void uv_rtc_timer_setup(enum clock_event_mode,
36 struct clock_event_device *);
37
38static struct clocksource clocksource_uv = {
39 .name = RTC_NAME,
40 .rating = 400,
41 .read = uv_read_rtc,
42 .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
43 .shift = 10,
44 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
45};
46
47static struct clock_event_device clock_event_device_uv = {
48 .name = RTC_NAME,
49 .features = CLOCK_EVT_FEAT_ONESHOT,
50 .shift = 20,
51 .rating = 400,
52 .irq = -1,
53 .set_next_event = uv_rtc_next_event,
54 .set_mode = uv_rtc_timer_setup,
55 .event_handler = NULL,
56};
57
58static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
59
60/* There is one of these allocated per node */
61struct uv_rtc_timer_head {
62 spinlock_t lock;
63 /* next cpu waiting for timer, local node relative: */
64 int next_cpu;
65 /* number of cpus on this node: */
66 int ncpus;
67 struct {
68 int lcpu; /* systemwide logical cpu number */
69 u64 expires; /* next timer expiration for this cpu */
70 } cpu[1];
71};
72
73/*
74 * Access to uv_rtc_timer_head via blade id.
75 */
76static struct uv_rtc_timer_head **blade_info __read_mostly;
77
78static int uv_rtc_evt_enable;
79
80/*
81 * Hardware interface routines
82 */
83
84/* Send IPIs to another node */
85static void uv_rtc_send_IPI(int cpu)
86{
87 unsigned long apicid, val;
88 int pnode;
89
90 apicid = cpu_physical_id(cpu);
91 pnode = uv_apicid_to_pnode(apicid);
92 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
93 (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
94 (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
95
96 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
97}
98
99/* Check for an RTC interrupt pending */
100static int uv_intr_pending(int pnode)
101{
102 return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
103 UVH_EVENT_OCCURRED0_RTC1_MASK;
104}
105
106/* Setup interrupt and return non-zero if early expiration occurred. */
107static int uv_setup_intr(int cpu, u64 expires)
108{
109 u64 val;
110 int pnode = uv_cpu_to_pnode(cpu);
111
112 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
113 UVH_RTC1_INT_CONFIG_M_MASK);
114 uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
115
116 uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
117 UVH_EVENT_OCCURRED0_RTC1_MASK);
118
119 val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
120 ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
121
122 /* Set configuration */
123 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
124 /* Initialize comparator value */
125 uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
126
127 if (uv_read_rtc(NULL) <= expires)
128 return 0;
129
130 return !uv_intr_pending(pnode);
131}
132
133/*
134 * Per-cpu timer tracking routines
135 */
136
137static __init void uv_rtc_deallocate_timers(void)
138{
139 int bid;
140
141 for_each_possible_blade(bid) {
142 kfree(blade_info[bid]);
143 }
144 kfree(blade_info);
145}
146
147/* Allocate per-node list of cpu timer expiration times. */
148static __init int uv_rtc_allocate_timers(void)
149{
150 int cpu;
151
152 blade_info = kmalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL);
153 if (!blade_info)
154 return -ENOMEM;
155 memset(blade_info, 0, uv_possible_blades * sizeof(void *));
156
157 for_each_present_cpu(cpu) {
158 int nid = cpu_to_node(cpu);
159 int bid = uv_cpu_to_blade_id(cpu);
160 int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
161 struct uv_rtc_timer_head *head = blade_info[bid];
162
163 if (!head) {
164 head = kmalloc_node(sizeof(struct uv_rtc_timer_head) +
165 (uv_blade_nr_possible_cpus(bid) *
166 2 * sizeof(u64)),
167 GFP_KERNEL, nid);
168 if (!head) {
169 uv_rtc_deallocate_timers();
170 return -ENOMEM;
171 }
172 spin_lock_init(&head->lock);
173 head->ncpus = uv_blade_nr_possible_cpus(bid);
174 head->next_cpu = -1;
175 blade_info[bid] = head;
176 }
177
178 head->cpu[bcpu].lcpu = cpu;
179 head->cpu[bcpu].expires = ULLONG_MAX;
180 }
181
182 return 0;
183}
184
185/* Find and set the next expiring timer. */
186static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode)
187{
188 u64 lowest = ULLONG_MAX;
189 int c, bcpu = -1;
190
191 head->next_cpu = -1;
192 for (c = 0; c < head->ncpus; c++) {
193 u64 exp = head->cpu[c].expires;
194 if (exp < lowest) {
195 bcpu = c;
196 lowest = exp;
197 }
198 }
199 if (bcpu >= 0) {
200 head->next_cpu = bcpu;
201 c = head->cpu[bcpu].lcpu;
202 if (uv_setup_intr(c, lowest))
203 /* If we didn't set it up in time, trigger */
204 uv_rtc_send_IPI(c);
205 } else {
206 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
207 UVH_RTC1_INT_CONFIG_M_MASK);
208 }
209}
210
211/*
212 * Set expiration time for current cpu.
213 *
214 * Returns 1 if we missed the expiration time.
215 */
216static int uv_rtc_set_timer(int cpu, u64 expires)
217{
218 int pnode = uv_cpu_to_pnode(cpu);
219 int bid = uv_cpu_to_blade_id(cpu);
220 struct uv_rtc_timer_head *head = blade_info[bid];
221 int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
222 u64 *t = &head->cpu[bcpu].expires;
223 unsigned long flags;
224 int next_cpu;
225
226 spin_lock_irqsave(&head->lock, flags);
227
228 next_cpu = head->next_cpu;
229 *t = expires;
230
231 /* Will this one be next to go off? */
232 if (next_cpu < 0 || bcpu == next_cpu ||
233 expires < head->cpu[next_cpu].expires) {
234 head->next_cpu = bcpu;
235 if (uv_setup_intr(cpu, expires)) {
236 *t = ULLONG_MAX;
237 uv_rtc_find_next_timer(head, pnode);
238 spin_unlock_irqrestore(&head->lock, flags);
239 return -ETIME;
240 }
241 }
242
243 spin_unlock_irqrestore(&head->lock, flags);
244 return 0;
245}
246
247/*
248 * Unset expiration time for current cpu.
249 *
250 * Returns 1 if this timer was pending.
251 */
252static int uv_rtc_unset_timer(int cpu, int force)
253{
254 int pnode = uv_cpu_to_pnode(cpu);
255 int bid = uv_cpu_to_blade_id(cpu);
256 struct uv_rtc_timer_head *head = blade_info[bid];
257 int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
258 u64 *t = &head->cpu[bcpu].expires;
259 unsigned long flags;
260 int rc = 0;
261
262 spin_lock_irqsave(&head->lock, flags);
263
264 if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
265 rc = 1;
266
267 if (rc) {
268 *t = ULLONG_MAX;
269 /* Was the hardware setup for this timer? */
270 if (head->next_cpu == bcpu)
271 uv_rtc_find_next_timer(head, pnode);
272 }
273
274 spin_unlock_irqrestore(&head->lock, flags);
275
276 return rc;
277}
278
279
280/*
281 * Kernel interface routines.
282 */
283
284/*
285 * Read the RTC.
286 *
287 * Starting with HUB rev 2.0, the UV RTC register is replicated across all
288 * cachelines of its own page. This allows faster simultaneous reads
289 * from a given socket.
290 */
291static cycle_t uv_read_rtc(struct clocksource *cs)
292{
293 unsigned long offset;
294
295 if (uv_get_min_hub_revision_id() == 1)
296 offset = 0;
297 else
298 offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
299
300 return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
301}
302
303/*
304 * Program the next event, relative to now
305 */
306static int uv_rtc_next_event(unsigned long delta,
307 struct clock_event_device *ced)
308{
309 int ced_cpu = cpumask_first(ced->cpumask);
310
311 return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL));
312}
313
314/*
315 * Setup the RTC timer in oneshot mode
316 */
317static void uv_rtc_timer_setup(enum clock_event_mode mode,
318 struct clock_event_device *evt)
319{
320 int ced_cpu = cpumask_first(evt->cpumask);
321
322 switch (mode) {
323 case CLOCK_EVT_MODE_PERIODIC:
324 case CLOCK_EVT_MODE_ONESHOT:
325 case CLOCK_EVT_MODE_RESUME:
326 /* Nothing to do here yet */
327 break;
328 case CLOCK_EVT_MODE_UNUSED:
329 case CLOCK_EVT_MODE_SHUTDOWN:
330 uv_rtc_unset_timer(ced_cpu, 1);
331 break;
332 }
333}
334
335static void uv_rtc_interrupt(void)
336{
337 int cpu = smp_processor_id();
338 struct clock_event_device *ced = &per_cpu(cpu_ced, cpu);
339
340 if (!ced || !ced->event_handler)
341 return;
342
343 if (uv_rtc_unset_timer(cpu, 0) != 1)
344 return;
345
346 ced->event_handler(ced);
347}
348
349static int __init uv_enable_evt_rtc(char *str)
350{
351 uv_rtc_evt_enable = 1;
352
353 return 1;
354}
355__setup("uvrtcevt", uv_enable_evt_rtc);
356
357static __init void uv_rtc_register_clockevents(struct work_struct *dummy)
358{
359 struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
360
361 *ced = clock_event_device_uv;
362 ced->cpumask = cpumask_of(smp_processor_id());
363 clockevents_register_device(ced);
364}
365
366static __init int uv_rtc_setup_clock(void)
367{
368 int rc;
369
370 if (!is_uv_system())
371 return -ENODEV;
372
373 clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
374 clocksource_uv.shift);
375
376 /* If single blade, prefer tsc */
377 if (uv_num_possible_blades() == 1)
378 clocksource_uv.rating = 250;
379
380 rc = clocksource_register(&clocksource_uv);
381 if (rc)
382 printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
383 else
384 printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n",
385 sn_rtc_cycles_per_second/(unsigned long)1E6);
386
387 if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback)
388 return rc;
389
390 /* Setup and register clockevents */
391 rc = uv_rtc_allocate_timers();
392 if (rc)
393 goto error;
394
395 x86_platform_ipi_callback = uv_rtc_interrupt;
396
397 clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second,
398 NSEC_PER_SEC, clock_event_device_uv.shift);
399
400 clock_event_device_uv.min_delta_ns = NSEC_PER_SEC /
401 sn_rtc_cycles_per_second;
402
403 clock_event_device_uv.max_delta_ns = clocksource_uv.mask *
404 (NSEC_PER_SEC / sn_rtc_cycles_per_second);
405
406 rc = schedule_on_each_cpu(uv_rtc_register_clockevents);
407 if (rc) {
408 x86_platform_ipi_callback = NULL;
409 uv_rtc_deallocate_timers();
410 goto error;
411 }
412
413 printk(KERN_INFO "UV RTC clockevents registered\n");
414
415 return 0;
416
417error:
418 clocksource_unregister(&clocksource_uv);
419 printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc);
420
421 return rc;
422}
423arch_initcall(uv_rtc_setup_clock);
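For reference, the mult/shift pair computed in uv_rtc_setup_clock() above is consumed by the timekeeping core as ns = (cycles * mult) >> shift, with mult roughly (NSEC_PER_SEC << shift) / sn_rtc_cycles_per_second as produced by clocksource_hz2mult(). The sketch below only restates that arithmetic (the core already provides it as clocksource_cyc2ns()); the helper name is illustrative:

	/* Convert a UV RTC cycle delta to nanoseconds using the registered
	 * clocksource scaling factors. */
	static inline u64 example_uv_cyc2ns(u64 cyc, u32 mult, u32 shift)
	{
		return ((u64)cyc * mult) >> shift;
	}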
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
deleted file mode 100644
index 3371bd053b89..000000000000
--- a/arch/x86/kernel/visws_quirks.c
+++ /dev/null
@@ -1,614 +0,0 @@
1/*
2 * SGI Visual Workstation support and quirks, unmaintained.
3 *
4 * Split out from setup.c by davej@suse.de
5 *
6 * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
7 *
8 * SGI Visual Workstation interrupt controller
9 *
10 * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
11 * which serves as the main interrupt controller in the system. Non-legacy
12 * hardware in the system uses this controller directly. Legacy devices
13 * are connected to the PIIX4 which in turn has its 8259(s) connected to
14 * one of the Cobalt APIC entries.
15 *
16 * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
17 *
18 * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
19 */
20#include <linux/interrupt.h>
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/smp.h>
24
25#include <asm/visws/cobalt.h>
26#include <asm/visws/piix4.h>
27#include <asm/io_apic.h>
28#include <asm/fixmap.h>
29#include <asm/reboot.h>
30#include <asm/setup.h>
31#include <asm/apic.h>
32#include <asm/e820.h>
33#include <asm/time.h>
34#include <asm/io.h>
35
36#include <linux/kernel_stat.h>
37
38#include <asm/i8259.h>
39#include <asm/irq_vectors.h>
40#include <asm/visws/lithium.h>
41
42#include <linux/sched.h>
43#include <linux/kernel.h>
44#include <linux/pci.h>
45#include <linux/pci_ids.h>
46
47extern int no_broadcast;
48
49char visws_board_type = -1;
50char visws_board_rev = -1;
51
52static void __init visws_time_init(void)
53{
54 printk(KERN_INFO "Starting Cobalt Timer system clock\n");
55
56 /* Set the countdown value */
57 co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
58
59 /* Start the timer */
60 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
61
62 /* Enable (unmask) the timer interrupt */
63 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
64
65 setup_default_timer_irq();
66}
67
68/* Replaces the default init_ISA_irqs in the generic setup */
69static void __init visws_pre_intr_init(void);
70
71/* Quirk for machine specific memory setup. */
72
73#define MB (1024 * 1024)
74
75unsigned long sgivwfb_mem_phys;
76unsigned long sgivwfb_mem_size;
77EXPORT_SYMBOL(sgivwfb_mem_phys);
78EXPORT_SYMBOL(sgivwfb_mem_size);
79
80long long mem_size __initdata = 0;
81
82static char * __init visws_memory_setup(void)
83{
84 long long gfx_mem_size = 8 * MB;
85
86 mem_size = boot_params.alt_mem_k;
87
88 if (!mem_size) {
89 printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n");
90 mem_size = 128 * MB;
91 }
92
93 /*
94 * this hardcodes the graphics memory to 8 MB
95 * it really should be sized dynamically (or at least
96 * set as a boot param)
97 */
98 if (!sgivwfb_mem_size) {
99 printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
100 sgivwfb_mem_size = 8 * MB;
101 }
102
103 /*
104 * Trim to nearest MB
105 */
106 sgivwfb_mem_size &= ~((1 << 20) - 1);
107 sgivwfb_mem_phys = mem_size - gfx_mem_size;
108
109 e820_add_region(0, LOWMEMSIZE(), E820_RAM);
110 e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
111 e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
112
113 return "PROM";
114}
115
116static void visws_machine_emergency_restart(void)
117{
118 /*
119 * Visual Workstations restart after this
120 * register is poked on the PIIX4
121 */
122 outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
123}
124
125static void visws_machine_power_off(void)
126{
127 unsigned short pm_status;
128/* extern unsigned int pci_bus0; */
129
130 while ((pm_status = inw(PMSTS_PORT)) & 0x100)
131 outw(pm_status, PMSTS_PORT);
132
133 outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
134
135 mdelay(10);
136
137#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
138 (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
139
140/* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */
141 outl(PIIX_SPECIAL_STOP, 0xCFC);
142}
143
144static void __init visws_get_smp_config(unsigned int early)
145{
146}
147
148/*
149 * The Visual Workstation is Intel MP compliant in the hardware
150 * sense, but it doesn't have a BIOS(-configuration table).
151 * No problem for Linux.
152 */
153
154static void __init MP_processor_info(struct mpc_cpu *m)
155{
156 int ver, logical_apicid;
157 physid_mask_t apic_cpus;
158
159 if (!(m->cpuflag & CPU_ENABLED))
160 return;
161
162 logical_apicid = m->apicid;
163 printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
164 m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
165 m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
166 (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver);
167
168 if (m->cpuflag & CPU_BOOTPROCESSOR)
169 boot_cpu_physical_apicid = m->apicid;
170
171 ver = m->apicver;
172 if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
173 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
174 m->apicid, MAX_APICS);
175 return;
176 }
177
178 apic->apicid_to_cpu_present(m->apicid, &apic_cpus);
179 physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
180 /*
181 * Validate version
182 */
183 if (ver == 0x0) {
184 printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
185 "fixing up to 0x10. (tell your hw vendor)\n",
186 m->apicid);
187 ver = 0x10;
188 }
189 apic_version[m->apicid] = ver;
190}
191
192static void __init visws_find_smp_config(void)
193{
194 struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
195 unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
196
197 if (ncpus > CO_CPU_MAX) {
198 printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
199 ncpus, mp);
200
201 ncpus = CO_CPU_MAX;
202 }
203
204 if (ncpus > setup_max_cpus)
205 ncpus = setup_max_cpus;
206
207#ifdef CONFIG_X86_LOCAL_APIC
208 smp_found_config = 1;
209#endif
210 while (ncpus--)
211 MP_processor_info(mp++);
212
213 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
214}
215
216static void visws_trap_init(void);
217
218void __init visws_early_detect(void)
219{
220 int raw;
221
222 visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
223 >> PIIX_GPI_BD_SHIFT;
224
225 if (visws_board_type < 0)
226 return;
227
228 /*
229 * Override the default platform setup functions
230 */
231 x86_init.resources.memory_setup = visws_memory_setup;
232 x86_init.mpparse.get_smp_config = visws_get_smp_config;
233 x86_init.mpparse.find_smp_config = visws_find_smp_config;
234 x86_init.irqs.pre_vector_init = visws_pre_intr_init;
235 x86_init.irqs.trap_init = visws_trap_init;
236 x86_init.timers.timer_init = visws_time_init;
237 x86_init.pci.init = pci_visws_init;
238 x86_init.pci.init_irq = x86_init_noop;
239
240 /*
241 * Install reboot quirks:
242 */
243 pm_power_off = visws_machine_power_off;
244 machine_ops.emergency_restart = visws_machine_emergency_restart;
245
246 /*
247 * Do not use broadcast IPIs:
248 */
249 no_broadcast = 0;
250
251#ifdef CONFIG_X86_IO_APIC
252 /*
253 * Turn off IO-APIC detection and initialization:
254 */
255 skip_ioapic_setup = 1;
256#endif
257
258 /*
259 * Get Board rev.
260 * First, we have to initialize the 307 part to allow us access
261 * to the GPIO registers. Let's map them at 0x0fc0 which is right
262 * after the PIIX4 PM section.
263 */
264 outb_p(SIO_DEV_SEL, SIO_INDEX);
265 outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */
266
267 outb_p(SIO_DEV_MSB, SIO_INDEX);
268 outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */
269
270 outb_p(SIO_DEV_LSB, SIO_INDEX);
271 outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */
272
273 outb_p(SIO_DEV_ENB, SIO_INDEX);
274 outb_p(1, SIO_DATA); /* Enable GPIO registers. */
275
276 /*
277 * Now, we have to map the power management section to write
278 * a bit which enables access to the GPIO registers.
279 * What lunatic came up with this shit?
280 */
281 outb_p(SIO_DEV_SEL, SIO_INDEX);
282 outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */
283
284 outb_p(SIO_DEV_MSB, SIO_INDEX);
285 outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */
286
287 outb_p(SIO_DEV_LSB, SIO_INDEX);
288 outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */
289
290 outb_p(SIO_DEV_ENB, SIO_INDEX);
291 outb_p(1, SIO_DATA); /* Enable PM registers. */
292
293 /*
294 * Now, write the PM register which enables the GPIO registers.
295 */
296 outb_p(SIO_PM_FER2, SIO_PM_INDEX);
297 outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
298
299 /*
300 * Now, initialize the GPIO registers.
301 * We want them all to be inputs which is the
302 * power on default, so let's leave them alone.
303 * So, let's just read the board rev!
304 */
305 raw = inb_p(SIO_GP_DATA1);
306 raw &= 0x7f; /* 7 bits of valid board revision ID. */
307
308 if (visws_board_type == VISWS_320) {
309 if (raw < 0x6) {
310 visws_board_rev = 4;
311 } else if (raw < 0xc) {
312 visws_board_rev = 5;
313 } else {
314 visws_board_rev = 6;
315 }
316 } else if (visws_board_type == VISWS_540) {
317 visws_board_rev = 2;
318 } else {
319 visws_board_rev = raw;
320 }
321
322 printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
323 (visws_board_type == VISWS_320 ? "320" :
324 (visws_board_type == VISWS_540 ? "540" :
325 "unknown")), visws_board_rev);
326}
327
328#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
329#define BCD (LI_INTB | LI_INTC | LI_INTD)
330#define ALLDEVS (A01234 | BCD)
331
332static __init void lithium_init(void)
333{
334 set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
335 set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
336
337 if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
338 (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
339 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
340/* panic("This machine is not SGI Visual Workstation 320/540"); */
341 }
342
343 if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
344 (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
345 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
346/* panic("This machine is not SGI Visual Workstation 320/540"); */
347 }
348
349 li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
350 li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
351}
352
353static __init void cobalt_init(void)
354{
355 /*
356 * On normal SMP PC this is used only with SMP, but we have to
357 * use it and set it up here to start the Cobalt clock
358 */
359 set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
360 setup_local_APIC();
361 printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
362 (unsigned int)apic_read(APIC_LVR),
363 (unsigned int)apic_read(APIC_ID));
364
365 set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
366 set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
367 printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
368 co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
369
370 /* Enable Cobalt APIC being careful to NOT change the ID! */
371 co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
372
373 printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
374 co_apic_read(CO_APIC_ID));
375}
376
377static void __init visws_trap_init(void)
378{
379 lithium_init();
380 cobalt_init();
381}
382
383/*
384 * IRQ controller / APIC support:
385 */
386
387static DEFINE_SPINLOCK(cobalt_lock);
388
389/*
390 * Set the given Cobalt APIC Redirection Table entry to point
391 * to the given IDT vector/index.
392 */
393static inline void co_apic_set(int entry, int irq)
394{
395 co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
396 co_apic_write(CO_APIC_HI(entry), 0);
397}
398
399/*
400 * Cobalt (IO)-APIC functions to handle PCI devices.
401 */
402static inline int co_apic_ide0_hack(void)
403{
404 extern char visws_board_type;
405 extern char visws_board_rev;
406
407 if (visws_board_type == VISWS_320 && visws_board_rev == 5)
408 return 5;
409 return CO_APIC_IDE0;
410}
411
412static int is_co_apic(unsigned int irq)
413{
414 if (IS_CO_APIC(irq))
415 return CO_APIC(irq);
416
417 switch (irq) {
418 case 0: return CO_APIC_CPU;
419 case CO_IRQ_IDE0: return co_apic_ide0_hack();
420 case CO_IRQ_IDE1: return CO_APIC_IDE1;
421 default: return -1;
422 }
423}
424
425
426/*
427 * This is the SGI Cobalt (IO-)APIC:
428 */
429static void enable_cobalt_irq(struct irq_data *data)
430{
431 co_apic_set(is_co_apic(data->irq), data->irq);
432}
433
434static void disable_cobalt_irq(struct irq_data *data)
435{
436 int entry = is_co_apic(data->irq);
437
438 co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
439 co_apic_read(CO_APIC_LO(entry));
440}
441
442static void ack_cobalt_irq(struct irq_data *data)
443{
444 unsigned long flags;
445
446 spin_lock_irqsave(&cobalt_lock, flags);
447 disable_cobalt_irq(data);
448 apic_write(APIC_EOI, APIC_EIO_ACK);
449 spin_unlock_irqrestore(&cobalt_lock, flags);
450}
451
452static struct irq_chip cobalt_irq_type = {
453 .name = "Cobalt-APIC",
454 .irq_enable = enable_cobalt_irq,
455 .irq_disable = disable_cobalt_irq,
456 .irq_ack = ack_cobalt_irq,
457};
458
459
460/*
461 * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
462 * -- not the manner expected by the code in i8259.c.
463 *
464 * there is a 'master' physical interrupt source that gets sent to
465 * the CPU. But in the chipset there are various 'virtual' interrupts
466 * waiting to be handled. We represent this to Linux through a 'master'
467 * interrupt controller type, and through a special virtual interrupt-
468 * controller. Device drivers only see the virtual interrupt sources.
469 */
470static unsigned int startup_piix4_master_irq(struct irq_data *data)
471{
472 legacy_pic->init(0);
473 enable_cobalt_irq(data);
474}
475
476static void end_piix4_master_irq(struct irq_data *data)
477{
478 unsigned long flags;
479
480 spin_lock_irqsave(&cobalt_lock, flags);
481 enable_cobalt_irq(data);
482 spin_unlock_irqrestore(&cobalt_lock, flags);
483}
484
485static struct irq_chip piix4_master_irq_type = {
486 .name = "PIIX4-master",
487 .irq_startup = startup_piix4_master_irq,
488 .irq_ack = ack_cobalt_irq,
489};
490
491static void pii4_mask(struct irq_data *data) { }
492
493static struct irq_chip piix4_virtual_irq_type = {
494 .name = "PIIX4-virtual",
495 .mask = pii4_mask,
496};
497
498/*
499 * PIIX4-8259 master/virtual functions to handle interrupt requests
500 * from legacy devices: floppy, parallel, serial, rtc.
501 *
502 * None of these get Cobalt APIC entries, neither do they have IDT
503 * entries. These interrupts are purely virtual and distributed from
504 * the 'master' interrupt source: CO_IRQ_8259.
505 *
506 * When the 8259 interrupts, its handler figures out which of these
507 * devices is interrupting and dispatches to its handler.
508 *
509 * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
510 * enable_irq gets the right irq. This 'master' irq is never directly
511 * manipulated by any driver.
512 */
513static irqreturn_t piix4_master_intr(int irq, void *dev_id)
514{
515 unsigned long flags;
516 int realirq;
517
518 raw_spin_lock_irqsave(&i8259A_lock, flags);
519
520 /* Find out what's interrupting in the PIIX4 master 8259 */
521 outb(0x0c, 0x20); /* OCW3 Poll command */
522 realirq = inb(0x20);
523
524 /*
525 * Bit 7 == 0 means invalid/spurious
526 */
527 if (unlikely(!(realirq & 0x80)))
528 goto out_unlock;
529
530 realirq &= 7;
531
532 if (unlikely(realirq == 2)) {
533 outb(0x0c, 0xa0);
534 realirq = inb(0xa0);
535
536 if (unlikely(!(realirq & 0x80)))
537 goto out_unlock;
538
539 realirq = (realirq & 7) + 8;
540 }
541
542 /* mask and ack interrupt */
543 cached_irq_mask |= 1 << realirq;
544 if (unlikely(realirq > 7)) {
545 inb(0xa1);
546 outb(cached_slave_mask, 0xa1);
547 outb(0x60 + (realirq & 7), 0xa0);
548 outb(0x60 + 2, 0x20);
549 } else {
550 inb(0x21);
551 outb(cached_master_mask, 0x21);
552 outb(0x60 + realirq, 0x20);
553 }
554
555 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
556
557 /*
558 * handle this 'virtual interrupt' as a Cobalt one now.
559 */
560 generic_handle_irq(realirq);
561
562 return IRQ_HANDLED;
563
564out_unlock:
565 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
566 return IRQ_NONE;
567}
568
569static struct irqaction master_action = {
570 .handler = piix4_master_intr,
571 .name = "PIIX4-8259",
572};
573
574static struct irqaction cascade_action = {
575 .handler = no_action,
576 .name = "cascade",
577};
578
579static inline void set_piix4_virtual_irq_type(void)
580{
581 piix4_virtual_irq_type.enable = i8259A_chip.unmask;
582 piix4_virtual_irq_type.disable = i8259A_chip.mask;
583 piix4_virtual_irq_type.unmask = i8259A_chip.unmask;
584}
585
586static void __init visws_pre_intr_init(void)
587{
588 int i;
589
590 set_piix4_virtual_irq_type();
591
592 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
593 struct irq_chip *chip = NULL;
594
595 if (i == 0)
596 chip = &cobalt_irq_type;
597 else if (i == CO_IRQ_IDE0)
598 chip = &cobalt_irq_type;
599 else if (i == CO_IRQ_IDE1)
600 chip = &cobalt_irq_type;
601 else if (i == CO_IRQ_8259)
602 chip = &piix4_master_irq_type;
603 else if (i < CO_IRQ_APIC0)
604 chip = &piix4_virtual_irq_type;
605 else if (IS_CO_APIC(i))
606 chip = &cobalt_irq_type;
607
608 if (chip)
609 set_irq_chip(i, chip);
610 }
611
612 setup_irq(CO_IRQ_8259, &master_action);
613 setup_irq(2, &cascade_action);
614}
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index cd6da6bf3eca..ceb2911aa439 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -6,10 +6,12 @@
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/ioport.h> 7#include <linux/ioport.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/pci.h>
9 10
10#include <asm/bios_ebda.h> 11#include <asm/bios_ebda.h>
11#include <asm/paravirt.h> 12#include <asm/paravirt.h>
12#include <asm/pci_x86.h> 13#include <asm/pci_x86.h>
14#include <asm/pci.h>
13#include <asm/mpspec.h> 15#include <asm/mpspec.h>
14#include <asm/setup.h> 16#include <asm/setup.h>
15#include <asm/apic.h> 17#include <asm/apic.h>
@@ -99,3 +101,8 @@ struct x86_platform_ops x86_platform = {
99}; 101};
100 102
101EXPORT_SYMBOL_GPL(x86_platform); 103EXPORT_SYMBOL_GPL(x86_platform);
104struct x86_msi_ops x86_msi = {
105 .setup_msi_irqs = native_setup_msi_irqs,
106 .teardown_msi_irq = native_teardown_msi_irq,
107 .teardown_msi_irqs = default_teardown_msi_irqs,
108};