Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile | 12
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 60
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 6
-rw-r--r--  arch/x86/kernel/alternative.c | 71
-rw-r--r--  arch/x86/kernel/amd_nb.c | 135
-rw-r--r--  arch/x86/kernel/aperture_64.c | 10
-rw-r--r--  arch/x86/kernel/apic/apic.c | 1
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 11
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 36
-rw-r--r--  arch/x86/kernel/apm_32.c | 5
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 4
-rw-r--r--  arch/x86/kernel/bios_uv.c | 215
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 1
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longrun.c | 4
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 4
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 148
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 1
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 26
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 216
-rw-r--r--  arch/x86/kernel/crash_dump_32.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 6
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 8
-rw-r--r--  arch/x86/kernel/efi.c | 613
-rw-r--r--  arch/x86/kernel/efi_32.c | 112
-rw-r--r--  arch/x86/kernel/efi_64.c | 114
-rw-r--r--  arch/x86/kernel/efi_stub_32.S | 123
-rw-r--r--  arch/x86/kernel/efi_stub_64.S | 116
-rw-r--r--  arch/x86/kernel/entry_32.S | 6
-rw-r--r--  arch/x86/kernel/entry_64.S | 20
-rw-r--r--  arch/x86/kernel/head32.c | 1
-rw-r--r--  arch/x86/kernel/head_32.S | 55
-rw-r--r--  arch/x86/kernel/hpet.c | 53
-rw-r--r--  arch/x86/kernel/irq_32.c | 25
-rw-r--r--  arch/x86/kernel/kdebugfs.c | 1
-rw-r--r--  arch/x86/kernel/kgdb.c | 14
-rw-r--r--  arch/x86/kernel/kvmclock.c | 6
-rw-r--r--  arch/x86/kernel/microcode_amd.c | 2
-rw-r--r--  arch/x86/kernel/microcode_core.c | 3
-rw-r--r--  arch/x86/kernel/microcode_intel.c | 2
-rw-r--r--  arch/x86/kernel/mmconf-fam10h_64.c | 7
-rw-r--r--  arch/x86/kernel/mrst.c | 311
-rw-r--r--  arch/x86/kernel/olpc-xo1.c | 140
-rw-r--r--  arch/x86/kernel/olpc.c | 281
-rw-r--r--  arch/x86/kernel/olpc_ofw.c | 112
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 34
-rw-r--r--  arch/x86/kernel/ptrace.c | 17
-rw-r--r--  arch/x86/kernel/pvclock.c | 41
-rw-r--r--  arch/x86/kernel/quirks.c | 2
-rw-r--r--  arch/x86/kernel/reboot.c | 12
-rw-r--r--  arch/x86/kernel/scx200_32.c | 131
-rw-r--r--  arch/x86/kernel/setup.c | 28
-rw-r--r--  arch/x86/kernel/sfi.c | 120
-rw-r--r--  arch/x86/kernel/smp.c | 15
-rw-r--r--  arch/x86/kernel/smpboot.c | 19
-rw-r--r--  arch/x86/kernel/tlb_uv.c | 1655
-rw-r--r--  arch/x86/kernel/trampoline.c | 16
-rw-r--r--  arch/x86/kernel/traps.c | 1
-rw-r--r--  arch/x86/kernel/uv_irq.c | 285
-rw-r--r--  arch/x86/kernel/uv_sysfs.c | 76
-rw-r--r--  arch/x86/kernel/uv_time.c | 423
-rw-r--r--  arch/x86/kernel/visws_quirks.c | 614
-rw-r--r--  arch/x86/kernel/vm86_32.c | 10
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 2
-rw-r--r--  arch/x86/kernel/x86_init.c | 7
67 files changed, 603 insertions, 6011 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2c833d8c4141..9e13763b6092 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,7 +36,6 @@ obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
36obj-y += time.o ioport.o ldt.o dumpstack.o 36obj-y += time.o ioport.o ldt.o dumpstack.o
37obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o 37obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
38obj-$(CONFIG_IRQ_WORK) += irq_work.o 38obj-$(CONFIG_IRQ_WORK) += irq_work.o
39obj-$(CONFIG_X86_VISWS) += visws_quirks.o
40obj-$(CONFIG_X86_32) += probe_roms_32.o 39obj-$(CONFIG_X86_32) += probe_roms_32.o
41obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 40obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
42obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 41obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
@@ -58,7 +57,6 @@ obj-$(CONFIG_INTEL_TXT) += tboot.o
58obj-$(CONFIG_STACKTRACE) += stacktrace.o 57obj-$(CONFIG_STACKTRACE) += stacktrace.o
59obj-y += cpu/ 58obj-y += cpu/
60obj-y += acpi/ 59obj-y += acpi/
61obj-$(CONFIG_SFI) += sfi.o
62obj-y += reboot.o 60obj-y += reboot.o
63obj-$(CONFIG_MCA) += mca_32.o 61obj-$(CONFIG_MCA) += mca_32.o
64obj-$(CONFIG_X86_MSR) += msr.o 62obj-$(CONFIG_X86_MSR) += msr.o
@@ -82,7 +80,6 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
82obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 80obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
83obj-$(CONFIG_KPROBES) += kprobes.o 81obj-$(CONFIG_KPROBES) += kprobes.o
84obj-$(CONFIG_MODULES) += module.o 82obj-$(CONFIG_MODULES) += module.o
85obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
86obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o 83obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
87obj-$(CONFIG_KGDB) += kgdb.o 84obj-$(CONFIG_KGDB) += kgdb.o
88obj-$(CONFIG_VM86) += vm86_32.o 85obj-$(CONFIG_VM86) += vm86_32.o
@@ -104,14 +101,6 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
104 101
105obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o 102obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
106 103
107obj-$(CONFIG_SCx200) += scx200.o
108scx200-y += scx200_32.o
109
110obj-$(CONFIG_OLPC) += olpc.o
111obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
112obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o
113obj-$(CONFIG_X86_MRST) += mrst.o
114
115microcode-y := microcode_core.o 104microcode-y := microcode_core.o
116microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o 105microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
117microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o 106microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
@@ -124,7 +113,6 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
124### 113###
125# 64 bit specific files 114# 64 bit specific files
126ifeq ($(CONFIG_X86_64),y) 115ifeq ($(CONFIG_X86_64),y)
127 obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
128 obj-$(CONFIG_AUDIT) += audit_64.o 116 obj-$(CONFIG_AUDIT) += audit_64.o
129 117
130 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o 118 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c05872aa3ce0..71232b941b6c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -513,35 +513,62 @@ int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
513 return 0; 513 return 0;
514} 514}
515 515
516/* 516static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
517 * success: return IRQ number (>=0) 517 int trigger, int polarity)
518 * failure: return < 0
519 */
520int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
521{ 518{
522 unsigned int irq;
523 unsigned int plat_gsi = gsi;
524
525#ifdef CONFIG_PCI 519#ifdef CONFIG_PCI
526 /* 520 /*
527 * Make sure all (legacy) PCI IRQs are set as level-triggered. 521 * Make sure all (legacy) PCI IRQs are set as level-triggered.
528 */ 522 */
529 if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { 523 if (trigger == ACPI_LEVEL_SENSITIVE)
530 if (trigger == ACPI_LEVEL_SENSITIVE) 524 eisa_set_level_irq(gsi);
531 eisa_set_level_irq(gsi);
532 }
533#endif 525#endif
534 526
527 return gsi;
528}
529
530static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
531 int trigger, int polarity)
532{
535#ifdef CONFIG_X86_IO_APIC 533#ifdef CONFIG_X86_IO_APIC
536 if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { 534 gsi = mp_register_gsi(dev, gsi, trigger, polarity);
537 plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
538 }
539#endif 535#endif
536
537 return gsi;
538}
539
540int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
541 int trigger, int polarity) = acpi_register_gsi_pic;
542
543/*
544 * success: return IRQ number (>=0)
545 * failure: return < 0
546 */
547int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
548{
549 unsigned int irq;
550 unsigned int plat_gsi = gsi;
551
552 plat_gsi = (*__acpi_register_gsi)(dev, gsi, trigger, polarity);
540 irq = gsi_to_irq(plat_gsi); 553 irq = gsi_to_irq(plat_gsi);
541 554
542 return irq; 555 return irq;
543} 556}
544 557
558void __init acpi_set_irq_model_pic(void)
559{
560 acpi_irq_model = ACPI_IRQ_MODEL_PIC;
561 __acpi_register_gsi = acpi_register_gsi_pic;
562 acpi_ioapic = 0;
563}
564
565void __init acpi_set_irq_model_ioapic(void)
566{
567 acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
568 __acpi_register_gsi = acpi_register_gsi_ioapic;
569 acpi_ioapic = 1;
570}
571
545/* 572/*
546 * ACPI based hotplug support for CPU 573 * ACPI based hotplug support for CPU
547 */ 574 */
@@ -1259,8 +1286,7 @@ static void __init acpi_process_madt(void)
1259 */ 1286 */
1260 error = acpi_parse_madt_ioapic_entries(); 1287 error = acpi_parse_madt_ioapic_entries();
1261 if (!error) { 1288 if (!error) {
1262 acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; 1289 acpi_set_irq_model_ioapic();
1263 acpi_ioapic = 1;
1264 1290
1265 smp_found_config = 1; 1291 smp_found_config = 1;
1266 } 1292 }
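
Note on the acpi/boot.c hunks above: the old acpi_register_gsi() branched on
acpi_irq_model at every call; the patch splits the PIC and IO-APIC paths into
acpi_register_gsi_pic()/acpi_register_gsi_ioapic() and dispatches through the
__acpi_register_gsi function pointer, which acpi_set_irq_model_pic() and
acpi_set_irq_model_ioapic() switch during MADT parsing. A minimal,
self-contained sketch of that dispatch pattern follows; the names and stub
bodies here are illustrative placeholders, not the kernel implementations:

/* sketch of the function-pointer dispatch used by __acpi_register_gsi */
#include <stdio.h>

struct device;                          /* opaque, stands in for the kernel type */

static int register_gsi_pic(struct device *dev, unsigned int gsi,
                            int trigger, int polarity)
{
        return (int)gsi;                /* PIC model: GSI maps 1:1 onto the IRQ */
}

static int register_gsi_ioapic(struct device *dev, unsigned int gsi,
                               int trigger, int polarity)
{
        return (int)gsi;                /* kernel would route via mp_register_gsi() */
}

/* default to the PIC variant, as the patch does with acpi_register_gsi_pic */
static int (*register_gsi)(struct device *, unsigned int, int, int) =
        register_gsi_pic;

int main(void)
{
        printf("PIC model: IRQ %d\n", register_gsi(NULL, 9, 0, 0));
        register_gsi = register_gsi_ioapic;     /* acpi_set_irq_model_ioapic() */
        printf("IO-APIC model: IRQ %d\n", register_gsi(NULL, 9, 0, 0));
        return 0;
}
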
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index e1252074ea40..69fd72aa5594 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -13,6 +13,10 @@
13#include <asm/segment.h> 13#include <asm/segment.h>
14#include <asm/desc.h> 14#include <asm/desc.h>
15 15
16#ifdef CONFIG_X86_32
17#include <asm/pgtable.h>
18#endif
19
16#include "realmode/wakeup.h" 20#include "realmode/wakeup.h"
17#include "sleep.h" 21#include "sleep.h"
18 22
@@ -91,7 +95,7 @@ int acpi_save_state_mem(void)
91 95
92#ifndef CONFIG_64BIT 96#ifndef CONFIG_64BIT
93 header->pmode_entry = (u32)&wakeup_pmode_return; 97 header->pmode_entry = (u32)&wakeup_pmode_return;
94 header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET); 98 header->pmode_cr3 = (u32)__pa(&initial_page_table);
95 saved_magic = 0x12345678; 99 saved_magic = 0x12345678;
96#else /* CONFIG_64BIT */ 100#else /* CONFIG_64BIT */
97 header->trampoline_segment = setup_trampoline() >> 4; 101 header->trampoline_segment = setup_trampoline() >> 4;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a36bb90aef53..5079f24c955a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -638,71 +638,32 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
638 atomic_set(&stop_machine_first, 1); 638 atomic_set(&stop_machine_first, 1);
639 wrote_text = 0; 639 wrote_text = 0;
640 /* Use __stop_machine() because the caller already got online_cpus. */ 640 /* Use __stop_machine() because the caller already got online_cpus. */
641 __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); 641 __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
642 return addr; 642 return addr;
643} 643}
644 644
645#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) 645#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
646 646
647unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; 647#ifdef CONFIG_X86_64
648unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
649#else
650unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
651#endif
648 652
649void __init arch_init_ideal_nop5(void) 653void __init arch_init_ideal_nop5(void)
650{ 654{
651 extern const unsigned char ftrace_test_p6nop[];
652 extern const unsigned char ftrace_test_nop5[];
653 extern const unsigned char ftrace_test_jmp[];
654 int faulted = 0;
655
656 /* 655 /*
657 * There is no good nop for all x86 archs. 656 * There is no good nop for all x86 archs. This selection
658 * We will default to using the P6_NOP5, but first we 657 * algorithm should be unified with the one in find_nop_table(),
659 * will test to make sure that the nop will actually 658 * but this should be good enough for now.
660 * work on this CPU. If it faults, we will then
661 * go to a lesser efficient 5 byte nop. If that fails
662 * we then just use a jmp as our nop. This isn't the most
663 * efficient nop, but we can not use a multi part nop
664 * since we would then risk being preempted in the middle
665 * of that nop, and if we enabled tracing then, it might
666 * cause a system crash.
667 * 659 *
668 * TODO: check the cpuid to determine the best nop. 660 * For cases other than the ones below, use the safe (as in
661 * always functional) defaults above.
669 */ 662 */
670 asm volatile ( 663#ifdef CONFIG_X86_64
671 "ftrace_test_jmp:" 664 /* Don't use these on 32 bits due to broken virtualizers */
672 "jmp ftrace_test_p6nop\n" 665 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
673 "nop\n" 666 memcpy(ideal_nop5, p6_nops[5], 5);
674 "nop\n" 667#endif
675 "nop\n" /* 2 byte jmp + 3 bytes */
676 "ftrace_test_p6nop:"
677 P6_NOP5
678 "jmp 1f\n"
679 "ftrace_test_nop5:"
680 ".byte 0x66,0x66,0x66,0x66,0x90\n"
681 "1:"
682 ".section .fixup, \"ax\"\n"
683 "2: movl $1, %0\n"
684 " jmp ftrace_test_nop5\n"
685 "3: movl $2, %0\n"
686 " jmp 1b\n"
687 ".previous\n"
688 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
689 _ASM_EXTABLE(ftrace_test_nop5, 3b)
690 : "=r"(faulted) : "0" (faulted));
691
692 switch (faulted) {
693 case 0:
694 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
695 memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
696 break;
697 case 1:
698 pr_info("converting mcount calls to 66 66 66 66 90\n");
699 memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
700 break;
701 case 2:
702 pr_info("converting mcount calls to jmp . + 5\n");
703 memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
704 break;
705 }
706
707} 668}
708#endif 669#endif
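
For reference, the alternative.c hunks above drop the runtime fault-and-fallback
NOP probe and hard-code the 5-byte NOP choices. The encodings involved (taken
from the removed pr_info strings and the new ideal_nop5 defaults) are:

        0f 1f 44 00 00     P6 NOP, copied from p6_nops[5] on 64-bit Intel CPUs
        66 66 66 66 90     default for CONFIG_X86_64
        3e 8d 74 26 00     default for 32-bit, which avoids the P6 NOP because
                           of broken virtualizers
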
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 8f6463d8ed0d..affacb5e0065 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -12,95 +12,116 @@
12 12
13static u32 *flush_words; 13static u32 *flush_words;
14 14
15struct pci_device_id k8_nb_ids[] = { 15struct pci_device_id amd_nb_misc_ids[] = {
16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, 16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, 18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
19 {} 19 {}
20}; 20};
21EXPORT_SYMBOL(k8_nb_ids); 21EXPORT_SYMBOL(amd_nb_misc_ids);
22 22
23struct k8_northbridge_info k8_northbridges; 23struct amd_northbridge_info amd_northbridges;
24EXPORT_SYMBOL(k8_northbridges); 24EXPORT_SYMBOL(amd_northbridges);
25 25
26static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) 26static struct pci_dev *next_northbridge(struct pci_dev *dev,
27 struct pci_device_id *ids)
27{ 28{
28 do { 29 do {
29 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); 30 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
30 if (!dev) 31 if (!dev)
31 break; 32 break;
32 } while (!pci_match_id(&k8_nb_ids[0], dev)); 33 } while (!pci_match_id(ids, dev));
33 return dev; 34 return dev;
34} 35}
35 36
36int cache_k8_northbridges(void) 37int amd_cache_northbridges(void)
37{ 38{
38 int i; 39 int i = 0;
39 struct pci_dev *dev; 40 struct amd_northbridge *nb;
41 struct pci_dev *misc;
40 42
41 if (k8_northbridges.num) 43 if (amd_nb_num())
42 return 0; 44 return 0;
43 45
44 dev = NULL; 46 misc = NULL;
45 while ((dev = next_k8_northbridge(dev)) != NULL) 47 while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
46 k8_northbridges.num++; 48 i++;
47 49
48 /* some CPU families (e.g. family 0x11) do not support GART */ 50 if (i == 0)
49 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || 51 return 0;
50 boot_cpu_data.x86 == 0x15)
51 k8_northbridges.gart_supported = 1;
52 52
53 k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) * 53 nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
54 sizeof(void *), GFP_KERNEL); 54 if (!nb)
55 if (!k8_northbridges.nb_misc)
56 return -ENOMEM; 55 return -ENOMEM;
57 56
58 if (!k8_northbridges.num) { 57 amd_northbridges.nb = nb;
59 k8_northbridges.nb_misc[0] = NULL; 58 amd_northbridges.num = i;
60 return 0;
61 }
62 59
63 if (k8_northbridges.gart_supported) { 60 misc = NULL;
64 flush_words = kmalloc(k8_northbridges.num * sizeof(u32), 61 for (i = 0; i != amd_nb_num(); i++) {
65 GFP_KERNEL); 62 node_to_amd_nb(i)->misc = misc =
66 if (!flush_words) { 63 next_northbridge(misc, amd_nb_misc_ids);
67 kfree(k8_northbridges.nb_misc); 64 }
68 return -ENOMEM; 65
69 } 66 /* some CPU families (e.g. family 0x11) do not support GART */
70 } 67 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
68 boot_cpu_data.x86 == 0x15)
69 amd_northbridges.flags |= AMD_NB_GART;
70
71 /*
72 * Some CPU families support L3 Cache Index Disable. There are some
73 * limitations because of E382 and E388 on family 0x10.
74 */
75 if (boot_cpu_data.x86 == 0x10 &&
76 boot_cpu_data.x86_model >= 0x8 &&
77 (boot_cpu_data.x86_model > 0x9 ||
78 boot_cpu_data.x86_mask >= 0x1))
79 amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
71 80
72 dev = NULL;
73 i = 0;
74 while ((dev = next_k8_northbridge(dev)) != NULL) {
75 k8_northbridges.nb_misc[i] = dev;
76 if (k8_northbridges.gart_supported)
77 pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
78 }
79 k8_northbridges.nb_misc[i] = NULL;
80 return 0; 81 return 0;
81} 82}
82EXPORT_SYMBOL_GPL(cache_k8_northbridges); 83EXPORT_SYMBOL_GPL(amd_cache_northbridges);
83 84
84/* Ignores subdevice/subvendor but as far as I can figure out 85/* Ignores subdevice/subvendor but as far as I can figure out
85 they're useless anyways */ 86 they're useless anyways */
86int __init early_is_k8_nb(u32 device) 87int __init early_is_amd_nb(u32 device)
87{ 88{
88 struct pci_device_id *id; 89 struct pci_device_id *id;
89 u32 vendor = device & 0xffff; 90 u32 vendor = device & 0xffff;
90 device >>= 16; 91 device >>= 16;
91 for (id = k8_nb_ids; id->vendor; id++) 92 for (id = amd_nb_misc_ids; id->vendor; id++)
92 if (vendor == id->vendor && device == id->device) 93 if (vendor == id->vendor && device == id->device)
93 return 1; 94 return 1;
94 return 0; 95 return 0;
95} 96}
96 97
97void k8_flush_garts(void) 98int amd_cache_gart(void)
99{
100 int i;
101
102 if (!amd_nb_has_feature(AMD_NB_GART))
103 return 0;
104
105 flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
106 if (!flush_words) {
107 amd_northbridges.flags &= ~AMD_NB_GART;
108 return -ENOMEM;
109 }
110
111 for (i = 0; i != amd_nb_num(); i++)
112 pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
113 &flush_words[i]);
114
115 return 0;
116}
117
118void amd_flush_garts(void)
98{ 119{
99 int flushed, i; 120 int flushed, i;
100 unsigned long flags; 121 unsigned long flags;
101 static DEFINE_SPINLOCK(gart_lock); 122 static DEFINE_SPINLOCK(gart_lock);
102 123
103 if (!k8_northbridges.gart_supported) 124 if (!amd_nb_has_feature(AMD_NB_GART))
104 return; 125 return;
105 126
106 /* Avoid races between AGP and IOMMU. In theory it's not needed 127 /* Avoid races between AGP and IOMMU. In theory it's not needed
@@ -109,16 +130,16 @@ void k8_flush_garts(void)
109 that it doesn't matter to serialize more. -AK */ 130 that it doesn't matter to serialize more. -AK */
110 spin_lock_irqsave(&gart_lock, flags); 131 spin_lock_irqsave(&gart_lock, flags);
111 flushed = 0; 132 flushed = 0;
112 for (i = 0; i < k8_northbridges.num; i++) { 133 for (i = 0; i < amd_nb_num(); i++) {
113 pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c, 134 pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
114 flush_words[i]|1); 135 flush_words[i] | 1);
115 flushed++; 136 flushed++;
116 } 137 }
117 for (i = 0; i < k8_northbridges.num; i++) { 138 for (i = 0; i < amd_nb_num(); i++) {
118 u32 w; 139 u32 w;
119 /* Make sure the hardware actually executed the flush*/ 140 /* Make sure the hardware actually executed the flush*/
120 for (;;) { 141 for (;;) {
121 pci_read_config_dword(k8_northbridges.nb_misc[i], 142 pci_read_config_dword(node_to_amd_nb(i)->misc,
122 0x9c, &w); 143 0x9c, &w);
123 if (!(w & 1)) 144 if (!(w & 1))
124 break; 145 break;
@@ -129,19 +150,23 @@ void k8_flush_garts(void)
129 if (!flushed) 150 if (!flushed)
130 printk("nothing to flush?\n"); 151 printk("nothing to flush?\n");
131} 152}
132EXPORT_SYMBOL_GPL(k8_flush_garts); 153EXPORT_SYMBOL_GPL(amd_flush_garts);
133 154
134static __init int init_k8_nbs(void) 155static __init int init_amd_nbs(void)
135{ 156{
136 int err = 0; 157 int err = 0;
137 158
138 err = cache_k8_northbridges(); 159 err = amd_cache_northbridges();
139 160
140 if (err < 0) 161 if (err < 0)
141 printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n"); 162 printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
163
164 if (amd_cache_gart() < 0)
165 printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
166 "GART support disabled.\n");
142 167
143 return err; 168 return err;
144} 169}
145 170
146/* This has to go after the PCI subsystem */ 171/* This has to go after the PCI subsystem */
147fs_initcall(init_k8_nbs); 172fs_initcall(init_amd_nbs);
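
The amd_nb.c rework above replaces the k8_northbridges array handling with a
cached amd_northbridges structure reached through amd_nb_num(),
node_to_amd_nb() and amd_nb_has_feature(). A hedged sketch of a consumer after
this change (the function name is hypothetical, error handling is omitted, and
it assumes the usual PCI and amd_nb declarations are in scope):

/* hypothetical consumer of the reworked AMD northbridge API shown above */
static void dump_gart_flush_words(void)
{
        u32 w;
        int i;

        if (!amd_nb_has_feature(AMD_NB_GART))
                return;                 /* CPU family without GART support */

        for (i = 0; i < amd_nb_num(); i++) {
                pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, &w);
                pr_info("NB %d: GART flush word 0x%08x\n", i, w);
        }
}
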
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index b3a16e8f0703..dcd7c83e1659 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -206,7 +206,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
206 * Do an PCI bus scan by hand because we're running before the PCI 206 * Do an PCI bus scan by hand because we're running before the PCI
207 * subsystem. 207 * subsystem.
208 * 208 *
209 * All K8 AGP bridges are AGPv3 compliant, so we can do this scan 209 * All AMD AGP bridges are AGPv3 compliant, so we can do this scan
210 * generically. It's probably overkill to always scan all slots because 210 * generically. It's probably overkill to always scan all slots because
211 * the AGP bridges should be always an own bus on the HT hierarchy, 211 * the AGP bridges should be always an own bus on the HT hierarchy,
212 * but do it here for future safety. 212 * but do it here for future safety.
@@ -303,7 +303,7 @@ void __init early_gart_iommu_check(void)
303 dev_limit = bus_dev_ranges[i].dev_limit; 303 dev_limit = bus_dev_ranges[i].dev_limit;
304 304
305 for (slot = dev_base; slot < dev_limit; slot++) { 305 for (slot = dev_base; slot < dev_limit; slot++) {
306 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 306 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
307 continue; 307 continue;
308 308
309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -358,7 +358,7 @@ void __init early_gart_iommu_check(void)
358 dev_limit = bus_dev_ranges[i].dev_limit; 358 dev_limit = bus_dev_ranges[i].dev_limit;
359 359
360 for (slot = dev_base; slot < dev_limit; slot++) { 360 for (slot = dev_base; slot < dev_limit; slot++) {
361 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 361 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
362 continue; 362 continue;
363 363
364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -400,7 +400,7 @@ int __init gart_iommu_hole_init(void)
400 dev_limit = bus_dev_ranges[i].dev_limit; 400 dev_limit = bus_dev_ranges[i].dev_limit;
401 401
402 for (slot = dev_base; slot < dev_limit; slot++) { 402 for (slot = dev_base; slot < dev_limit; slot++) {
403 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 403 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
404 continue; 404 continue;
405 405
406 iommu_detected = 1; 406 iommu_detected = 1;
@@ -518,7 +518,7 @@ out:
518 dev_base = bus_dev_ranges[i].dev_base; 518 dev_base = bus_dev_ranges[i].dev_base;
519 dev_limit = bus_dev_ranges[i].dev_limit; 519 dev_limit = bus_dev_ranges[i].dev_limit;
520 for (slot = dev_base; slot < dev_limit; slot++) { 520 for (slot = dev_base; slot < dev_limit; slot++) {
521 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 521 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
522 continue; 522 continue;
523 523
524 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); 524 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 463839645f9b..c48a64510844 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -52,7 +52,6 @@
52#include <asm/mce.h> 52#include <asm/mce.h>
53#include <asm/kvm_para.h> 53#include <asm/kvm_para.h>
54#include <asm/tsc.h> 54#include <asm/tsc.h>
55#include <asm/atomic.h>
56 55
57unsigned int num_processors; 56unsigned int num_processors;
58 57
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 4f026a632c95..4abf08aab3d4 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3113,7 +3113,7 @@ void destroy_irq(unsigned int irq)
3113 3113
3114 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); 3114 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
3115 3115
3116 if (intr_remapping_enabled) 3116 if (irq_remapped(cfg))
3117 free_irte(irq); 3117 free_irte(irq);
3118 raw_spin_lock_irqsave(&vector_lock, flags); 3118 raw_spin_lock_irqsave(&vector_lock, flags);
3119 __clear_irq_vector(irq, cfg); 3119 __clear_irq_vector(irq, cfg);
@@ -3335,7 +3335,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3335 return 0; 3335 return 0;
3336} 3336}
3337 3337
3338int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3338int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3339{ 3339{
3340 int node, ret, sub_handle, index = 0; 3340 int node, ret, sub_handle, index = 0;
3341 unsigned int irq, irq_want; 3341 unsigned int irq, irq_want;
@@ -3393,7 +3393,7 @@ error:
3393 return ret; 3393 return ret;
3394} 3394}
3395 3395
3396void arch_teardown_msi_irq(unsigned int irq) 3396void native_teardown_msi_irq(unsigned int irq)
3397{ 3397{
3398 destroy_irq(irq); 3398 destroy_irq(irq);
3399} 3399}
@@ -3654,6 +3654,11 @@ static void __init probe_nr_irqs_gsi(void)
3654 printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); 3654 printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
3655} 3655}
3656 3656
3657int get_nr_irqs_gsi(void)
3658{
3659 return nr_irqs_gsi;
3660}
3661
3657#ifdef CONFIG_SPARSE_IRQ 3662#ifdef CONFIG_SPARSE_IRQ
3658int __init arch_probe_nr_irqs(void) 3663int __init arch_probe_nr_irqs(void)
3659{ 3664{
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f744f54cb248..194539aea175 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV APIC functions (note: not an Intel compatible APIC) 6 * SGI UV APIC functions (note: not an Intel compatible APIC)
7 * 7 *
8 * Copyright (C) 2007-2009 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10#include <linux/cpumask.h> 10#include <linux/cpumask.h>
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
@@ -41,6 +41,7 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
41 41
42static enum uv_system_type uv_system_type; 42static enum uv_system_type uv_system_type;
43static u64 gru_start_paddr, gru_end_paddr; 43static u64 gru_start_paddr, gru_end_paddr;
44static union uvh_apicid uvh_apicid;
44int uv_min_hub_revision_id; 45int uv_min_hub_revision_id;
45EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); 46EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
46static DEFINE_SPINLOCK(uv_nmi_lock); 47static DEFINE_SPINLOCK(uv_nmi_lock);
@@ -70,12 +71,27 @@ static int early_get_nodeid(void)
70 return node_id.s.node_id; 71 return node_id.s.node_id;
71} 72}
72 73
74static void __init early_get_apic_pnode_shift(void)
75{
76 unsigned long *mmr;
77
78 mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr));
79 uvh_apicid.v = *mmr;
80 early_iounmap(mmr, sizeof(*mmr));
81 if (!uvh_apicid.v)
82 /*
83 * Old bios, use default value
84 */
85 uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
86}
87
73static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 88static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
74{ 89{
75 int nodeid; 90 int nodeid;
76 91
77 if (!strcmp(oem_id, "SGI")) { 92 if (!strcmp(oem_id, "SGI")) {
78 nodeid = early_get_nodeid(); 93 nodeid = early_get_nodeid();
94 early_get_apic_pnode_shift();
79 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; 95 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
80 x86_platform.nmi_init = uv_nmi_init; 96 x86_platform.nmi_init = uv_nmi_init;
81 if (!strcmp(oem_table_id, "UVL")) 97 if (!strcmp(oem_table_id, "UVL"))
@@ -84,7 +100,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
84 uv_system_type = UV_X2APIC; 100 uv_system_type = UV_X2APIC;
85 else if (!strcmp(oem_table_id, "UVH")) { 101 else if (!strcmp(oem_table_id, "UVH")) {
86 __get_cpu_var(x2apic_extra_bits) = 102 __get_cpu_var(x2apic_extra_bits) =
87 nodeid << (UV_APIC_PNODE_SHIFT - 1); 103 nodeid << (uvh_apicid.s.pnode_shift - 1);
88 uv_system_type = UV_NON_UNIQUE_APIC; 104 uv_system_type = UV_NON_UNIQUE_APIC;
89 return 1; 105 return 1;
90 } 106 }
@@ -363,14 +379,14 @@ struct redir_addr {
363#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 379#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
364 380
365static __initdata struct redir_addr redir_addrs[] = { 381static __initdata struct redir_addr redir_addrs[] = {
366 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG}, 382 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR},
367 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG}, 383 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR},
368 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG}, 384 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR},
369}; 385};
370 386
371static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) 387static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
372{ 388{
373 union uvh_si_alias0_overlay_config_u alias; 389 union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
374 union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; 390 union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
375 int i; 391 int i;
376 392
@@ -644,7 +660,7 @@ void uv_nmi_init(void)
644 660
645void __init uv_system_init(void) 661void __init uv_system_init(void)
646{ 662{
647 union uvh_si_addr_map_config_u m_n_config; 663 union uvh_rh_gam_config_mmr_u m_n_config;
648 union uvh_node_id_u node_id; 664 union uvh_node_id_u node_id;
649 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; 665 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
650 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; 666 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
@@ -654,7 +670,7 @@ void __init uv_system_init(void)
654 670
655 map_low_mmrs(); 671 map_low_mmrs();
656 672
657 m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); 673 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
658 m_val = m_n_config.s.m_skt; 674 m_val = m_n_config.s.m_skt;
659 n_val = m_n_config.s.n_skt; 675 n_val = m_n_config.s.n_skt;
660 mmr_base = 676 mmr_base =
@@ -716,6 +732,10 @@ void __init uv_system_init(void)
716 int apicid = per_cpu(x86_cpu_to_apicid, cpu); 732 int apicid = per_cpu(x86_cpu_to_apicid, cpu);
717 733
718 nid = cpu_to_node(cpu); 734 nid = cpu_to_node(cpu);
735 /*
736 * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
737 */
738 uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
719 pnode = uv_apicid_to_pnode(apicid); 739 pnode = uv_apicid_to_pnode(apicid);
720 blade = boot_pnode_to_blade(pnode); 740 blade = boot_pnode_to_blade(pnode);
721 lcpu = uv_blade_info[blade].nr_possible_cpus; 741 lcpu = uv_blade_info[blade].nr_possible_cpus;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 4c9c67bf09b7..0e4f24c2a746 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -189,8 +189,8 @@
189 * Intel Order Number 241704-001. Microsoft Part Number 781-110-X01. 189 * Intel Order Number 241704-001. Microsoft Part Number 781-110-X01.
190 * 190 *
191 * [This document is available free from Intel by calling 800.628.8686 (fax 191 * [This document is available free from Intel by calling 800.628.8686 (fax
192 * 916.356.6100) or 800.548.4725; or via anonymous ftp from 192 * 916.356.6100) or 800.548.4725; or from
193 * ftp://ftp.intel.com/pub/IAL/software_specs/apmv11.doc. It is also 193 * http://www.microsoft.com/whdc/archive/amp_12.mspx It is also
194 * available from Microsoft by calling 206.882.8080.] 194 * available from Microsoft by calling 206.882.8080.]
195 * 195 *
196 * APM 1.2 Reference: 196 * APM 1.2 Reference:
@@ -1926,6 +1926,7 @@ static const struct file_operations apm_bios_fops = {
1926 .unlocked_ioctl = do_ioctl, 1926 .unlocked_ioctl = do_ioctl,
1927 .open = do_open, 1927 .open = do_open,
1928 .release = do_release, 1928 .release = do_release,
1929 .llseek = noop_llseek,
1929}; 1930};
1930 1931
1931static struct miscdevice apm_device = { 1932static struct miscdevice apm_device = {
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index dfdbf6403895..1a4088dda37a 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -99,9 +99,7 @@ void foo(void)
99 99
100 DEFINE(PAGE_SIZE_asm, PAGE_SIZE); 100 DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
101 DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT); 101 DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
102 DEFINE(PTRS_PER_PTE, PTRS_PER_PTE); 102 DEFINE(THREAD_SIZE_asm, THREAD_SIZE);
103 DEFINE(PTRS_PER_PMD, PTRS_PER_PMD);
104 DEFINE(PTRS_PER_PGD, PTRS_PER_PGD);
105 103
106 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); 104 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
107 105
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
deleted file mode 100644
index 8bc57baaa9ad..000000000000
--- a/arch/x86/kernel/bios_uv.c
+++ /dev/null
@@ -1,215 +0,0 @@
1/*
2 * BIOS run time interface routines.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson <rja@sgi.com>
20 */
21
22#include <linux/efi.h>
23#include <asm/efi.h>
24#include <linux/io.h>
25#include <asm/uv/bios.h>
26#include <asm/uv/uv_hub.h>
27
28static struct uv_systab uv_systab;
29
30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
31{
32 struct uv_systab *tab = &uv_systab;
33 s64 ret;
34
35 if (!tab->function)
36 /*
37 * BIOS does not support UV systab
38 */
39 return BIOS_STATUS_UNIMPLEMENTED;
40
41 ret = efi_call6((void *)__va(tab->function), (u64)which,
42 a1, a2, a3, a4, a5);
43 return ret;
44}
45EXPORT_SYMBOL_GPL(uv_bios_call);
46
47s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
48 u64 a4, u64 a5)
49{
50 unsigned long bios_flags;
51 s64 ret;
52
53 local_irq_save(bios_flags);
54 ret = uv_bios_call(which, a1, a2, a3, a4, a5);
55 local_irq_restore(bios_flags);
56
57 return ret;
58}
59
60s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
61 u64 a4, u64 a5)
62{
63 s64 ret;
64
65 preempt_disable();
66 ret = uv_bios_call(which, a1, a2, a3, a4, a5);
67 preempt_enable();
68
69 return ret;
70}
71
72
73long sn_partition_id;
74EXPORT_SYMBOL_GPL(sn_partition_id);
75long sn_coherency_id;
76EXPORT_SYMBOL_GPL(sn_coherency_id);
77long sn_region_size;
78EXPORT_SYMBOL_GPL(sn_region_size);
79long system_serial_number;
80EXPORT_SYMBOL_GPL(system_serial_number);
81int uv_type;
82EXPORT_SYMBOL_GPL(uv_type);
83
84
85s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
86 long *region, long *ssn)
87{
88 s64 ret;
89 u64 v0, v1;
90 union partition_info_u part;
91
92 ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc,
93 (u64)(&v0), (u64)(&v1), 0, 0);
94 if (ret != BIOS_STATUS_SUCCESS)
95 return ret;
96
97 part.val = v0;
98 if (uvtype)
99 *uvtype = part.hub_version;
100 if (partid)
101 *partid = part.partition_id;
102 if (coher)
103 *coher = part.coherence_id;
104 if (region)
105 *region = part.region_size;
106 if (ssn)
107 *ssn = v1;
108 return ret;
109}
110EXPORT_SYMBOL_GPL(uv_bios_get_sn_info);
111
112int
113uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size,
114 unsigned long *intr_mmr_offset)
115{
116 u64 watchlist;
117 s64 ret;
118
119 /*
120 * bios returns watchlist number or negative error number.
121 */
122 ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr,
123 mq_size, (u64)intr_mmr_offset,
124 (u64)&watchlist, 0);
125 if (ret < BIOS_STATUS_SUCCESS)
126 return ret;
127
128 return watchlist;
129}
130EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc);
131
132int
133uv_bios_mq_watchlist_free(int blade, int watchlist_num)
134{
135 return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE,
136 blade, watchlist_num, 0, 0, 0);
137}
138EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free);
139
140s64
141uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms)
142{
143 return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len,
144 perms, 0, 0);
145}
146EXPORT_SYMBOL_GPL(uv_bios_change_memprotect);
147
148s64
149uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
150{
151 s64 ret;
152
153 ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
154 (u64)addr, buf, (u64)len, 0);
155 return ret;
156}
157EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa);
158
159s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
160{
161 return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
162 (u64)ticks_per_second, 0, 0, 0);
163}
164EXPORT_SYMBOL_GPL(uv_bios_freq_base);
165
166/*
167 * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target
168 * @decode: true to enable target, false to disable target
169 * @domain: PCI domain number
170 * @bus: PCI bus number
171 *
172 * Returns:
173 * 0: Success
174 * -EINVAL: Invalid domain or bus number
175 * -ENOSYS: Capability not available
176 * -EBUSY: Legacy VGA I/O cannot be retargeted at this time
177 */
178int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
179{
180 return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET,
181 (u64)decode, (u64)domain, (u64)bus, 0, 0);
182}
183EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
184
185
186#ifdef CONFIG_EFI
187void uv_bios_init(void)
188{
189 struct uv_systab *tab;
190
191 if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
192 (efi.uv_systab == (unsigned long)NULL)) {
193 printk(KERN_CRIT "No EFI UV System Table.\n");
194 uv_systab.function = (unsigned long)NULL;
195 return;
196 }
197
198 tab = (struct uv_systab *)ioremap(efi.uv_systab,
199 sizeof(struct uv_systab));
200 if (strncmp(tab->signature, "UVST", 4) != 0)
201 printk(KERN_ERR "bad signature in UV system table!");
202
203 /*
204 * Copy table to permanent spot for later use.
205 */
206 memcpy(&uv_systab, tab, sizeof(struct uv_systab));
207 iounmap(tab);
208
209 printk(KERN_INFO "EFI UV System Table Revision %d\n",
210 uv_systab.revision);
211}
212#else /* !CONFIG_EFI */
213
214void uv_bios_init(void) { }
215#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index cd8da247dda1..a2baafb2fe6d 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -701,6 +701,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
701 per_cpu(acfreq_data, policy->cpu) = NULL; 701 per_cpu(acfreq_data, policy->cpu) = NULL;
702 acpi_processor_unregister_performance(data->acpi_data, 702 acpi_processor_unregister_performance(data->acpi_data,
703 policy->cpu); 703 policy->cpu);
704 kfree(data->freq_table);
704 kfree(data); 705 kfree(data);
705 } 706 }
706 707
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index 733093d60436..141abebc4516 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -393,7 +393,7 @@ static struct cpufreq_driver nforce2_driver = {
393 * Detects nForce2 A2 and C1 stepping 393 * Detects nForce2 A2 and C1 stepping
394 * 394 *
395 */ 395 */
396static unsigned int nforce2_detect_chipset(void) 396static int nforce2_detect_chipset(void)
397{ 397{
398 nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 398 nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
399 PCI_DEVICE_ID_NVIDIA_NFORCE2, 399 PCI_DEVICE_ID_NVIDIA_NFORCE2,
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index fc09f142d94d..d9f51367666b 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -35,7 +35,7 @@ static unsigned int longrun_low_freq, longrun_high_freq;
35 * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS 35 * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
36 * and MSR_TMTA_LONGRUN_CTRL 36 * and MSR_TMTA_LONGRUN_CTRL
37 */ 37 */
38static void __init longrun_get_policy(struct cpufreq_policy *policy) 38static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy)
39{ 39{
40 u32 msr_lo, msr_hi; 40 u32 msr_lo, msr_hi;
41 41
@@ -165,7 +165,7 @@ static unsigned int longrun_get(unsigned int cpu)
165 * TMTA rules: 165 * TMTA rules:
166 * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) 166 * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
167 */ 167 */
168static unsigned int __cpuinit longrun_determine_freqs(unsigned int *low_freq, 168static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
169 unsigned int *high_freq) 169 unsigned int *high_freq)
170{ 170{
171 u32 msr_lo, msr_hi; 171 u32 msr_lo, msr_hi;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 695f17731e23..d16c2c53d6bf 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -284,9 +284,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
284 /* Don't do the funky fallback heuristics the AMD version employs 284 /* Don't do the funky fallback heuristics the AMD version employs
285 for now. */ 285 for now. */
286 node = apicid_to_node[apicid]; 286 node = apicid_to_node[apicid];
287 if (node == NUMA_NO_NODE) 287 if (node == NUMA_NO_NODE || !node_online(node)) {
288 node = first_node(node_online_map);
289 else if (!node_online(node)) {
290 /* reuse the value from init_cpu_to_node() */ 288 /* reuse the value from init_cpu_to_node() */
291 node = cpu_to_node(cpu); 289 node = cpu_to_node(cpu);
292 } 290 }
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 12cd823c8d03..9ecf81f9b90f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx {
149}; 149};
150 150
151struct amd_l3_cache { 151struct amd_l3_cache {
152 struct pci_dev *dev; 152 struct amd_northbridge *nb;
153 bool can_disable;
154 unsigned indices; 153 unsigned indices;
155 u8 subcaches[4]; 154 u8 subcaches[4];
156}; 155};
@@ -311,14 +310,12 @@ struct _cache_attr {
311/* 310/*
312 * L3 cache descriptors 311 * L3 cache descriptors
313 */ 312 */
314static struct amd_l3_cache **__cpuinitdata l3_caches;
315
316static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) 313static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
317{ 314{
318 unsigned int sc0, sc1, sc2, sc3; 315 unsigned int sc0, sc1, sc2, sc3;
319 u32 val = 0; 316 u32 val = 0;
320 317
321 pci_read_config_dword(l3->dev, 0x1C4, &val); 318 pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
322 319
323 /* calculate subcache sizes */ 320 /* calculate subcache sizes */
324 l3->subcaches[0] = sc0 = !(val & BIT(0)); 321 l3->subcaches[0] = sc0 = !(val & BIT(0));
@@ -327,49 +324,17 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
327 l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); 324 l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
328 325
329 l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; 326 l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
327 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
330} 328}
331 329
332static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) 330static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
333{ 331 int index)
334 struct amd_l3_cache *l3;
335 struct pci_dev *dev = node_to_k8_nb_misc(node);
336
337 l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
338 if (!l3) {
339 printk(KERN_WARNING "Error allocating L3 struct\n");
340 return NULL;
341 }
342
343 l3->dev = dev;
344
345 amd_calc_l3_indices(l3);
346
347 return l3;
348}
349
350static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
351 int index)
352{ 332{
333 static struct amd_l3_cache *__cpuinitdata l3_caches;
353 int node; 334 int node;
354 335
355 if (boot_cpu_data.x86 != 0x10) 336 /* only for L3, and not in virtualized environments */
356 return; 337 if (index < 3 || amd_nb_num() == 0)
357
358 if (index < 3)
359 return;
360
361 /* see errata #382 and #388 */
362 if (boot_cpu_data.x86_model < 0x8)
363 return;
364
365 if ((boot_cpu_data.x86_model == 0x8 ||
366 boot_cpu_data.x86_model == 0x9)
367 &&
368 boot_cpu_data.x86_mask < 0x1)
369 return;
370
371 /* not in virtualized environments */
372 if (k8_northbridges.num == 0)
373 return; 338 return;
374 339
375 /* 340 /*
@@ -377,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
377 * never freed but this is done only on shutdown so it doesn't matter. 342 * never freed but this is done only on shutdown so it doesn't matter.
378 */ 343 */
379 if (!l3_caches) { 344 if (!l3_caches) {
380 int size = k8_northbridges.num * sizeof(struct amd_l3_cache *); 345 int size = amd_nb_num() * sizeof(struct amd_l3_cache);
381 346
382 l3_caches = kzalloc(size, GFP_ATOMIC); 347 l3_caches = kzalloc(size, GFP_ATOMIC);
383 if (!l3_caches) 348 if (!l3_caches)
@@ -386,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
386 351
387 node = amd_get_nb_id(smp_processor_id()); 352 node = amd_get_nb_id(smp_processor_id());
388 353
389 if (!l3_caches[node]) { 354 if (!l3_caches[node].nb) {
390 l3_caches[node] = amd_init_l3_cache(node); 355 l3_caches[node].nb = node_to_amd_nb(node);
391 l3_caches[node]->can_disable = true; 356 amd_calc_l3_indices(&l3_caches[node]);
392 } 357 }
393 358
394 WARN_ON(!l3_caches[node]); 359 this_leaf->l3 = &l3_caches[node];
395
396 this_leaf->l3 = l3_caches[node];
397} 360}
398 361
399/* 362/*
@@ -407,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
407{ 370{
408 unsigned int reg = 0; 371 unsigned int reg = 0;
409 372
410 pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg); 373 pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
411 374
412 /* check whether this slot is activated already */ 375 /* check whether this slot is activated already */
413 if (reg & (3UL << 30)) 376 if (reg & (3UL << 30))
@@ -421,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
421{ 384{
422 int index; 385 int index;
423 386
424 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 387 if (!this_leaf->l3 ||
388 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
425 return -EINVAL; 389 return -EINVAL;
426 390
427 index = amd_get_l3_disable_slot(this_leaf->l3, slot); 391 index = amd_get_l3_disable_slot(this_leaf->l3, slot);
@@ -456,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
456 if (!l3->subcaches[i]) 420 if (!l3->subcaches[i])
457 continue; 421 continue;
458 422
459 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); 423 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
460 424
461 /* 425 /*
462 * We need to WBINVD on a core on the node containing the L3 426 * We need to WBINVD on a core on the node containing the L3
@@ -466,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
466 wbinvd_on_cpu(cpu); 430 wbinvd_on_cpu(cpu);
467 431
468 reg |= BIT(31); 432 reg |= BIT(31);
469 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); 433 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
470 } 434 }
471} 435}
472 436
@@ -523,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
523 if (!capable(CAP_SYS_ADMIN)) 487 if (!capable(CAP_SYS_ADMIN))
524 return -EPERM; 488 return -EPERM;
525 489
526 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 490 if (!this_leaf->l3 ||
491 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
527 return -EINVAL; 492 return -EINVAL;
528 493
529 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); 494 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
@@ -544,7 +509,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
544#define STORE_CACHE_DISABLE(slot) \ 509#define STORE_CACHE_DISABLE(slot) \
545static ssize_t \ 510static ssize_t \
546store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ 511store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
547 const char *buf, size_t count) \ 512 const char *buf, size_t count) \
548{ \ 513{ \
549 return store_cache_disable(this_leaf, buf, count, slot); \ 514 return store_cache_disable(this_leaf, buf, count, slot); \
550} 515}
@@ -557,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
557 show_cache_disable_1, store_cache_disable_1); 522 show_cache_disable_1, store_cache_disable_1);
558 523
559#else /* CONFIG_AMD_NB */ 524#else /* CONFIG_AMD_NB */
560static void __cpuinit 525#define amd_init_l3_cache(x, y)
561amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
562{
563};
564#endif /* CONFIG_AMD_NB */ 526#endif /* CONFIG_AMD_NB */
565 527
566static int 528static int
@@ -574,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
574 536
575 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { 537 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
576 amd_cpuid4(index, &eax, &ebx, &ecx); 538 amd_cpuid4(index, &eax, &ebx, &ecx);
577 amd_check_l3_disable(this_leaf, index); 539 amd_init_l3_cache(this_leaf, index);
578 } else { 540 } else {
579 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); 541 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
580 } 542 }
@@ -982,30 +944,48 @@ define_one_ro(size);
982define_one_ro(shared_cpu_map); 944define_one_ro(shared_cpu_map);
983define_one_ro(shared_cpu_list); 945define_one_ro(shared_cpu_list);
984 946
985#define DEFAULT_SYSFS_CACHE_ATTRS \
986 &type.attr, \
987 &level.attr, \
988 &coherency_line_size.attr, \
989 &physical_line_partition.attr, \
990 &ways_of_associativity.attr, \
991 &number_of_sets.attr, \
992 &size.attr, \
993 &shared_cpu_map.attr, \
994 &shared_cpu_list.attr
995
996static struct attribute *default_attrs[] = { 947static struct attribute *default_attrs[] = {
997 DEFAULT_SYSFS_CACHE_ATTRS, 948 &type.attr,
949 &level.attr,
950 &coherency_line_size.attr,
951 &physical_line_partition.attr,
952 &ways_of_associativity.attr,
953 &number_of_sets.attr,
954 &size.attr,
955 &shared_cpu_map.attr,
956 &shared_cpu_list.attr,
998 NULL 957 NULL
999}; 958};
1000 959
1001static struct attribute *default_l3_attrs[] = {
1002 DEFAULT_SYSFS_CACHE_ATTRS,
1003#ifdef CONFIG_AMD_NB 960#ifdef CONFIG_AMD_NB
1004 &cache_disable_0.attr, 961static struct attribute ** __cpuinit amd_l3_attrs(void)
1005 &cache_disable_1.attr, 962{
963 static struct attribute **attrs;
964 int n;
965
966 if (attrs)
967 return attrs;
968
969 n = sizeof (default_attrs) / sizeof (struct attribute *);
970
971 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
972 n += 2;
973
974 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
975 if (attrs == NULL)
976 return attrs = default_attrs;
977
978 for (n = 0; default_attrs[n]; n++)
979 attrs[n] = default_attrs[n];
980
981 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
982 attrs[n++] = &cache_disable_0.attr;
983 attrs[n++] = &cache_disable_1.attr;
984 }
985
986 return attrs;
987}
1006#endif 988#endif
1007 NULL
1008};
1009 989
1010static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) 990static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1011{ 991{
@@ -1116,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1116 1096
1117 this_leaf = CPUID4_INFO_IDX(cpu, i); 1097 this_leaf = CPUID4_INFO_IDX(cpu, i);
1118 1098
1119 if (this_leaf->l3 && this_leaf->l3->can_disable) 1099 ktype_cache.default_attrs = default_attrs;
1120 ktype_cache.default_attrs = default_l3_attrs; 1100#ifdef CONFIG_AMD_NB
1121 else 1101 if (this_leaf->l3)
1122 ktype_cache.default_attrs = default_attrs; 1102 ktype_cache.default_attrs = amd_l3_attrs();
1123 1103#endif
1124 retval = kobject_init_and_add(&(this_object->kobj), 1104 retval = kobject_init_and_add(&(this_object->kobj),
1125 &ktype_cache, 1105 &ktype_cache,
1126 per_cpu(ici_cache_kobject, cpu), 1106 per_cpu(ici_cache_kobject, cpu),
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 8a85dd1b1aa1..1e8d66c1336a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -192,6 +192,7 @@ static const struct file_operations severities_coverage_fops = {
192 .release = seq_release, 192 .release = seq_release,
193 .read = seq_read, 193 .read = seq_read,
194 .write = severities_coverage_write, 194 .write = severities_coverage_write,
195 .llseek = seq_lseek,
195}; 196};
196 197
197static int __init severities_debugfs_init(void) 198static int __init severities_debugfs_init(void)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ed41562909fe..7a35b72d7c03 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1665,6 +1665,7 @@ struct file_operations mce_chrdev_ops = {
1665 .read = mce_read, 1665 .read = mce_read,
1666 .poll = mce_poll, 1666 .poll = mce_poll,
1667 .unlocked_ioctl = mce_ioctl, 1667 .unlocked_ioctl = mce_ioctl,
1668 .llseek = no_llseek,
1668}; 1669};
1669EXPORT_SYMBOL_GPL(mce_chrdev_ops); 1670EXPORT_SYMBOL_GPL(mce_chrdev_ops);
1670 1671
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fe73c1844a9a..ed6310183efb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -49,7 +49,6 @@ static unsigned long
49copy_from_user_nmi(void *to, const void __user *from, unsigned long n) 49copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
50{ 50{
51 unsigned long offset, addr = (unsigned long)from; 51 unsigned long offset, addr = (unsigned long)from;
52 int type = in_nmi() ? KM_NMI : KM_IRQ0;
53 unsigned long size, len = 0; 52 unsigned long size, len = 0;
54 struct page *page; 53 struct page *page;
55 void *map; 54 void *map;
@@ -63,9 +62,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
63 offset = addr & (PAGE_SIZE - 1); 62 offset = addr & (PAGE_SIZE - 1);
64 size = min(PAGE_SIZE - offset, n - len); 63 size = min(PAGE_SIZE - offset, n - len);
65 64
66 map = kmap_atomic(page, type); 65 map = kmap_atomic(page);
67 memcpy(to, map+offset, size); 66 memcpy(to, map+offset, size);
68 kunmap_atomic(map, type); 67 kunmap_atomic(map);
69 put_page(page); 68 put_page(page);
70 69
71 len += size; 70 len += size;
@@ -238,6 +237,7 @@ struct x86_pmu {
238 * Intel DebugStore bits 237 * Intel DebugStore bits
239 */ 238 */
240 int bts, pebs; 239 int bts, pebs;
240 int bts_active, pebs_active;
241 int pebs_record_size; 241 int pebs_record_size;
242 void (*drain_pebs)(struct pt_regs *regs); 242 void (*drain_pebs)(struct pt_regs *regs);
243 struct event_constraint *pebs_constraints; 243 struct event_constraint *pebs_constraints;
@@ -381,7 +381,7 @@ static void release_pmc_hardware(void) {}
381 381
382#endif 382#endif
383 383
384static int reserve_ds_buffers(void); 384static void reserve_ds_buffers(void);
385static void release_ds_buffers(void); 385static void release_ds_buffers(void);
386 386
387static void hw_perf_event_destroy(struct perf_event *event) 387static void hw_perf_event_destroy(struct perf_event *event)
@@ -478,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event)
478 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && 478 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
479 (hwc->sample_period == 1)) { 479 (hwc->sample_period == 1)) {
480 /* BTS is not supported by this architecture. */ 480 /* BTS is not supported by this architecture. */
481 if (!x86_pmu.bts) 481 if (!x86_pmu.bts_active)
482 return -EOPNOTSUPP; 482 return -EOPNOTSUPP;
483 483
484 /* BTS is currently only allowed for user-mode. */ 484 /* BTS is currently only allowed for user-mode. */
@@ -497,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
497 int precise = 0; 497 int precise = 0;
498 498
499 /* Support for constant skid */ 499 /* Support for constant skid */
500 if (x86_pmu.pebs) 500 if (x86_pmu.pebs_active) {
501 precise++; 501 precise++;
502 502
503 /* Support for IP fixup */ 503 /* Support for IP fixup */
504 if (x86_pmu.lbr_nr) 504 if (x86_pmu.lbr_nr)
505 precise++; 505 precise++;
506 }
506 507
507 if (event->attr.precise_ip > precise) 508 if (event->attr.precise_ip > precise)
508 return -EOPNOTSUPP; 509 return -EOPNOTSUPP;
@@ -544,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event)
544 if (atomic_read(&active_events) == 0) { 545 if (atomic_read(&active_events) == 0) {
545 if (!reserve_pmc_hardware()) 546 if (!reserve_pmc_hardware())
546 err = -EBUSY; 547 err = -EBUSY;
547 else { 548 else
548 err = reserve_ds_buffers(); 549 reserve_ds_buffers();
549 if (err)
550 release_pmc_hardware();
551 }
552 } 550 }
553 if (!err) 551 if (!err)
554 atomic_inc(&active_events); 552 atomic_inc(&active_events);
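Among the perf_event.c changes above, copy_from_user_nmi() is switched to the stack-based kmap_atomic() API, which no longer takes a KM_* slot argument. A small sketch of that usage, assuming only that the page has already been pinned:

#include <linux/highmem.h>
#include <linux/string.h>

static void copy_from_page(void *dst, struct page *page,
			   unsigned long offset, size_t len)
{
	void *map = kmap_atomic(page);	/* was kmap_atomic(page, KM_NMI) */

	memcpy(dst, map + offset, len);
	kunmap_atomic(map);		/* was kunmap_atomic(map, KM_NMI) */
}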
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 46d58448c3af..e421b8cd6944 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -280,11 +280,11 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
280 struct amd_nb *nb; 280 struct amd_nb *nb;
281 int i; 281 int i;
282 282
283 nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); 283 nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO,
284 cpu_to_node(cpu));
284 if (!nb) 285 if (!nb)
285 return NULL; 286 return NULL;
286 287
287 memset(nb, 0, sizeof(*nb));
288 nb->nb_id = nb_id; 288 nb->nb_id = nb_id;
289 289
290 /* 290 /*
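The amd_alloc_nb() hunk folds kmalloc() plus memset() into a single zeroed, NUMA-local allocation. A sketch of the same idiom with a hypothetical structure; kzalloc_node() would be equivalent to passing __GFP_ZERO here.

#include <linux/slab.h>
#include <linux/topology.h>

struct example_nb {
	int nb_id;
	/* ... */
};

static struct example_nb *example_alloc_nb(int cpu, int nb_id)
{
	struct example_nb *nb;

	/* zeroed memory from the node that owns this CPU, in one call */
	nb = kmalloc_node(sizeof(*nb), GFP_KERNEL | __GFP_ZERO,
			  cpu_to_node(cpu));
	if (!nb)
		return NULL;

	nb->nb_id = nb_id;
	return nb;
}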
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 4977f9c400e5..b7dcd9f2b8a0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu)
74 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); 74 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
75} 75}
76 76
77static int alloc_pebs_buffer(int cpu)
78{
79 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
80 int node = cpu_to_node(cpu);
81 int max, thresh = 1; /* always use a single PEBS record */
82 void *buffer;
83
84 if (!x86_pmu.pebs)
85 return 0;
86
87 buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
88 if (unlikely(!buffer))
89 return -ENOMEM;
90
91 max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
92
93 ds->pebs_buffer_base = (u64)(unsigned long)buffer;
94 ds->pebs_index = ds->pebs_buffer_base;
95 ds->pebs_absolute_maximum = ds->pebs_buffer_base +
96 max * x86_pmu.pebs_record_size;
97
98 ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
99 thresh * x86_pmu.pebs_record_size;
100
101 return 0;
102}
103
104static void release_pebs_buffer(int cpu)
105{
106 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
107
108 if (!ds || !x86_pmu.pebs)
109 return;
110
111 kfree((void *)(unsigned long)ds->pebs_buffer_base);
112 ds->pebs_buffer_base = 0;
113}
114
115static int alloc_bts_buffer(int cpu)
116{
117 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
118 int node = cpu_to_node(cpu);
119 int max, thresh;
120 void *buffer;
121
122 if (!x86_pmu.bts)
123 return 0;
124
125 buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
126 if (unlikely(!buffer))
127 return -ENOMEM;
128
129 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
130 thresh = max / 16;
131
132 ds->bts_buffer_base = (u64)(unsigned long)buffer;
133 ds->bts_index = ds->bts_buffer_base;
134 ds->bts_absolute_maximum = ds->bts_buffer_base +
135 max * BTS_RECORD_SIZE;
136 ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
137 thresh * BTS_RECORD_SIZE;
138
139 return 0;
140}
141
142static void release_bts_buffer(int cpu)
143{
144 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
145
146 if (!ds || !x86_pmu.bts)
147 return;
148
149 kfree((void *)(unsigned long)ds->bts_buffer_base);
150 ds->bts_buffer_base = 0;
151}
152
153static int alloc_ds_buffer(int cpu)
154{
155 int node = cpu_to_node(cpu);
156 struct debug_store *ds;
157
158 ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
159 if (unlikely(!ds))
160 return -ENOMEM;
161
162 per_cpu(cpu_hw_events, cpu).ds = ds;
163
164 return 0;
165}
166
167static void release_ds_buffer(int cpu)
168{
169 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
170
171 if (!ds)
172 return;
173
174 per_cpu(cpu_hw_events, cpu).ds = NULL;
175 kfree(ds);
176}
177
77static void release_ds_buffers(void) 178static void release_ds_buffers(void)
78{ 179{
79 int cpu; 180 int cpu;
@@ -82,93 +183,77 @@ static void release_ds_buffers(void)
82 return; 183 return;
83 184
84 get_online_cpus(); 185 get_online_cpus();
85
86 for_each_online_cpu(cpu) 186 for_each_online_cpu(cpu)
87 fini_debug_store_on_cpu(cpu); 187 fini_debug_store_on_cpu(cpu);
88 188
89 for_each_possible_cpu(cpu) { 189 for_each_possible_cpu(cpu) {
90 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 190 release_pebs_buffer(cpu);
91 191 release_bts_buffer(cpu);
92 if (!ds) 192 release_ds_buffer(cpu);
93 continue;
94
95 per_cpu(cpu_hw_events, cpu).ds = NULL;
96
97 kfree((void *)(unsigned long)ds->pebs_buffer_base);
98 kfree((void *)(unsigned long)ds->bts_buffer_base);
99 kfree(ds);
100 } 193 }
101
102 put_online_cpus(); 194 put_online_cpus();
103} 195}
104 196
105static int reserve_ds_buffers(void) 197static void reserve_ds_buffers(void)
106{ 198{
107 int cpu, err = 0; 199 int bts_err = 0, pebs_err = 0;
200 int cpu;
201
202 x86_pmu.bts_active = 0;
203 x86_pmu.pebs_active = 0;
108 204
109 if (!x86_pmu.bts && !x86_pmu.pebs) 205 if (!x86_pmu.bts && !x86_pmu.pebs)
110 return 0; 206 return;
207
208 if (!x86_pmu.bts)
209 bts_err = 1;
210
211 if (!x86_pmu.pebs)
212 pebs_err = 1;
111 213
112 get_online_cpus(); 214 get_online_cpus();
113 215
114 for_each_possible_cpu(cpu) { 216 for_each_possible_cpu(cpu) {
115 struct debug_store *ds; 217 if (alloc_ds_buffer(cpu)) {
116 void *buffer; 218 bts_err = 1;
117 int max, thresh; 219 pebs_err = 1;
220 }
221
222 if (!bts_err && alloc_bts_buffer(cpu))
223 bts_err = 1;
118 224
119 err = -ENOMEM; 225 if (!pebs_err && alloc_pebs_buffer(cpu))
120 ds = kzalloc(sizeof(*ds), GFP_KERNEL); 226 pebs_err = 1;
121 if (unlikely(!ds)) 227
228 if (bts_err && pebs_err)
122 break; 229 break;
123 per_cpu(cpu_hw_events, cpu).ds = ds; 230 }
124
125 if (x86_pmu.bts) {
126 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
127 if (unlikely(!buffer))
128 break;
129
130 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
131 thresh = max / 16;
132
133 ds->bts_buffer_base = (u64)(unsigned long)buffer;
134 ds->bts_index = ds->bts_buffer_base;
135 ds->bts_absolute_maximum = ds->bts_buffer_base +
136 max * BTS_RECORD_SIZE;
137 ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
138 thresh * BTS_RECORD_SIZE;
139 }
140 231
141 if (x86_pmu.pebs) { 232 if (bts_err) {
142 buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); 233 for_each_possible_cpu(cpu)
143 if (unlikely(!buffer)) 234 release_bts_buffer(cpu);
144 break; 235 }
145
146 max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
147
148 ds->pebs_buffer_base = (u64)(unsigned long)buffer;
149 ds->pebs_index = ds->pebs_buffer_base;
150 ds->pebs_absolute_maximum = ds->pebs_buffer_base +
151 max * x86_pmu.pebs_record_size;
152 /*
153 * Always use single record PEBS
154 */
155 ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
156 x86_pmu.pebs_record_size;
157 }
158 236
159 err = 0; 237 if (pebs_err) {
238 for_each_possible_cpu(cpu)
239 release_pebs_buffer(cpu);
160 } 240 }
161 241
162 if (err) 242 if (bts_err && pebs_err) {
163 release_ds_buffers(); 243 for_each_possible_cpu(cpu)
164 else { 244 release_ds_buffer(cpu);
245 } else {
246 if (x86_pmu.bts && !bts_err)
247 x86_pmu.bts_active = 1;
248
249 if (x86_pmu.pebs && !pebs_err)
250 x86_pmu.pebs_active = 1;
251
165 for_each_online_cpu(cpu) 252 for_each_online_cpu(cpu)
166 init_debug_store_on_cpu(cpu); 253 init_debug_store_on_cpu(cpu);
167 } 254 }
168 255
169 put_online_cpus(); 256 put_online_cpus();
170
171 return err;
172} 257}
173 258
174/* 259/*
@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void)
233 if (!event) 318 if (!event)
234 return 0; 319 return 0;
235 320
236 if (!ds) 321 if (!x86_pmu.bts_active)
237 return 0; 322 return 0;
238 323
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 324 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
503 struct pebs_record_core *at, *top; 588 struct pebs_record_core *at, *top;
504 int n; 589 int n;
505 590
506 if (!ds || !x86_pmu.pebs) 591 if (!x86_pmu.pebs_active)
507 return; 592 return;
508 593
509 at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; 594 at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
545 u64 status = 0; 630 u64 status = 0;
546 int bit, n; 631 int bit, n;
547 632
548 if (!ds || !x86_pmu.pebs) 633 if (!x86_pmu.pebs_active)
549 return; 634 return;
550 635
551 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; 636 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
@@ -630,9 +715,8 @@ static void intel_ds_init(void)
630 715
631#else /* CONFIG_CPU_SUP_INTEL */ 716#else /* CONFIG_CPU_SUP_INTEL */
632 717
633static int reserve_ds_buffers(void) 718static void reserve_ds_buffers(void)
634{ 719{
635 return 0;
636} 720}
637 721
638static void release_ds_buffers(void) 722static void release_ds_buffers(void)
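The rewritten reserve_ds_buffers() above tracks BTS and PEBS allocation failures independently, so one feature can stay usable when the other cannot get memory. A condensed sketch of that pattern, with hypothetical alloc_a()/alloc_b() helpers standing in for the BTS/PEBS buffer allocators:

#include <linux/cpumask.h>

static int a_active, b_active;		/* stand-ins for bts_active/pebs_active */

static int alloc_a(int cpu) { return 0; }	/* 0 on success */
static int alloc_b(int cpu) { return 0; }
static void release_a(int cpu) { }
static void release_b(int cpu) { }

static void reserve_both(void)
{
	int a_err = 0, b_err = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		if (!a_err && alloc_a(cpu))
			a_err = 1;
		if (!b_err && alloc_b(cpu))
			b_err = 1;
		if (a_err && b_err)
			break;
	}

	/* release only what failed; the surviving feature stays active */
	if (a_err)
		for_each_possible_cpu(cpu)
			release_a(cpu);
	if (b_err)
		for_each_possible_cpu(cpu)
			release_b(cpu);

	a_active = !a_err;
	b_active = !b_err;
}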
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 67414550c3cc..d5cd13945d5a 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -61,7 +61,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
61 if (!is_crashed_pfn_valid(pfn)) 61 if (!is_crashed_pfn_valid(pfn))
62 return -EFAULT; 62 return -EFAULT;
63 63
64 vaddr = kmap_atomic_pfn(pfn, KM_PTE0); 64 vaddr = kmap_atomic_pfn(pfn);
65 65
66 if (!userbuf) { 66 if (!userbuf) {
67 memcpy(buf, (vaddr + offset), csize); 67 memcpy(buf, (vaddr + offset), csize);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 0f6376ffa2d9..1bc7f75a5bda 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -82,11 +82,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
82 if (kstack_end(stack)) 82 if (kstack_end(stack))
83 break; 83 break;
84 if (i && ((i % STACKSLOTS_PER_LINE) == 0)) 84 if (i && ((i % STACKSLOTS_PER_LINE) == 0))
85 printk("\n%s", log_lvl); 85 printk(KERN_CONT "\n");
86 printk(" %08lx", *stack++); 86 printk(KERN_CONT " %08lx", *stack++);
87 touch_nmi_watchdog(); 87 touch_nmi_watchdog();
88 } 88 }
89 printk("\n"); 89 printk(KERN_CONT "\n");
90 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 90 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
91} 91}
92 92
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 57a21f11c791..6a340485249a 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -265,20 +265,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
265 if (stack >= irq_stack && stack <= irq_stack_end) { 265 if (stack >= irq_stack && stack <= irq_stack_end) {
266 if (stack == irq_stack_end) { 266 if (stack == irq_stack_end) {
267 stack = (unsigned long *) (irq_stack_end[-1]); 267 stack = (unsigned long *) (irq_stack_end[-1]);
268 printk(" <EOI> "); 268 printk(KERN_CONT " <EOI> ");
269 } 269 }
270 } else { 270 } else {
271 if (((long) stack & (THREAD_SIZE-1)) == 0) 271 if (((long) stack & (THREAD_SIZE-1)) == 0)
272 break; 272 break;
273 } 273 }
274 if (i && ((i % STACKSLOTS_PER_LINE) == 0)) 274 if (i && ((i % STACKSLOTS_PER_LINE) == 0))
275 printk("\n%s", log_lvl); 275 printk(KERN_CONT "\n");
276 printk(" %016lx", *stack++); 276 printk(KERN_CONT " %016lx", *stack++);
277 touch_nmi_watchdog(); 277 touch_nmi_watchdog();
278 } 278 }
279 preempt_enable(); 279 preempt_enable();
280 280
281 printk("\n"); 281 printk(KERN_CONT "\n");
282 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 282 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
283} 283}
284 284
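Both dumpstack hunks above change continuation fragments of a stack dump to use KERN_CONT, so only the first fragment carries a log level. A short sketch of the idiom:

#include <linux/kernel.h>

static void print_words(const unsigned long *stack, int n)
{
	int i;

	printk(KERN_DEBUG "stack:");		/* level only on the first piece */
	for (i = 0; i < n; i++)
		printk(KERN_CONT " %08lx", stack[i]);
	printk(KERN_CONT "\n");			/* terminate the logical line */
}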
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
deleted file mode 100644
index 0fe27d7c6258..000000000000
--- a/arch/x86/kernel/efi.c
+++ /dev/null
@@ -1,613 +0,0 @@
1/*
2 * Common EFI (Extensible Firmware Interface) support functions
3 * Based on Extensible Firmware Interface Specification version 1.0
4 *
5 * Copyright (C) 1999 VA Linux Systems
6 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
7 * Copyright (C) 1999-2002 Hewlett-Packard Co.
8 * David Mosberger-Tang <davidm@hpl.hp.com>
9 * Stephane Eranian <eranian@hpl.hp.com>
10 * Copyright (C) 2005-2008 Intel Co.
11 * Fenghua Yu <fenghua.yu@intel.com>
12 * Bibo Mao <bibo.mao@intel.com>
13 * Chandramouli Narayanan <mouli@linux.intel.com>
14 * Huang Ying <ying.huang@intel.com>
15 *
16 * Copied from efi_32.c to eliminate the duplicated code between EFI
17 * 32/64 support code. --ying 2007-10-26
18 *
19 * All EFI Runtime Services are not implemented yet as EFI only
20 * supports physical mode addressing on SoftSDV. This is to be fixed
21 * in a future version. --drummond 1999-07-20
22 *
23 * Implemented EFI runtime services and virtual mode calls. --davidm
24 *
25 * Goutham Rao: <goutham.rao@intel.com>
26 * Skip non-WB memory and ignore empty memory ranges.
27 */
28
29#include <linux/kernel.h>
30#include <linux/init.h>
31#include <linux/efi.h>
32#include <linux/bootmem.h>
33#include <linux/memblock.h>
34#include <linux/spinlock.h>
35#include <linux/uaccess.h>
36#include <linux/time.h>
37#include <linux/io.h>
38#include <linux/reboot.h>
39#include <linux/bcd.h>
40
41#include <asm/setup.h>
42#include <asm/efi.h>
43#include <asm/time.h>
44#include <asm/cacheflush.h>
45#include <asm/tlbflush.h>
46#include <asm/x86_init.h>
47
48#define EFI_DEBUG 1
49#define PFX "EFI: "
50
51int efi_enabled;
52EXPORT_SYMBOL(efi_enabled);
53
54struct efi efi;
55EXPORT_SYMBOL(efi);
56
57struct efi_memory_map memmap;
58
59static struct efi efi_phys __initdata;
60static efi_system_table_t efi_systab __initdata;
61
62static int __init setup_noefi(char *arg)
63{
64 efi_enabled = 0;
65 return 0;
66}
67early_param("noefi", setup_noefi);
68
69int add_efi_memmap;
70EXPORT_SYMBOL(add_efi_memmap);
71
72static int __init setup_add_efi_memmap(char *arg)
73{
74 add_efi_memmap = 1;
75 return 0;
76}
77early_param("add_efi_memmap", setup_add_efi_memmap);
78
79
80static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
81{
82 return efi_call_virt2(get_time, tm, tc);
83}
84
85static efi_status_t virt_efi_set_time(efi_time_t *tm)
86{
87 return efi_call_virt1(set_time, tm);
88}
89
90static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
91 efi_bool_t *pending,
92 efi_time_t *tm)
93{
94 return efi_call_virt3(get_wakeup_time,
95 enabled, pending, tm);
96}
97
98static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
99{
100 return efi_call_virt2(set_wakeup_time,
101 enabled, tm);
102}
103
104static efi_status_t virt_efi_get_variable(efi_char16_t *name,
105 efi_guid_t *vendor,
106 u32 *attr,
107 unsigned long *data_size,
108 void *data)
109{
110 return efi_call_virt5(get_variable,
111 name, vendor, attr,
112 data_size, data);
113}
114
115static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
116 efi_char16_t *name,
117 efi_guid_t *vendor)
118{
119 return efi_call_virt3(get_next_variable,
120 name_size, name, vendor);
121}
122
123static efi_status_t virt_efi_set_variable(efi_char16_t *name,
124 efi_guid_t *vendor,
125 unsigned long attr,
126 unsigned long data_size,
127 void *data)
128{
129 return efi_call_virt5(set_variable,
130 name, vendor, attr,
131 data_size, data);
132}
133
134static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
135{
136 return efi_call_virt1(get_next_high_mono_count, count);
137}
138
139static void virt_efi_reset_system(int reset_type,
140 efi_status_t status,
141 unsigned long data_size,
142 efi_char16_t *data)
143{
144 efi_call_virt4(reset_system, reset_type, status,
145 data_size, data);
146}
147
148static efi_status_t virt_efi_set_virtual_address_map(
149 unsigned long memory_map_size,
150 unsigned long descriptor_size,
151 u32 descriptor_version,
152 efi_memory_desc_t *virtual_map)
153{
154 return efi_call_virt4(set_virtual_address_map,
155 memory_map_size, descriptor_size,
156 descriptor_version, virtual_map);
157}
158
159static efi_status_t __init phys_efi_set_virtual_address_map(
160 unsigned long memory_map_size,
161 unsigned long descriptor_size,
162 u32 descriptor_version,
163 efi_memory_desc_t *virtual_map)
164{
165 efi_status_t status;
166
167 efi_call_phys_prelog();
168 status = efi_call_phys4(efi_phys.set_virtual_address_map,
169 memory_map_size, descriptor_size,
170 descriptor_version, virtual_map);
171 efi_call_phys_epilog();
172 return status;
173}
174
175static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
176 efi_time_cap_t *tc)
177{
178 efi_status_t status;
179
180 efi_call_phys_prelog();
181 status = efi_call_phys2(efi_phys.get_time, tm, tc);
182 efi_call_phys_epilog();
183 return status;
184}
185
186int efi_set_rtc_mmss(unsigned long nowtime)
187{
188 int real_seconds, real_minutes;
189 efi_status_t status;
190 efi_time_t eft;
191 efi_time_cap_t cap;
192
193 status = efi.get_time(&eft, &cap);
194 if (status != EFI_SUCCESS) {
195 printk(KERN_ERR "Oops: efitime: can't read time!\n");
196 return -1;
197 }
198
199 real_seconds = nowtime % 60;
200 real_minutes = nowtime / 60;
201 if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
202 real_minutes += 30;
203 real_minutes %= 60;
204 eft.minute = real_minutes;
205 eft.second = real_seconds;
206
207 status = efi.set_time(&eft);
208 if (status != EFI_SUCCESS) {
209 printk(KERN_ERR "Oops: efitime: can't write time!\n");
210 return -1;
211 }
212 return 0;
213}
214
215unsigned long efi_get_time(void)
216{
217 efi_status_t status;
218 efi_time_t eft;
219 efi_time_cap_t cap;
220
221 status = efi.get_time(&eft, &cap);
222 if (status != EFI_SUCCESS)
223 printk(KERN_ERR "Oops: efitime: can't read time!\n");
224
225 return mktime(eft.year, eft.month, eft.day, eft.hour,
226 eft.minute, eft.second);
227}
228
229/*
230 * Tell the kernel about the EFI memory map. This might include
231 * more than the max 128 entries that can fit in the e820 legacy
232 * (zeropage) memory map.
233 */
234
235static void __init do_add_efi_memmap(void)
236{
237 void *p;
238
239 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
240 efi_memory_desc_t *md = p;
241 unsigned long long start = md->phys_addr;
242 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
243 int e820_type;
244
245 switch (md->type) {
246 case EFI_LOADER_CODE:
247 case EFI_LOADER_DATA:
248 case EFI_BOOT_SERVICES_CODE:
249 case EFI_BOOT_SERVICES_DATA:
250 case EFI_CONVENTIONAL_MEMORY:
251 if (md->attribute & EFI_MEMORY_WB)
252 e820_type = E820_RAM;
253 else
254 e820_type = E820_RESERVED;
255 break;
256 case EFI_ACPI_RECLAIM_MEMORY:
257 e820_type = E820_ACPI;
258 break;
259 case EFI_ACPI_MEMORY_NVS:
260 e820_type = E820_NVS;
261 break;
262 case EFI_UNUSABLE_MEMORY:
263 e820_type = E820_UNUSABLE;
264 break;
265 default:
266 /*
267 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
268 * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
269 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
270 */
271 e820_type = E820_RESERVED;
272 break;
273 }
274 e820_add_region(start, size, e820_type);
275 }
276 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
277}
278
279void __init efi_memblock_x86_reserve_range(void)
280{
281 unsigned long pmap;
282
283#ifdef CONFIG_X86_32
284 pmap = boot_params.efi_info.efi_memmap;
285#else
286 pmap = (boot_params.efi_info.efi_memmap |
287 ((__u64)boot_params.efi_info.efi_memmap_hi<<32));
288#endif
289 memmap.phys_map = (void *)pmap;
290 memmap.nr_map = boot_params.efi_info.efi_memmap_size /
291 boot_params.efi_info.efi_memdesc_size;
292 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
293 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
294 memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size,
295 "EFI memmap");
296}
297
298#if EFI_DEBUG
299static void __init print_efi_memmap(void)
300{
301 efi_memory_desc_t *md;
302 void *p;
303 int i;
304
305 for (p = memmap.map, i = 0;
306 p < memmap.map_end;
307 p += memmap.desc_size, i++) {
308 md = p;
309 printk(KERN_INFO PFX "mem%02u: type=%u, attr=0x%llx, "
310 "range=[0x%016llx-0x%016llx) (%lluMB)\n",
311 i, md->type, md->attribute, md->phys_addr,
312 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
313 (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
314 }
315}
316#endif /* EFI_DEBUG */
317
318void __init efi_init(void)
319{
320 efi_config_table_t *config_tables;
321 efi_runtime_services_t *runtime;
322 efi_char16_t *c16;
323 char vendor[100] = "unknown";
324 int i = 0;
325 void *tmp;
326
327#ifdef CONFIG_X86_32
328 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
329#else
330 efi_phys.systab = (efi_system_table_t *)
331 (boot_params.efi_info.efi_systab |
332 ((__u64)boot_params.efi_info.efi_systab_hi<<32));
333#endif
334
335 efi.systab = early_ioremap((unsigned long)efi_phys.systab,
336 sizeof(efi_system_table_t));
337 if (efi.systab == NULL)
338 printk(KERN_ERR "Couldn't map the EFI system table!\n");
339 memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t));
340 early_iounmap(efi.systab, sizeof(efi_system_table_t));
341 efi.systab = &efi_systab;
342
343 /*
344 * Verify the EFI Table
345 */
346 if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
347 printk(KERN_ERR "EFI system table signature incorrect!\n");
348 if ((efi.systab->hdr.revision >> 16) == 0)
349 printk(KERN_ERR "Warning: EFI system table version "
350 "%d.%02d, expected 1.00 or greater!\n",
351 efi.systab->hdr.revision >> 16,
352 efi.systab->hdr.revision & 0xffff);
353
354 /*
355 * Show what we know for posterity
356 */
357 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
358 if (c16) {
359 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
360 vendor[i] = *c16++;
361 vendor[i] = '\0';
362 } else
363 printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
364 early_iounmap(tmp, 2);
365
366 printk(KERN_INFO "EFI v%u.%.02u by %s\n",
367 efi.systab->hdr.revision >> 16,
368 efi.systab->hdr.revision & 0xffff, vendor);
369
370 /*
371 * Let's see what config tables the firmware passed to us.
372 */
373 config_tables = early_ioremap(
374 efi.systab->tables,
375 efi.systab->nr_tables * sizeof(efi_config_table_t));
376 if (config_tables == NULL)
377 printk(KERN_ERR "Could not map EFI Configuration Table!\n");
378
379 printk(KERN_INFO);
380 for (i = 0; i < efi.systab->nr_tables; i++) {
381 if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) {
382 efi.mps = config_tables[i].table;
383 printk(" MPS=0x%lx ", config_tables[i].table);
384 } else if (!efi_guidcmp(config_tables[i].guid,
385 ACPI_20_TABLE_GUID)) {
386 efi.acpi20 = config_tables[i].table;
387 printk(" ACPI 2.0=0x%lx ", config_tables[i].table);
388 } else if (!efi_guidcmp(config_tables[i].guid,
389 ACPI_TABLE_GUID)) {
390 efi.acpi = config_tables[i].table;
391 printk(" ACPI=0x%lx ", config_tables[i].table);
392 } else if (!efi_guidcmp(config_tables[i].guid,
393 SMBIOS_TABLE_GUID)) {
394 efi.smbios = config_tables[i].table;
395 printk(" SMBIOS=0x%lx ", config_tables[i].table);
396#ifdef CONFIG_X86_UV
397 } else if (!efi_guidcmp(config_tables[i].guid,
398 UV_SYSTEM_TABLE_GUID)) {
399 efi.uv_systab = config_tables[i].table;
400 printk(" UVsystab=0x%lx ", config_tables[i].table);
401#endif
402 } else if (!efi_guidcmp(config_tables[i].guid,
403 HCDP_TABLE_GUID)) {
404 efi.hcdp = config_tables[i].table;
405 printk(" HCDP=0x%lx ", config_tables[i].table);
406 } else if (!efi_guidcmp(config_tables[i].guid,
407 UGA_IO_PROTOCOL_GUID)) {
408 efi.uga = config_tables[i].table;
409 printk(" UGA=0x%lx ", config_tables[i].table);
410 }
411 }
412 printk("\n");
413 early_iounmap(config_tables,
414 efi.systab->nr_tables * sizeof(efi_config_table_t));
415
416 /*
417 * Check out the runtime services table. We need to map
418 * the runtime services table so that we can grab the physical
419 * address of several of the EFI runtime functions, needed to
420 * set the firmware into virtual mode.
421 */
422 runtime = early_ioremap((unsigned long)efi.systab->runtime,
423 sizeof(efi_runtime_services_t));
424 if (runtime != NULL) {
425 /*
426 * We will only need *early* access to the following
427 * two EFI runtime services before set_virtual_address_map
428 * is invoked.
429 */
430 efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
431 efi_phys.set_virtual_address_map =
432 (efi_set_virtual_address_map_t *)
433 runtime->set_virtual_address_map;
434 /*
435 * Make efi_get_time can be called before entering
436 * virtual mode.
437 */
438 efi.get_time = phys_efi_get_time;
439 } else
440 printk(KERN_ERR "Could not map the EFI runtime service "
441 "table!\n");
442 early_iounmap(runtime, sizeof(efi_runtime_services_t));
443
444 /* Map the EFI memory map */
445 memmap.map = early_ioremap((unsigned long)memmap.phys_map,
446 memmap.nr_map * memmap.desc_size);
447 if (memmap.map == NULL)
448 printk(KERN_ERR "Could not map the EFI memory map!\n");
449 memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
450
451 if (memmap.desc_size != sizeof(efi_memory_desc_t))
452 printk(KERN_WARNING
453 "Kernel-defined memdesc doesn't match the one from EFI!\n");
454
455 if (add_efi_memmap)
456 do_add_efi_memmap();
457
458#ifdef CONFIG_X86_32
459 x86_platform.get_wallclock = efi_get_time;
460 x86_platform.set_wallclock = efi_set_rtc_mmss;
461#endif
462
463 /* Setup for EFI runtime service */
464 reboot_type = BOOT_EFI;
465
466#if EFI_DEBUG
467 print_efi_memmap();
468#endif
469}
470
471static void __init runtime_code_page_mkexec(void)
472{
473 efi_memory_desc_t *md;
474 void *p;
475 u64 addr, npages;
476
477 /* Make EFI runtime service code area executable */
478 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
479 md = p;
480
481 if (md->type != EFI_RUNTIME_SERVICES_CODE)
482 continue;
483
484 addr = md->virt_addr;
485 npages = md->num_pages;
486 memrange_efi_to_native(&addr, &npages);
487 set_memory_x(addr, npages);
488 }
489}
490
491/*
492 * This function will switch the EFI runtime services to virtual mode.
493 * Essentially, look through the EFI memmap and map every region that
494 * has the runtime attribute bit set in its memory descriptor and update
495 * that memory descriptor with the virtual address obtained from ioremap().
496 * This enables the runtime services to be called without having to
497 * thunk back into physical mode for every invocation.
498 */
499void __init efi_enter_virtual_mode(void)
500{
501 efi_memory_desc_t *md;
502 efi_status_t status;
503 unsigned long size;
504 u64 end, systab, addr, npages, end_pfn;
505 void *p, *va;
506
507 efi.systab = NULL;
508 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
509 md = p;
510 if (!(md->attribute & EFI_MEMORY_RUNTIME))
511 continue;
512
513 size = md->num_pages << EFI_PAGE_SHIFT;
514 end = md->phys_addr + size;
515
516 end_pfn = PFN_UP(end);
517 if (end_pfn <= max_low_pfn_mapped
518 || (end_pfn > (1UL << (32 - PAGE_SHIFT))
519 && end_pfn <= max_pfn_mapped))
520 va = __va(md->phys_addr);
521 else
522 va = efi_ioremap(md->phys_addr, size, md->type);
523
524 md->virt_addr = (u64) (unsigned long) va;
525
526 if (!va) {
527 printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
528 (unsigned long long)md->phys_addr);
529 continue;
530 }
531
532 if (!(md->attribute & EFI_MEMORY_WB)) {
533 addr = md->virt_addr;
534 npages = md->num_pages;
535 memrange_efi_to_native(&addr, &npages);
536 set_memory_uc(addr, npages);
537 }
538
539 systab = (u64) (unsigned long) efi_phys.systab;
540 if (md->phys_addr <= systab && systab < end) {
541 systab += md->virt_addr - md->phys_addr;
542 efi.systab = (efi_system_table_t *) (unsigned long) systab;
543 }
544 }
545
546 BUG_ON(!efi.systab);
547
548 status = phys_efi_set_virtual_address_map(
549 memmap.desc_size * memmap.nr_map,
550 memmap.desc_size,
551 memmap.desc_version,
552 memmap.phys_map);
553
554 if (status != EFI_SUCCESS) {
555 printk(KERN_ALERT "Unable to switch EFI into virtual mode "
556 "(status=%lx)!\n", status);
557 panic("EFI call to SetVirtualAddressMap() failed!");
558 }
559
560 /*
561 * Now that EFI is in virtual mode, update the function
562 * pointers in the runtime service table to the new virtual addresses.
563 *
564 * Call EFI services through wrapper functions.
565 */
566 efi.get_time = virt_efi_get_time;
567 efi.set_time = virt_efi_set_time;
568 efi.get_wakeup_time = virt_efi_get_wakeup_time;
569 efi.set_wakeup_time = virt_efi_set_wakeup_time;
570 efi.get_variable = virt_efi_get_variable;
571 efi.get_next_variable = virt_efi_get_next_variable;
572 efi.set_variable = virt_efi_set_variable;
573 efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
574 efi.reset_system = virt_efi_reset_system;
575 efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
576 if (__supported_pte_mask & _PAGE_NX)
577 runtime_code_page_mkexec();
578 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
579 memmap.map = NULL;
580}
581
582/*
583 * Convenience functions to obtain memory types and attributes
584 */
585u32 efi_mem_type(unsigned long phys_addr)
586{
587 efi_memory_desc_t *md;
588 void *p;
589
590 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
591 md = p;
592 if ((md->phys_addr <= phys_addr) &&
593 (phys_addr < (md->phys_addr +
594 (md->num_pages << EFI_PAGE_SHIFT))))
595 return md->type;
596 }
597 return 0;
598}
599
600u64 efi_mem_attributes(unsigned long phys_addr)
601{
602 efi_memory_desc_t *md;
603 void *p;
604
605 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
606 md = p;
607 if ((md->phys_addr <= phys_addr) &&
608 (phys_addr < (md->phys_addr +
609 (md->num_pages << EFI_PAGE_SHIFT))))
610 return md->attribute;
611 }
612 return 0;
613}
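The removed efi.c above repeatedly walks the firmware memory map in desc_size strides rather than indexing an array, because the firmware's descriptor size may exceed sizeof(efi_memory_desc_t). A small sketch of that walk, assuming a populated struct efi_memory_map:

#include <linux/efi.h>

static u64 count_efi_pages(struct efi_memory_map *map)
{
	efi_memory_desc_t *md;
	void *p;
	u64 pages = 0;

	/* stride by desc_size: firmware descriptors may be larger than
	 * the efi_memory_desc_t the kernel knows about */
	for (p = map->map; p < map->map_end; p += map->desc_size) {
		md = p;
		pages += md->num_pages;
	}
	return pages;
}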
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
deleted file mode 100644
index 5cab48ee61a4..000000000000
--- a/arch/x86/kernel/efi_32.c
+++ /dev/null
@@ -1,112 +0,0 @@
1/*
2 * Extensible Firmware Interface
3 *
4 * Based on Extensible Firmware Interface Specification version 1.0
5 *
6 * Copyright (C) 1999 VA Linux Systems
7 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
8 * Copyright (C) 1999-2002 Hewlett-Packard Co.
9 * David Mosberger-Tang <davidm@hpl.hp.com>
10 * Stephane Eranian <eranian@hpl.hp.com>
11 *
12 * All EFI Runtime Services are not implemented yet as EFI only
13 * supports physical mode addressing on SoftSDV. This is to be fixed
14 * in a future version. --drummond 1999-07-20
15 *
16 * Implemented EFI runtime services and virtual mode calls. --davidm
17 *
18 * Goutham Rao: <goutham.rao@intel.com>
19 * Skip non-WB memory and ignore empty memory ranges.
20 */
21
22#include <linux/kernel.h>
23#include <linux/types.h>
24#include <linux/ioport.h>
25#include <linux/efi.h>
26
27#include <asm/io.h>
28#include <asm/page.h>
29#include <asm/pgtable.h>
30#include <asm/tlbflush.h>
31#include <asm/efi.h>
32
33/*
34 * To make EFI call EFI runtime service in physical addressing mode we need
35 * prelog/epilog before/after the invocation to disable interrupt, to
36 * claim EFI runtime service handler exclusively and to duplicate a memory in
37 * low memory space say 0 - 3G.
38 */
39
40static unsigned long efi_rt_eflags;
41static pgd_t efi_bak_pg_dir_pointer[2];
42
43void efi_call_phys_prelog(void)
44{
45 unsigned long cr4;
46 unsigned long temp;
47 struct desc_ptr gdt_descr;
48
49 local_irq_save(efi_rt_eflags);
50
51 /*
52 * If I don't have PAE, I should just duplicate two entries in page
53 * directory. If I have PAE, I just need to duplicate one entry in
54 * page directory.
55 */
56 cr4 = read_cr4_safe();
57
58 if (cr4 & X86_CR4_PAE) {
59 efi_bak_pg_dir_pointer[0].pgd =
60 swapper_pg_dir[pgd_index(0)].pgd;
61 swapper_pg_dir[0].pgd =
62 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
63 } else {
64 efi_bak_pg_dir_pointer[0].pgd =
65 swapper_pg_dir[pgd_index(0)].pgd;
66 efi_bak_pg_dir_pointer[1].pgd =
67 swapper_pg_dir[pgd_index(0x400000)].pgd;
68 swapper_pg_dir[pgd_index(0)].pgd =
69 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
70 temp = PAGE_OFFSET + 0x400000;
71 swapper_pg_dir[pgd_index(0x400000)].pgd =
72 swapper_pg_dir[pgd_index(temp)].pgd;
73 }
74
75 /*
76 * After the lock is released, the original page table is restored.
77 */
78 __flush_tlb_all();
79
80 gdt_descr.address = __pa(get_cpu_gdt_table(0));
81 gdt_descr.size = GDT_SIZE - 1;
82 load_gdt(&gdt_descr);
83}
84
85void efi_call_phys_epilog(void)
86{
87 unsigned long cr4;
88 struct desc_ptr gdt_descr;
89
90 gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
91 gdt_descr.size = GDT_SIZE - 1;
92 load_gdt(&gdt_descr);
93
94 cr4 = read_cr4_safe();
95
96 if (cr4 & X86_CR4_PAE) {
97 swapper_pg_dir[pgd_index(0)].pgd =
98 efi_bak_pg_dir_pointer[0].pgd;
99 } else {
100 swapper_pg_dir[pgd_index(0)].pgd =
101 efi_bak_pg_dir_pointer[0].pgd;
102 swapper_pg_dir[pgd_index(0x400000)].pgd =
103 efi_bak_pg_dir_pointer[1].pgd;
104 }
105
106 /*
107 * After the lock is released, the original page table is restored.
108 */
109 __flush_tlb_all();
110
111 local_irq_restore(efi_rt_eflags);
112}
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
deleted file mode 100644
index ac0621a7ac3d..000000000000
--- a/arch/x86/kernel/efi_64.c
+++ /dev/null
@@ -1,114 +0,0 @@
1/*
2 * x86_64 specific EFI support functions
3 * Based on Extensible Firmware Interface Specification version 1.0
4 *
5 * Copyright (C) 2005-2008 Intel Co.
6 * Fenghua Yu <fenghua.yu@intel.com>
7 * Bibo Mao <bibo.mao@intel.com>
8 * Chandramouli Narayanan <mouli@linux.intel.com>
9 * Huang Ying <ying.huang@intel.com>
10 *
11 * Code to convert EFI to E820 map has been implemented in elilo bootloader
12 * based on a EFI patch by Edgar Hucek. Based on the E820 map, the page table
13 * is setup appropriately for EFI runtime code.
14 * - mouli 06/14/2007.
15 *
16 */
17
18#include <linux/kernel.h>
19#include <linux/init.h>
20#include <linux/mm.h>
21#include <linux/types.h>
22#include <linux/spinlock.h>
23#include <linux/bootmem.h>
24#include <linux/ioport.h>
25#include <linux/module.h>
26#include <linux/efi.h>
27#include <linux/uaccess.h>
28#include <linux/io.h>
29#include <linux/reboot.h>
30
31#include <asm/setup.h>
32#include <asm/page.h>
33#include <asm/e820.h>
34#include <asm/pgtable.h>
35#include <asm/tlbflush.h>
36#include <asm/proto.h>
37#include <asm/efi.h>
38#include <asm/cacheflush.h>
39#include <asm/fixmap.h>
40
41static pgd_t save_pgd __initdata;
42static unsigned long efi_flags __initdata;
43
44static void __init early_mapping_set_exec(unsigned long start,
45 unsigned long end,
46 int executable)
47{
48 unsigned long num_pages;
49
50 start &= PMD_MASK;
51 end = (end + PMD_SIZE - 1) & PMD_MASK;
52 num_pages = (end - start) >> PAGE_SHIFT;
53 if (executable)
54 set_memory_x((unsigned long)__va(start), num_pages);
55 else
56 set_memory_nx((unsigned long)__va(start), num_pages);
57}
58
59static void __init early_runtime_code_mapping_set_exec(int executable)
60{
61 efi_memory_desc_t *md;
62 void *p;
63
64 if (!(__supported_pte_mask & _PAGE_NX))
65 return;
66
67 /* Make EFI runtime service code area executable */
68 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
69 md = p;
70 if (md->type == EFI_RUNTIME_SERVICES_CODE) {
71 unsigned long end;
72 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
73 early_mapping_set_exec(md->phys_addr, end, executable);
74 }
75 }
76}
77
78void __init efi_call_phys_prelog(void)
79{
80 unsigned long vaddress;
81
82 early_runtime_code_mapping_set_exec(1);
83 local_irq_save(efi_flags);
84 vaddress = (unsigned long)__va(0x0UL);
85 save_pgd = *pgd_offset_k(0x0UL);
86 set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
87 __flush_tlb_all();
88}
89
90void __init efi_call_phys_epilog(void)
91{
92 /*
93 * After the lock is released, the original page table is restored.
94 */
95 set_pgd(pgd_offset_k(0x0UL), save_pgd);
96 __flush_tlb_all();
97 local_irq_restore(efi_flags);
98 early_runtime_code_mapping_set_exec(0);
99}
100
101void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
102 u32 type)
103{
104 unsigned long last_map_pfn;
105
106 if (type == EFI_MEMORY_MAPPED_IO)
107 return ioremap(phys_addr, size);
108
109 last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
110 if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
111 return NULL;
112
113 return (void __iomem *)__va(phys_addr);
114}
diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S
deleted file mode 100644
index fbe66e626c09..000000000000
--- a/arch/x86/kernel/efi_stub_32.S
+++ /dev/null
@@ -1,123 +0,0 @@
1/*
2 * EFI call stub for IA32.
3 *
4 * This stub allows us to make EFI calls in physical mode with interrupts
5 * turned off.
6 */
7
8#include <linux/linkage.h>
9#include <asm/page_types.h>
10
11/*
12 * efi_call_phys(void *, ...) is a function with variable parameters.
13 * All the callers of this function assure that all the parameters are 4-bytes.
14 */
15
16/*
17 * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
18 * So we'd better save all of them at the beginning of this function and restore
19 * at the end no matter how many we use, because we can not assure EFI runtime
20 * service functions will comply with gcc calling convention, too.
21 */
22
23.text
24ENTRY(efi_call_phys)
25 /*
26 * 0. The function can only be called in Linux kernel. So CS has been
27 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
28 * the values of these registers are the same. And, the corresponding
29 * GDT entries are identical. So I will do nothing about segment reg
30 * and GDT, but change GDT base register in prelog and epilog.
31 */
32
33 /*
34 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
35 * But to make it smoothly switch from virtual mode to flat mode.
36 * The mapping of lower virtual memory has been created in prelog and
37 * epilog.
38 */
39 movl $1f, %edx
40 subl $__PAGE_OFFSET, %edx
41 jmp *%edx
421:
43
44 /*
45 * 2. Now on the top of stack is the return
46 * address in the caller of efi_call_phys(), then parameter 1,
47 * parameter 2, ..., param n. To make things easy, we save the return
48 * address of efi_call_phys in a global variable.
49 */
50 popl %edx
51 movl %edx, saved_return_addr
52 /* get the function pointer into ECX*/
53 popl %ecx
54 movl %ecx, efi_rt_function_ptr
55 movl $2f, %edx
56 subl $__PAGE_OFFSET, %edx
57 pushl %edx
58
59 /*
60 * 3. Clear PG bit in %CR0.
61 */
62 movl %cr0, %edx
63 andl $0x7fffffff, %edx
64 movl %edx, %cr0
65 jmp 1f
661:
67
68 /*
69 * 4. Adjust stack pointer.
70 */
71 subl $__PAGE_OFFSET, %esp
72
73 /*
74 * 5. Call the physical function.
75 */
76 jmp *%ecx
77
782:
79 /*
80 * 6. After EFI runtime service returns, control will return to
81 * following instruction. We'd better readjust stack pointer first.
82 */
83 addl $__PAGE_OFFSET, %esp
84
85 /*
86 * 7. Restore PG bit
87 */
88 movl %cr0, %edx
89 orl $0x80000000, %edx
90 movl %edx, %cr0
91 jmp 1f
921:
93 /*
94 * 8. Now restore the virtual mode from flat mode by
95 * adding EIP with PAGE_OFFSET.
96 */
97 movl $1f, %edx
98 jmp *%edx
991:
100
101 /*
102 * 9. Balance the stack. And because EAX contain the return value,
103 * we'd better not clobber it.
104 */
105 leal efi_rt_function_ptr, %edx
106 movl (%edx), %ecx
107 pushl %ecx
108
109 /*
110 * 10. Push the saved return address onto the stack and return.
111 */
112 leal saved_return_addr, %edx
113 movl (%edx), %ecx
114 pushl %ecx
115 ret
116ENDPROC(efi_call_phys)
117.previous
118
119.data
120saved_return_addr:
121 .long 0
122efi_rt_function_ptr:
123 .long 0
diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S
deleted file mode 100644
index 4c07ccab8146..000000000000
--- a/arch/x86/kernel/efi_stub_64.S
+++ /dev/null
@@ -1,116 +0,0 @@
1/*
2 * Function calling ABI conversion from Linux to EFI for x86_64
3 *
4 * Copyright (C) 2007 Intel Corp
5 * Bibo Mao <bibo.mao@intel.com>
6 * Huang Ying <ying.huang@intel.com>
7 */
8
9#include <linux/linkage.h>
10
11#define SAVE_XMM \
12 mov %rsp, %rax; \
13 subq $0x70, %rsp; \
14 and $~0xf, %rsp; \
15 mov %rax, (%rsp); \
16 mov %cr0, %rax; \
17 clts; \
18 mov %rax, 0x8(%rsp); \
19 movaps %xmm0, 0x60(%rsp); \
20 movaps %xmm1, 0x50(%rsp); \
21 movaps %xmm2, 0x40(%rsp); \
22 movaps %xmm3, 0x30(%rsp); \
23 movaps %xmm4, 0x20(%rsp); \
24 movaps %xmm5, 0x10(%rsp)
25
26#define RESTORE_XMM \
27 movaps 0x60(%rsp), %xmm0; \
28 movaps 0x50(%rsp), %xmm1; \
29 movaps 0x40(%rsp), %xmm2; \
30 movaps 0x30(%rsp), %xmm3; \
31 movaps 0x20(%rsp), %xmm4; \
32 movaps 0x10(%rsp), %xmm5; \
33 mov 0x8(%rsp), %rsi; \
34 mov %rsi, %cr0; \
35 mov (%rsp), %rsp
36
37ENTRY(efi_call0)
38 SAVE_XMM
39 subq $32, %rsp
40 call *%rdi
41 addq $32, %rsp
42 RESTORE_XMM
43 ret
44ENDPROC(efi_call0)
45
46ENTRY(efi_call1)
47 SAVE_XMM
48 subq $32, %rsp
49 mov %rsi, %rcx
50 call *%rdi
51 addq $32, %rsp
52 RESTORE_XMM
53 ret
54ENDPROC(efi_call1)
55
56ENTRY(efi_call2)
57 SAVE_XMM
58 subq $32, %rsp
59 mov %rsi, %rcx
60 call *%rdi
61 addq $32, %rsp
62 RESTORE_XMM
63 ret
64ENDPROC(efi_call2)
65
66ENTRY(efi_call3)
67 SAVE_XMM
68 subq $32, %rsp
69 mov %rcx, %r8
70 mov %rsi, %rcx
71 call *%rdi
72 addq $32, %rsp
73 RESTORE_XMM
74 ret
75ENDPROC(efi_call3)
76
77ENTRY(efi_call4)
78 SAVE_XMM
79 subq $32, %rsp
80 mov %r8, %r9
81 mov %rcx, %r8
82 mov %rsi, %rcx
83 call *%rdi
84 addq $32, %rsp
85 RESTORE_XMM
86 ret
87ENDPROC(efi_call4)
88
89ENTRY(efi_call5)
90 SAVE_XMM
91 subq $48, %rsp
92 mov %r9, 32(%rsp)
93 mov %r8, %r9
94 mov %rcx, %r8
95 mov %rsi, %rcx
96 call *%rdi
97 addq $48, %rsp
98 RESTORE_XMM
99 ret
100ENDPROC(efi_call5)
101
102ENTRY(efi_call6)
103 SAVE_XMM
104 mov (%rsp), %rax
105 mov 8(%rax), %rax
106 subq $48, %rsp
107 mov %r9, 32(%rsp)
108 mov %rax, 40(%rsp)
109 mov %r8, %r9
110 mov %rcx, %r8
111 mov %rsi, %rcx
112 call *%rdi
113 addq $48, %rsp
114 RESTORE_XMM
115 ret
116ENDPROC(efi_call6)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 9fb188d7bc76..59e175e89599 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -382,20 +382,20 @@ sysenter_past_esp:
382 * enough kernel state to call TRACE_IRQS_OFF can be called - but 382 * enough kernel state to call TRACE_IRQS_OFF can be called - but
383 * we immediately enable interrupts at that point anyway. 383 * we immediately enable interrupts at that point anyway.
384 */ 384 */
385 pushl_cfi $(__USER_DS) 385 pushl_cfi $__USER_DS
386 /*CFI_REL_OFFSET ss, 0*/ 386 /*CFI_REL_OFFSET ss, 0*/
387 pushl_cfi %ebp 387 pushl_cfi %ebp
388 CFI_REL_OFFSET esp, 0 388 CFI_REL_OFFSET esp, 0
389 pushfl_cfi 389 pushfl_cfi
390 orl $X86_EFLAGS_IF, (%esp) 390 orl $X86_EFLAGS_IF, (%esp)
391 pushl_cfi $(__USER_CS) 391 pushl_cfi $__USER_CS
392 /*CFI_REL_OFFSET cs, 0*/ 392 /*CFI_REL_OFFSET cs, 0*/
393 /* 393 /*
394 * Push current_thread_info()->sysenter_return to the stack. 394 * Push current_thread_info()->sysenter_return to the stack.
395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words 395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
396 * pushed above; +8 corresponds to copy_thread's esp0 setting. 396 * pushed above; +8 corresponds to copy_thread's esp0 setting.
397 */ 397 */
398 pushl_cfi (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) 398 pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp)
399 CFI_REL_OFFSET eip, 0 399 CFI_REL_OFFSET eip, 0
400 400
401 pushl_cfi %eax 401 pushl_cfi %eax
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a7ae7fd1010f..fe2690d71c0c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -963,22 +963,10 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
963 x86_platform_ipi smp_x86_platform_ipi 963 x86_platform_ipi smp_x86_platform_ipi
964 964
965#ifdef CONFIG_SMP 965#ifdef CONFIG_SMP
966apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ 966.irpc idx, "01234567"
967 invalidate_interrupt0 smp_invalidate_interrupt 967apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
968apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \ 968 invalidate_interrupt\idx smp_invalidate_interrupt
969 invalidate_interrupt1 smp_invalidate_interrupt 969.endr
970apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
971 invalidate_interrupt2 smp_invalidate_interrupt
972apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
973 invalidate_interrupt3 smp_invalidate_interrupt
974apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
975 invalidate_interrupt4 smp_invalidate_interrupt
976apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
977 invalidate_interrupt5 smp_invalidate_interrupt
978apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
979 invalidate_interrupt6 smp_invalidate_interrupt
980apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
981 invalidate_interrupt7 smp_invalidate_interrupt
982#endif 970#endif
983 971
984apicinterrupt THRESHOLD_APIC_VECTOR \ 972apicinterrupt THRESHOLD_APIC_VECTOR \
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 9a6ca2392170..763310165fa0 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,6 +18,7 @@
18#include <asm/apic.h> 18#include <asm/apic.h>
19#include <asm/io_apic.h> 19#include <asm/io_apic.h>
20#include <asm/bios_ebda.h> 20#include <asm/bios_ebda.h>
21#include <asm/tlbflush.h>
21 22
22static void __init i386_default_early_setup(void) 23static void __init i386_default_early_setup(void)
23{ 24{
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index fa8c1b8e09fb..bcece91dd311 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -183,13 +183,12 @@ default_entry:
183#ifdef CONFIG_X86_PAE 183#ifdef CONFIG_X86_PAE
184 184
185 /* 185 /*
186 * In PAE mode swapper_pg_dir is statically defined to contain enough 186 * In PAE mode initial_page_table is statically defined to contain
187 * entries to cover the VMSPLIT option (that is the top 1, 2 or 3 187 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
188 * entries). The identity mapping is handled by pointing two PGD 188 * entries). The identity mapping is handled by pointing two PGD entries
189 * entries to the first kernel PMD. 189 * to the first kernel PMD.
190 * 190 *
191 * Note the upper half of each PMD or PTE are always zero at 191 * Note the upper half of each PMD or PTE are always zero at this stage.
192 * this stage.
193 */ 192 */
194 193
195#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ 194#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
@@ -197,7 +196,7 @@ default_entry:
197 xorl %ebx,%ebx /* %ebx is kept at zero */ 196 xorl %ebx,%ebx /* %ebx is kept at zero */
198 197
199 movl $pa(__brk_base), %edi 198 movl $pa(__brk_base), %edi
200 movl $pa(swapper_pg_pmd), %edx 199 movl $pa(initial_pg_pmd), %edx
201 movl $PTE_IDENT_ATTR, %eax 200 movl $PTE_IDENT_ATTR, %eax
20210: 20110:
203 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ 202 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */
@@ -226,14 +225,14 @@ default_entry:
226 movl %eax, pa(max_pfn_mapped) 225 movl %eax, pa(max_pfn_mapped)
227 226
228 /* Do early initialization of the fixmap area */ 227 /* Do early initialization of the fixmap area */
229 movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax 228 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
230 movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) 229 movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
231#else /* Not PAE */ 230#else /* Not PAE */
232 231
233page_pde_offset = (__PAGE_OFFSET >> 20); 232page_pde_offset = (__PAGE_OFFSET >> 20);
234 233
235 movl $pa(__brk_base), %edi 234 movl $pa(__brk_base), %edi
236 movl $pa(swapper_pg_dir), %edx 235 movl $pa(initial_page_table), %edx
237 movl $PTE_IDENT_ATTR, %eax 236 movl $PTE_IDENT_ATTR, %eax
23810: 23710:
239 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ 238 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
@@ -257,8 +256,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
257 movl %eax, pa(max_pfn_mapped) 256 movl %eax, pa(max_pfn_mapped)
258 257
259 /* Do early initialization of the fixmap area */ 258 /* Do early initialization of the fixmap area */
260 movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax 259 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
261 movl %eax,pa(swapper_pg_dir+0xffc) 260 movl %eax,pa(initial_page_table+0xffc)
262#endif 261#endif
263 jmp 3f 262 jmp 3f
264/* 263/*
@@ -334,7 +333,7 @@ ENTRY(startup_32_smp)
334/* 333/*
335 * Enable paging 334 * Enable paging
336 */ 335 */
337 movl pa(initial_page_table), %eax 336 movl $pa(initial_page_table), %eax
338 movl %eax,%cr3 /* set the page table pointer.. */ 337 movl %eax,%cr3 /* set the page table pointer.. */
339 movl %cr0,%eax 338 movl %cr0,%eax
340 orl $X86_CR0_PG,%eax 339 orl $X86_CR0_PG,%eax
@@ -614,8 +613,6 @@ ignore_int:
614.align 4 613.align 4
615ENTRY(initial_code) 614ENTRY(initial_code)
616 .long i386_start_kernel 615 .long i386_start_kernel
617ENTRY(initial_page_table)
618 .long pa(swapper_pg_dir)
619 616
620/* 617/*
621 * BSS section 618 * BSS section
@@ -623,20 +620,18 @@ ENTRY(initial_page_table)
623__PAGE_ALIGNED_BSS 620__PAGE_ALIGNED_BSS
624 .align PAGE_SIZE_asm 621 .align PAGE_SIZE_asm
625#ifdef CONFIG_X86_PAE 622#ifdef CONFIG_X86_PAE
626swapper_pg_pmd: 623initial_pg_pmd:
627 .fill 1024*KPMDS,4,0 624 .fill 1024*KPMDS,4,0
628#else 625#else
629ENTRY(swapper_pg_dir) 626ENTRY(initial_page_table)
630 .fill 1024,4,0 627 .fill 1024,4,0
631#endif 628#endif
632swapper_pg_fixmap: 629initial_pg_fixmap:
633 .fill 1024,4,0 630 .fill 1024,4,0
634#ifdef CONFIG_X86_TRAMPOLINE
635ENTRY(trampoline_pg_dir)
636 .fill 1024,4,0
637#endif
638ENTRY(empty_zero_page) 631ENTRY(empty_zero_page)
639 .fill 4096,1,0 632 .fill 4096,1,0
633ENTRY(swapper_pg_dir)
634 .fill 1024,4,0
640 635
641/* 636/*
642 * This starts the data section. 637 * This starts the data section.
@@ -645,20 +640,20 @@ ENTRY(empty_zero_page)
645__PAGE_ALIGNED_DATA 640__PAGE_ALIGNED_DATA
646 /* Page-aligned for the benefit of paravirt? */ 641 /* Page-aligned for the benefit of paravirt? */
647 .align PAGE_SIZE_asm 642 .align PAGE_SIZE_asm
648ENTRY(swapper_pg_dir) 643ENTRY(initial_page_table)
649 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ 644 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
650# if KPMDS == 3 645# if KPMDS == 3
651 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 646 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0
652 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 647 .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
653 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0 648 .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x2000),0
654# elif KPMDS == 2 649# elif KPMDS == 2
655 .long 0,0 650 .long 0,0
656 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 651 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0
657 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 652 .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
658# elif KPMDS == 1 653# elif KPMDS == 1
659 .long 0,0 654 .long 0,0
660 .long 0,0 655 .long 0,0
661 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 656 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0
662# else 657# else
663# error "Kernel PMDs should be 1, 2 or 3" 658# error "Kernel PMDs should be 1, 2 or 3"
664# endif 659# endif
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index efaf906daf93..ae03cab4352e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -380,44 +380,35 @@ static int hpet_next_event(unsigned long delta,
380 struct clock_event_device *evt, int timer) 380 struct clock_event_device *evt, int timer)
381{ 381{
382 u32 cnt; 382 u32 cnt;
383 s32 res;
383 384
384 cnt = hpet_readl(HPET_COUNTER); 385 cnt = hpet_readl(HPET_COUNTER);
385 cnt += (u32) delta; 386 cnt += (u32) delta;
386 hpet_writel(cnt, HPET_Tn_CMP(timer)); 387 hpet_writel(cnt, HPET_Tn_CMP(timer));
387 388
388 /* 389 /*
389 * We need to read back the CMP register on certain HPET 390 * HPETs are a complete disaster. The compare register is
390 * implementations (ATI chipsets) which seem to delay the 391 * based on a equal comparison and neither provides a less
391 * transfer of the compare register into the internal compare 392 * than or equal functionality (which would require to take
392 * logic. With small deltas this might actually be too late as 393 * the wraparound into account) nor a simple count down event
393 * the counter could already be higher than the compare value 394 * mode. Further the write to the comparator register is
394 * at that point and we would wait for the next hpet interrupt 395 * delayed internally up to two HPET clock cycles in certain
395 * forever. We found out that reading the CMP register back 396 * chipsets (ATI, ICH9,10). We worked around that by reading
396 * forces the transfer so we can rely on the comparison with 397 * back the compare register, but that required another
397 * the counter register below. If the read back from the 398 * workaround for ICH9,10 chips where the first readout after
398 * compare register does not match the value we programmed 399 * write can return the old stale value. We already have a
399 * then we might have a real hardware problem. We can not do 400 * minimum delta of 5us enforced, but a NMI or SMI hitting
400 * much about it here, but at least alert the user/admin with 401 * between the counter readout and the comparator write can
401 * a prominent warning. 402 * move us behind that point easily. Now instead of reading
402 * 403 * the compare register back several times, we make the ETIME
403 * An erratum on some chipsets (ICH9,..), results in 404 * decision based on the following: Return ETIME if the
404 * comparator read immediately following a write returning old 405 * counter value after the write is less than 8 HPET cycles
405 * value. Workaround for this is to read this value second 406 * away from the event or if the counter is already ahead of
406 * time, when first read returns old value. 407 * the event.
407 *
408 * In fact the write to the comparator register is delayed up
409 * to two HPET cycles so the workaround we tried to restrict
410 * the readback to those known to be borked ATI chipsets
411 * failed miserably. So we give up on optimizations forever
412 * and penalize all HPET incarnations unconditionally.
413 */ 408 */
414 if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { 409 res = (s32)(cnt - hpet_readl(HPET_COUNTER));
415 if (hpet_readl(HPET_Tn_CMP(timer)) != cnt)
416 printk_once(KERN_WARNING
417 "hpet: compare register read back failed.\n");
418 }
419 410
420 return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; 411 return res < 8 ? -ETIME : 0;
421} 412}
422 413
423static void hpet_legacy_set_mode(enum clock_event_mode mode, 414static void hpet_legacy_set_mode(enum clock_event_mode mode,
@@ -722,7 +713,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
722 713
723 switch (action & 0xf) { 714 switch (action & 0xf) {
724 case CPU_ONLINE: 715 case CPU_ONLINE:
725 INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work); 716 INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
726 init_completion(&work.complete); 717 init_completion(&work.complete);
727 /* FIXME: add schedule_work_on() */ 718 /* FIXME: add schedule_work_on() */
728 schedule_delayed_work_on(cpu, &work.work, 0); 719 schedule_delayed_work_on(cpu, &work.work, 0);
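The new hpet_next_event() logic above makes the -ETIME decision from a single counter readback: the difference between the programmed comparator and the current counter is computed in 32-bit space and interpreted as signed, which stays correct across wraparound. A standalone sketch of that check:

#include <linux/errno.h>
#include <linux/types.h>

static int event_in_time(u32 programmed_cmp, u32 current_cnt)
{
	/* signed difference survives 32-bit counter wraparound */
	s32 res = (s32)(programmed_cmp - current_cnt);

	/* fewer than 8 HPET cycles of margin (or already behind): too late */
	return res < 8 ? -ETIME : 0;
}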
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 10709f29d166..96656f207751 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -17,6 +17,7 @@
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/uaccess.h> 18#include <linux/uaccess.h>
19#include <linux/percpu.h> 19#include <linux/percpu.h>
20#include <linux/mm.h>
20 21
21#include <asm/apic.h> 22#include <asm/apic.h>
22 23
@@ -49,21 +50,17 @@ static inline int check_stack_overflow(void) { return 0; }
49static inline void print_stack_overflow(void) { } 50static inline void print_stack_overflow(void) { }
50#endif 51#endif
51 52
52#ifdef CONFIG_4KSTACKS
53/* 53/*
54 * per-CPU IRQ handling contexts (thread information and stack) 54 * per-CPU IRQ handling contexts (thread information and stack)
55 */ 55 */
56union irq_ctx { 56union irq_ctx {
57 struct thread_info tinfo; 57 struct thread_info tinfo;
58 u32 stack[THREAD_SIZE/sizeof(u32)]; 58 u32 stack[THREAD_SIZE/sizeof(u32)];
59} __attribute__((aligned(PAGE_SIZE))); 59} __attribute__((aligned(THREAD_SIZE)));
60 60
61static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); 61static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
62static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); 62static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
63 63
64static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack);
65static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack);
66
67static void call_on_stack(void *func, void *stack) 64static void call_on_stack(void *func, void *stack)
68{ 65{
69 asm volatile("xchgl %%ebx,%%esp \n" 66 asm volatile("xchgl %%ebx,%%esp \n"
@@ -129,7 +126,9 @@ void __cpuinit irq_ctx_init(int cpu)
129 if (per_cpu(hardirq_ctx, cpu)) 126 if (per_cpu(hardirq_ctx, cpu))
130 return; 127 return;
131 128
132 irqctx = &per_cpu(hardirq_stack, cpu); 129 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
130 THREAD_FLAGS,
131 THREAD_ORDER));
133 irqctx->tinfo.task = NULL; 132 irqctx->tinfo.task = NULL;
134 irqctx->tinfo.exec_domain = NULL; 133 irqctx->tinfo.exec_domain = NULL;
135 irqctx->tinfo.cpu = cpu; 134 irqctx->tinfo.cpu = cpu;
@@ -138,7 +137,9 @@ void __cpuinit irq_ctx_init(int cpu)
138 137
139 per_cpu(hardirq_ctx, cpu) = irqctx; 138 per_cpu(hardirq_ctx, cpu) = irqctx;
140 139
141 irqctx = &per_cpu(softirq_stack, cpu); 140 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
141 THREAD_FLAGS,
142 THREAD_ORDER));
142 irqctx->tinfo.task = NULL; 143 irqctx->tinfo.task = NULL;
143 irqctx->tinfo.exec_domain = NULL; 144 irqctx->tinfo.exec_domain = NULL;
144 irqctx->tinfo.cpu = cpu; 145 irqctx->tinfo.cpu = cpu;
@@ -151,11 +152,6 @@ void __cpuinit irq_ctx_init(int cpu)
151 cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); 152 cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
152} 153}
153 154
154void irq_ctx_exit(int cpu)
155{
156 per_cpu(hardirq_ctx, cpu) = NULL;
157}
158
159asmlinkage void do_softirq(void) 155asmlinkage void do_softirq(void)
160{ 156{
161 unsigned long flags; 157 unsigned long flags;
@@ -187,11 +183,6 @@ asmlinkage void do_softirq(void)
187 local_irq_restore(flags); 183 local_irq_restore(flags);
188} 184}
189 185
190#else
191static inline int
192execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
193#endif
194
195bool handle_irq(unsigned irq, struct pt_regs *regs) 186bool handle_irq(unsigned irq, struct pt_regs *regs)
196{ 187{
197 struct irq_desc *desc; 188 struct irq_desc *desc;
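The alignment change on union irq_ctx (PAGE_SIZE to THREAD_SIZE) matters because x86-32 recovers the current thread_info by masking the stack pointer with ~(THREAD_SIZE - 1); once the IRQ stacks are allocated with alloc_pages_node() at THREAD_ORDER, a merely page-aligned stack would break that mask when THREAD_SIZE is larger than a page. A stand-alone illustration of the mask, with made-up addresses and THREAD_SIZE assumed to be 8 KiB:

	#include <stdint.h>
	#include <stdio.h>

	#define THREAD_SIZE 8192UL	/* two pages, as with THREAD_ORDER == 1 */

	/* The thread_info sits at the base of the stack area; masking any
	 * address inside the area must land back on that base. */
	static uintptr_t thread_info_base(uintptr_t sp)
	{
		return sp & ~(THREAD_SIZE - 1);
	}

	int main(void)
	{
		uintptr_t aligned_stack = 0xc1000000;	/* THREAD_SIZE aligned */
		uintptr_t page_only     = 0xc1001000;	/* only PAGE_SIZE aligned */

		/* A pointer somewhere in the middle of each stack: */
		printf("ok:     %#lx\n", (unsigned long)thread_info_base(aligned_stack + 0x1234));
		printf("broken: %#lx\n", (unsigned long)thread_info_base(page_only + 0x1234));
		return 0;
	}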
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 8afd9f321f10..90fcf62854bb 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -78,6 +78,7 @@ static int setup_data_open(struct inode *inode, struct file *file)
78static const struct file_operations fops_setup_data = { 78static const struct file_operations fops_setup_data = {
79 .read = setup_data_read, 79 .read = setup_data_read,
80 .open = setup_data_open, 80 .open = setup_data_open,
81 .llseek = default_llseek,
81}; 82};
82 83
83static int __init 84static int __init
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 852b81967a37..ec592caac4b4 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -387,7 +387,7 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
387 * disable hardware debugging while it is processing gdb packets or 387 * disable hardware debugging while it is processing gdb packets or
388 * handling exception. 388 * handling exception.
389 */ 389 */
390void kgdb_disable_hw_debug(struct pt_regs *regs) 390static void kgdb_disable_hw_debug(struct pt_regs *regs)
391{ 391{
392 int i; 392 int i;
393 int cpu = raw_smp_processor_id(); 393 int cpu = raw_smp_processor_id();
@@ -477,8 +477,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
477 raw_smp_processor_id()); 477 raw_smp_processor_id());
478 } 478 }
479 479
480 kgdb_correct_hw_break();
481
482 return 0; 480 return 0;
483 } 481 }
484 482
@@ -621,7 +619,12 @@ int kgdb_arch_init(void)
621static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi, 619static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi,
622 struct perf_sample_data *data, struct pt_regs *regs) 620 struct perf_sample_data *data, struct pt_regs *regs)
623{ 621{
624 kgdb_ll_trap(DIE_DEBUG, "debug", regs, 0, 0, SIGTRAP); 622 struct task_struct *tsk = current;
623 int i;
624
625 for (i = 0; i < 4; i++)
626 if (breakinfo[i].enabled)
627 tsk->thread.debugreg6 |= (DR_TRAP0 << i);
625} 628}
626 629
627void kgdb_arch_late(void) 630void kgdb_arch_late(void)
@@ -644,7 +647,7 @@ void kgdb_arch_late(void)
644 if (breakinfo[i].pev) 647 if (breakinfo[i].pev)
645 continue; 648 continue;
646 breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); 649 breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL);
647 if (IS_ERR(breakinfo[i].pev)) { 650 if (IS_ERR((void * __force)breakinfo[i].pev)) {
648 printk(KERN_ERR "kgdb: Could not allocate hw" 651 printk(KERN_ERR "kgdb: Could not allocate hw"
649 "breakpoints\nDisabling the kernel debugger\n"); 652 "breakpoints\nDisabling the kernel debugger\n");
650 breakinfo[i].pev = NULL; 653 breakinfo[i].pev = NULL;
@@ -721,6 +724,7 @@ struct kgdb_arch arch_kgdb_ops = {
721 .flags = KGDB_HW_BREAKPOINT, 724 .flags = KGDB_HW_BREAKPOINT,
722 .set_hw_breakpoint = kgdb_set_hw_break, 725 .set_hw_breakpoint = kgdb_set_hw_break,
723 .remove_hw_breakpoint = kgdb_remove_hw_break, 726 .remove_hw_breakpoint = kgdb_remove_hw_break,
727 .disable_hw_break = kgdb_disable_hw_debug,
724 .remove_all_hw_break = kgdb_remove_all_hw_break, 728 .remove_all_hw_break = kgdb_remove_all_hw_break,
725 .correct_hw_break = kgdb_correct_hw_break, 729 .correct_hw_break = kgdb_correct_hw_break,
726}; 730};
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index eb9b76c716c2..ca43ce31a19c 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -128,13 +128,15 @@ static struct clocksource kvm_clock = {
128static int kvm_register_clock(char *txt) 128static int kvm_register_clock(char *txt)
129{ 129{
130 int cpu = smp_processor_id(); 130 int cpu = smp_processor_id();
131 int low, high; 131 int low, high, ret;
132
132 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; 133 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
133 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); 134 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
135 ret = native_write_msr_safe(msr_kvm_system_time, low, high);
134 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", 136 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
135 cpu, high, low, txt); 137 cpu, high, low, txt);
136 138
137 return native_write_msr_safe(msr_kvm_system_time, low, high); 139 return ret;
138} 140}
139 141
140#ifdef CONFIG_X86_LOCAL_APIC 142#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index e1af7c055c7d..ce0cb4721c9a 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -212,7 +212,7 @@ static int install_equiv_cpu_table(const u8 *buf)
212 return 0; 212 return 0;
213 } 213 }
214 214
215 equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); 215 equiv_cpu_table = vmalloc(size);
216 if (!equiv_cpu_table) { 216 if (!equiv_cpu_table) {
217 pr_err("failed to allocate equivalent CPU table\n"); 217 pr_err("failed to allocate equivalent CPU table\n");
218 return 0; 218 return 0;
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index fa6551d36c10..1cca374a2bac 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -12,7 +12,7 @@
12 * Software Developer's Manual 12 * Software Developer's Manual
13 * Order Number 253668 or free download from: 13 * Order Number 253668 or free download from:
14 * 14 *
15 * http://developer.intel.com/design/pentium4/manuals/253668.htm 15 * http://developer.intel.com/Assets/PDF/manual/253668.pdf
16 * 16 *
17 * For more information, go to http://www.urbanmyth.org/microcode 17 * For more information, go to http://www.urbanmyth.org/microcode
18 * 18 *
@@ -232,6 +232,7 @@ static const struct file_operations microcode_fops = {
232 .owner = THIS_MODULE, 232 .owner = THIS_MODULE,
233 .write = microcode_write, 233 .write = microcode_write,
234 .open = microcode_open, 234 .open = microcode_open,
235 .llseek = no_llseek,
235}; 236};
236 237
237static struct miscdevice microcode_dev = { 238static struct miscdevice microcode_dev = {
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 356170262a93..dcb65cc0a053 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -12,7 +12,7 @@
12 * Software Developer's Manual 12 * Software Developer's Manual
13 * Order Number 253668 or free download from: 13 * Order Number 253668 or free download from:
14 * 14 *
15 * http://developer.intel.com/design/pentium4/manuals/253668.htm 15 * http://developer.intel.com/Assets/PDF/manual/253668.pdf
16 * 16 *
17 * For more information, go to http://www.urbanmyth.org/microcode 17 * For more information, go to http://www.urbanmyth.org/microcode
18 * 18 *
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 71825806cd44..6da143c2a6b8 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -217,13 +217,13 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
217 wrmsrl(address, val); 217 wrmsrl(address, val);
218} 218}
219 219
220static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d) 220static int __init set_check_enable_amd_mmconf(const struct dmi_system_id *d)
221{ 221{
222 pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF; 222 pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF;
223 return 0; 223 return 0;
224} 224}
225 225
226static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = { 226static const struct dmi_system_id __initconst mmconf_dmi_table[] = {
227 { 227 {
228 .callback = set_check_enable_amd_mmconf, 228 .callback = set_check_enable_amd_mmconf,
229 .ident = "Sun Microsystems Machine", 229 .ident = "Sun Microsystems Machine",
@@ -234,7 +234,8 @@ static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = {
234 {} 234 {}
235}; 235};
236 236
237void __cpuinit check_enable_amd_mmconf_dmi(void) 237/* Called from a __cpuinit function, but only on the BSP. */
238void __ref check_enable_amd_mmconf_dmi(void)
238{ 239{
239 dmi_check_system(mmconf_dmi_table); 240 dmi_check_system(mmconf_dmi_table);
240} 241}
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c
deleted file mode 100644
index 79ae68154e87..000000000000
--- a/arch/x86/kernel/mrst.c
+++ /dev/null
@@ -1,311 +0,0 @@
1/*
2 * mrst.c: Intel Moorestown platform specific setup code
3 *
4 * (C) Copyright 2008 Intel Corporation
5 * Author: Jacob Pan (jacob.jun.pan@intel.com)
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 */
12#include <linux/init.h>
13#include <linux/kernel.h>
14#include <linux/sfi.h>
15#include <linux/irq.h>
16#include <linux/module.h>
17
18#include <asm/setup.h>
19#include <asm/mpspec_def.h>
20#include <asm/hw_irq.h>
21#include <asm/apic.h>
22#include <asm/io_apic.h>
23#include <asm/mrst.h>
24#include <asm/io.h>
25#include <asm/i8259.h>
26#include <asm/apb_timer.h>
27
28/*
29 * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
30 * cmdline option x86_mrst_timer can be used to override the configuration
31 * to prefer one or the other.
32 * at runtime, there are basically three timer configurations:
33 * 1. per cpu apbt clock only
34 * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only
35 * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast.
36 *
37 * by default (without cmdline option), platform code first detects cpu type
38 * to see if we are on lincroft or penwell, then set up both lapic or apbt
39 * clocks accordingly.
40 * i.e. by default, medfield uses configuration #2, moorestown uses #1.
41 * config #3 is supported but not recommended on medfield.
42 *
43 * rating and feature summary:
44 * lapic (with C3STOP) --------- 100
45 * apbt (always-on) ------------ 110
46 * lapic (always-on,ARAT) ------ 150
47 */
48
49__cpuinitdata enum mrst_timer_options mrst_timer_options;
50
51static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
52static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
53enum mrst_cpu_type __mrst_cpu_chip;
54EXPORT_SYMBOL_GPL(__mrst_cpu_chip);
55
56int sfi_mtimer_num;
57
58struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
59EXPORT_SYMBOL_GPL(sfi_mrtc_array);
60int sfi_mrtc_num;
61
62static inline void assign_to_mp_irq(struct mpc_intsrc *m,
63 struct mpc_intsrc *mp_irq)
64{
65 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
66}
67
68static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq,
69 struct mpc_intsrc *m)
70{
71 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
72}
73
74static void save_mp_irq(struct mpc_intsrc *m)
75{
76 int i;
77
78 for (i = 0; i < mp_irq_entries; i++) {
79 if (!mp_irq_cmp(&mp_irqs[i], m))
80 return;
81 }
82
83 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
84 if (++mp_irq_entries == MAX_IRQ_SOURCES)
85 panic("Max # of irq sources exceeded!!\n");
86}
87
88/* parse all the mtimer info to a static mtimer array */
89static int __init sfi_parse_mtmr(struct sfi_table_header *table)
90{
91 struct sfi_table_simple *sb;
92 struct sfi_timer_table_entry *pentry;
93 struct mpc_intsrc mp_irq;
94 int totallen;
95
96 sb = (struct sfi_table_simple *)table;
97 if (!sfi_mtimer_num) {
98 sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb,
99 struct sfi_timer_table_entry);
100 pentry = (struct sfi_timer_table_entry *) sb->pentry;
101 totallen = sfi_mtimer_num * sizeof(*pentry);
102 memcpy(sfi_mtimer_array, pentry, totallen);
103 }
104
105 printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num);
106 pentry = sfi_mtimer_array;
107 for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
108 printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz,"
109 " irq = %d\n", totallen, (u32)pentry->phys_addr,
110 pentry->freq_hz, pentry->irq);
111 if (!pentry->irq)
112 continue;
113 mp_irq.type = MP_IOAPIC;
114 mp_irq.irqtype = mp_INT;
115/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
116 mp_irq.irqflag = 5;
117 mp_irq.srcbus = 0;
118 mp_irq.srcbusirq = pentry->irq; /* IRQ */
119 mp_irq.dstapic = MP_APIC_ALL;
120 mp_irq.dstirq = pentry->irq;
121 save_mp_irq(&mp_irq);
122 }
123
124 return 0;
125}
126
127struct sfi_timer_table_entry *sfi_get_mtmr(int hint)
128{
129 int i;
130 if (hint < sfi_mtimer_num) {
131 if (!sfi_mtimer_usage[hint]) {
132 pr_debug("hint taken for timer %d irq %d\n",\
133 hint, sfi_mtimer_array[hint].irq);
134 sfi_mtimer_usage[hint] = 1;
135 return &sfi_mtimer_array[hint];
136 }
137 }
138 /* take the first timer available */
139 for (i = 0; i < sfi_mtimer_num;) {
140 if (!sfi_mtimer_usage[i]) {
141 sfi_mtimer_usage[i] = 1;
142 return &sfi_mtimer_array[i];
143 }
144 i++;
145 }
146 return NULL;
147}
148
149void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr)
150{
151 int i;
152 for (i = 0; i < sfi_mtimer_num;) {
153 if (mtmr->irq == sfi_mtimer_array[i].irq) {
154 sfi_mtimer_usage[i] = 0;
155 return;
156 }
157 i++;
158 }
159}
160
161/* parse all the mrtc info to a global mrtc array */
162int __init sfi_parse_mrtc(struct sfi_table_header *table)
163{
164 struct sfi_table_simple *sb;
165 struct sfi_rtc_table_entry *pentry;
166 struct mpc_intsrc mp_irq;
167
168 int totallen;
169
170 sb = (struct sfi_table_simple *)table;
171 if (!sfi_mrtc_num) {
172 sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb,
173 struct sfi_rtc_table_entry);
174 pentry = (struct sfi_rtc_table_entry *)sb->pentry;
175 totallen = sfi_mrtc_num * sizeof(*pentry);
176 memcpy(sfi_mrtc_array, pentry, totallen);
177 }
178
179 printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num);
180 pentry = sfi_mrtc_array;
181 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
182 printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n",
183 totallen, (u32)pentry->phys_addr, pentry->irq);
184 mp_irq.type = MP_IOAPIC;
185 mp_irq.irqtype = mp_INT;
186 mp_irq.irqflag = 0;
187 mp_irq.srcbus = 0;
188 mp_irq.srcbusirq = pentry->irq; /* IRQ */
189 mp_irq.dstapic = MP_APIC_ALL;
190 mp_irq.dstirq = pentry->irq;
191 save_mp_irq(&mp_irq);
192 }
193 return 0;
194}
195
196static unsigned long __init mrst_calibrate_tsc(void)
197{
198 unsigned long flags, fast_calibrate;
199
200 local_irq_save(flags);
201 fast_calibrate = apbt_quick_calibrate();
202 local_irq_restore(flags);
203
204 if (fast_calibrate)
205 return fast_calibrate;
206
207 return 0;
208}
209
210void __init mrst_time_init(void)
211{
212 switch (mrst_timer_options) {
213 case MRST_TIMER_APBT_ONLY:
214 break;
215 case MRST_TIMER_LAPIC_APBT:
216 x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
217 x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
218 break;
219 default:
220 if (!boot_cpu_has(X86_FEATURE_ARAT))
221 break;
222 x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
223 x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
224 return;
225 }
226 /* we need at least one APB timer */
227 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
228 pre_init_apic_IRQ0();
229 apbt_time_init();
230}
231
232void __init mrst_rtc_init(void)
233{
234 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
235}
236
237void __cpuinit mrst_arch_setup(void)
238{
239 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
240 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
241 else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26)
242 __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
243 else {
244 pr_err("Unknown Moorestown CPU (%d:%d), default to Lincroft\n",
245 boot_cpu_data.x86, boot_cpu_data.x86_model);
246 __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
247 }
248 pr_debug("Moorestown CPU %s identified\n",
249 (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ?
250 "Lincroft" : "Penwell");
251}
252
253/* MID systems don't have i8042 controller */
254static int mrst_i8042_detect(void)
255{
256 return 0;
257}
258
259/*
260 * Moorestown specific x86_init function overrides and early setup
261 * calls.
262 */
263void __init x86_mrst_early_setup(void)
264{
265 x86_init.resources.probe_roms = x86_init_noop;
266 x86_init.resources.reserve_resources = x86_init_noop;
267
268 x86_init.timers.timer_init = mrst_time_init;
269 x86_init.timers.setup_percpu_clockev = x86_init_noop;
270
271 x86_init.irqs.pre_vector_init = x86_init_noop;
272
273 x86_init.oem.arch_setup = mrst_arch_setup;
274
275 x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
276
277 x86_platform.calibrate_tsc = mrst_calibrate_tsc;
278 x86_platform.i8042_detect = mrst_i8042_detect;
279 x86_init.pci.init = pci_mrst_init;
280 x86_init.pci.fixup_irqs = x86_init_noop;
281
282 legacy_pic = &null_legacy_pic;
283
284 /* Avoid searching for BIOS MP tables */
285 x86_init.mpparse.find_smp_config = x86_init_noop;
286 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
287
288}
289
290/*
291 * if user does not want to use per CPU apb timer, just give it a lower rating
292 * than local apic timer and skip the late per cpu timer init.
293 */
294static inline int __init setup_x86_mrst_timer(char *arg)
295{
296 if (!arg)
297 return -EINVAL;
298
299 if (strcmp("apbt_only", arg) == 0)
300 mrst_timer_options = MRST_TIMER_APBT_ONLY;
301 else if (strcmp("lapic_and_apbt", arg) == 0)
302 mrst_timer_options = MRST_TIMER_LAPIC_APBT;
303 else {
304 pr_warning("X86 MRST timer option %s not recognised"
305 " use x86_mrst_timer=apbt_only or lapic_and_apbt\n",
306 arg);
307 return -EINVAL;
308 }
309 return 0;
310}
311__setup("x86_mrst_timer=", setup_x86_mrst_timer);
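The sfi_get_mtmr()/sfi_free_mtmr() helpers removed above hand out entries from the parsed SFI MTMR table, first honouring a hint index and otherwise returning the first unused timer. A hypothetical in-kernel consumer is sketched below; the hint value, the pr_info text and the error handling are illustrative, and the snippet only builds in-tree against <linux/sfi.h>.

	#include <linux/kernel.h>
	#include <linux/errno.h>
	#include <linux/sfi.h>

	/* Claim the first free SFI timer (hint 0), report it, and hand it back. */
	static int __init example_claim_sfi_timer(void)
	{
		struct sfi_timer_table_entry *tmr = sfi_get_mtmr(0);

		if (!tmr)
			return -ENODEV;

		pr_info("example: timer at %#llx, %u Hz, irq %d\n",
			(unsigned long long)tmr->phys_addr, tmr->freq_hz, tmr->irq);

		sfi_free_mtmr(tmr);
		return 0;
	}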
diff --git a/arch/x86/kernel/olpc-xo1.c b/arch/x86/kernel/olpc-xo1.c
deleted file mode 100644
index f5442c03abc3..000000000000
--- a/arch/x86/kernel/olpc-xo1.c
+++ /dev/null
@@ -1,140 +0,0 @@
1/*
2 * Support for features of the OLPC XO-1 laptop
3 *
4 * Copyright (C) 2010 One Laptop per Child
5 * Copyright (C) 2006 Red Hat, Inc.
6 * Copyright (C) 2006 Advanced Micro Devices, Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/module.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/platform_device.h>
18#include <linux/pm.h>
19
20#include <asm/io.h>
21#include <asm/olpc.h>
22
23#define DRV_NAME "olpc-xo1"
24
25#define PMS_BAR 4
26#define ACPI_BAR 5
27
28/* PMC registers (PMS block) */
29#define PM_SCLK 0x10
30#define PM_IN_SLPCTL 0x20
31#define PM_WKXD 0x34
32#define PM_WKD 0x30
33#define PM_SSC 0x54
34
35/* PM registers (ACPI block) */
36#define PM1_CNT 0x08
37#define PM_GPE0_STS 0x18
38
39static unsigned long acpi_base;
40static unsigned long pms_base;
41
42static void xo1_power_off(void)
43{
44 printk(KERN_INFO "OLPC XO-1 power off sequence...\n");
45
46 /* Enable all of these controls with 0 delay */
47 outl(0x40000000, pms_base + PM_SCLK);
48 outl(0x40000000, pms_base + PM_IN_SLPCTL);
49 outl(0x40000000, pms_base + PM_WKXD);
50 outl(0x40000000, pms_base + PM_WKD);
51
52 /* Clear status bits (possibly unnecessary) */
53 outl(0x0002ffff, pms_base + PM_SSC);
54 outl(0xffffffff, acpi_base + PM_GPE0_STS);
55
56 /* Write SLP_EN bit to start the machinery */
57 outl(0x00002000, acpi_base + PM1_CNT);
58}
59
60/* Read the base addresses from the PCI BAR info */
61static int __devinit setup_bases(struct pci_dev *pdev)
62{
63 int r;
64
65 r = pci_enable_device_io(pdev);
66 if (r) {
67 dev_err(&pdev->dev, "can't enable device IO\n");
68 return r;
69 }
70
71 r = pci_request_region(pdev, ACPI_BAR, DRV_NAME);
72 if (r) {
73 dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", ACPI_BAR);
74 return r;
75 }
76
77 r = pci_request_region(pdev, PMS_BAR, DRV_NAME);
78 if (r) {
79 dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", PMS_BAR);
80 pci_release_region(pdev, ACPI_BAR);
81 return r;
82 }
83
84 acpi_base = pci_resource_start(pdev, ACPI_BAR);
85 pms_base = pci_resource_start(pdev, PMS_BAR);
86
87 return 0;
88}
89
90static int __devinit olpc_xo1_probe(struct platform_device *pdev)
91{
92 struct pci_dev *pcidev;
93 int r;
94
95 pcidev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA,
96 NULL);
97	if (!pcidev)
98 return -ENODEV;
99
100 r = setup_bases(pcidev);
101 if (r)
102 return r;
103
104 pm_power_off = xo1_power_off;
105
106 printk(KERN_INFO "OLPC XO-1 support registered\n");
107 return 0;
108}
109
110static int __devexit olpc_xo1_remove(struct platform_device *pdev)
111{
112 pm_power_off = NULL;
113 return 0;
114}
115
116static struct platform_driver olpc_xo1_driver = {
117 .driver = {
118 .name = DRV_NAME,
119 .owner = THIS_MODULE,
120 },
121 .probe = olpc_xo1_probe,
122 .remove = __devexit_p(olpc_xo1_remove),
123};
124
125static int __init olpc_xo1_init(void)
126{
127 return platform_driver_register(&olpc_xo1_driver);
128}
129
130static void __exit olpc_xo1_exit(void)
131{
132 platform_driver_unregister(&olpc_xo1_driver);
133}
134
135MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>");
136MODULE_LICENSE("GPL");
137MODULE_ALIAS("platform:olpc-xo1");
138
139module_init(olpc_xo1_init);
140module_exit(olpc_xo1_exit);
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
deleted file mode 100644
index edaf3fe8dc5e..000000000000
--- a/arch/x86/kernel/olpc.c
+++ /dev/null
@@ -1,281 +0,0 @@
1/*
2 * Support for the OLPC DCON and OLPC EC access
3 *
4 * Copyright © 2006 Advanced Micro Devices, Inc.
5 * Copyright © 2007-2008 Andres Salomon <dilinger@debian.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 */
12
13#include <linux/kernel.h>
14#include <linux/init.h>
15#include <linux/module.h>
16#include <linux/delay.h>
17#include <linux/spinlock.h>
18#include <linux/io.h>
19#include <linux/string.h>
20#include <linux/platform_device.h>
21
22#include <asm/geode.h>
23#include <asm/setup.h>
24#include <asm/olpc.h>
25#include <asm/olpc_ofw.h>
26
27struct olpc_platform_t olpc_platform_info;
28EXPORT_SYMBOL_GPL(olpc_platform_info);
29
30static DEFINE_SPINLOCK(ec_lock);
31
32/* what the timeout *should* be (in ms) */
33#define EC_BASE_TIMEOUT 20
34
35/* the timeout that bugs in the EC might force us to actually use */
36static int ec_timeout = EC_BASE_TIMEOUT;
37
38static int __init olpc_ec_timeout_set(char *str)
39{
40 if (get_option(&str, &ec_timeout) != 1) {
41 ec_timeout = EC_BASE_TIMEOUT;
42 printk(KERN_ERR "olpc-ec: invalid argument to "
43 "'olpc_ec_timeout=', ignoring!\n");
44 }
45 printk(KERN_DEBUG "olpc-ec: using %d ms delay for EC commands.\n",
46 ec_timeout);
47 return 1;
48}
49__setup("olpc_ec_timeout=", olpc_ec_timeout_set);
50
51/*
52 * These {i,o}bf_status functions return whether the buffers are full or not.
53 */
54
55static inline unsigned int ibf_status(unsigned int port)
56{
57 return !!(inb(port) & 0x02);
58}
59
60static inline unsigned int obf_status(unsigned int port)
61{
62 return inb(port) & 0x01;
63}
64
65#define wait_on_ibf(p, d) __wait_on_ibf(__LINE__, (p), (d))
66static int __wait_on_ibf(unsigned int line, unsigned int port, int desired)
67{
68 unsigned int timeo;
69 int state = ibf_status(port);
70
71 for (timeo = ec_timeout; state != desired && timeo; timeo--) {
72 mdelay(1);
73 state = ibf_status(port);
74 }
75
76 if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) &&
77 timeo < (ec_timeout - EC_BASE_TIMEOUT)) {
78 printk(KERN_WARNING "olpc-ec: %d: waited %u ms for IBF!\n",
79 line, ec_timeout - timeo);
80 }
81
82 return !(state == desired);
83}
84
85#define wait_on_obf(p, d) __wait_on_obf(__LINE__, (p), (d))
86static int __wait_on_obf(unsigned int line, unsigned int port, int desired)
87{
88 unsigned int timeo;
89 int state = obf_status(port);
90
91 for (timeo = ec_timeout; state != desired && timeo; timeo--) {
92 mdelay(1);
93 state = obf_status(port);
94 }
95
96 if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) &&
97 timeo < (ec_timeout - EC_BASE_TIMEOUT)) {
98 printk(KERN_WARNING "olpc-ec: %d: waited %u ms for OBF!\n",
99 line, ec_timeout - timeo);
100 }
101
102 return !(state == desired);
103}
104
105/*
106 * This allows the kernel to run Embedded Controller commands. The EC is
107 * documented at <http://wiki.laptop.org/go/Embedded_controller>, and the
108 * available EC commands are here:
109 * <http://wiki.laptop.org/go/Ec_specification>. Unfortunately, while
110 * OpenFirmware's source is available, the EC's is not.
111 */
112int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
113 unsigned char *outbuf, size_t outlen)
114{
115 unsigned long flags;
116 int ret = -EIO;
117 int i;
118 int restarts = 0;
119
120 spin_lock_irqsave(&ec_lock, flags);
121
122 /* Clear OBF */
123 for (i = 0; i < 10 && (obf_status(0x6c) == 1); i++)
124 inb(0x68);
125 if (i == 10) {
126 printk(KERN_ERR "olpc-ec: timeout while attempting to "
127 "clear OBF flag!\n");
128 goto err;
129 }
130
131 if (wait_on_ibf(0x6c, 0)) {
132 printk(KERN_ERR "olpc-ec: timeout waiting for EC to "
133 "quiesce!\n");
134 goto err;
135 }
136
137restart:
138 /*
139 * Note that if we time out during any IBF checks, that's a failure;
140 * we have to return. There's no way for the kernel to clear that.
141 *
142 * If we time out during an OBF check, we can restart the command;
143 * reissuing it will clear the OBF flag, and we should be alright.
144 * The OBF flag will sometimes misbehave due to what we believe
145 * is a hardware quirk..
146 */
147 pr_devel("olpc-ec: running cmd 0x%x\n", cmd);
148 outb(cmd, 0x6c);
149
150 if (wait_on_ibf(0x6c, 0)) {
151 printk(KERN_ERR "olpc-ec: timeout waiting for EC to read "
152 "command!\n");
153 goto err;
154 }
155
156 if (inbuf && inlen) {
157 /* write data to EC */
158 for (i = 0; i < inlen; i++) {
159 if (wait_on_ibf(0x6c, 0)) {
160 printk(KERN_ERR "olpc-ec: timeout waiting for"
161 " EC accept data!\n");
162 goto err;
163 }
164 pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]);
165 outb(inbuf[i], 0x68);
166 }
167 }
168 if (outbuf && outlen) {
169 /* read data from EC */
170 for (i = 0; i < outlen; i++) {
171 if (wait_on_obf(0x6c, 1)) {
172 printk(KERN_ERR "olpc-ec: timeout waiting for"
173 " EC to provide data!\n");
174 if (restarts++ < 10)
175 goto restart;
176 goto err;
177 }
178 outbuf[i] = inb(0x68);
179 pr_devel("olpc-ec: received 0x%x\n", outbuf[i]);
180 }
181 }
182
183 ret = 0;
184err:
185 spin_unlock_irqrestore(&ec_lock, flags);
186 return ret;
187}
188EXPORT_SYMBOL_GPL(olpc_ec_cmd);
189
190static bool __init check_ofw_architecture(void)
191{
192 size_t propsize;
193 char olpc_arch[5];
194 const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 };
195 void *res[] = { &propsize };
196
197 if (olpc_ofw("getprop", args, res)) {
198 printk(KERN_ERR "ofw: getprop call failed!\n");
199 return false;
200 }
201 return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0;
202}
203
204static u32 __init get_board_revision(void)
205{
206 size_t propsize;
207 __be32 rev;
208 const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 };
209 void *res[] = { &propsize };
210
211 if (olpc_ofw("getprop", args, res) || propsize != 4) {
212 printk(KERN_ERR "ofw: getprop call failed!\n");
213 return cpu_to_be32(0);
214 }
215 return be32_to_cpu(rev);
216}
217
218static bool __init platform_detect(void)
219{
220 if (!check_ofw_architecture())
221 return false;
222 olpc_platform_info.flags |= OLPC_F_PRESENT;
223 olpc_platform_info.boardrev = get_board_revision();
224 return true;
225}
226
227static int __init add_xo1_platform_devices(void)
228{
229 struct platform_device *pdev;
230
231 pdev = platform_device_register_simple("xo1-rfkill", -1, NULL, 0);
232 if (IS_ERR(pdev))
233 return PTR_ERR(pdev);
234
235 pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0);
236 if (IS_ERR(pdev))
237 return PTR_ERR(pdev);
238
239 return 0;
240}
241
242static int __init olpc_init(void)
243{
244 int r = 0;
245
246 if (!olpc_ofw_present() || !platform_detect())
247 return 0;
248
249 spin_lock_init(&ec_lock);
250
251 /* assume B1 and above models always have a DCON */
252 if (olpc_board_at_least(olpc_board(0xb1)))
253 olpc_platform_info.flags |= OLPC_F_DCON;
254
255 /* get the EC revision */
256 olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
257 (unsigned char *) &olpc_platform_info.ecver, 1);
258
259#ifdef CONFIG_PCI_OLPC
260 /* If the VSA exists let it emulate PCI, if not emulate in kernel.
261 * XO-1 only. */
262 if (olpc_platform_info.boardrev < olpc_board_pre(0xd0) &&
263 !cs5535_has_vsa2())
264 x86_init.pci.arch_init = pci_olpc_init;
265#endif
266
267 printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
268 ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
269 olpc_platform_info.boardrev >> 4,
270 olpc_platform_info.ecver);
271
272 if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) { /* XO-1 */
273 r = add_xo1_platform_devices();
274 if (r)
275 return r;
276 }
277
278 return 0;
279}
280
281postcore_initcall(olpc_init);
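olpc_ec_cmd() above serializes a command/response exchange with the embedded controller over ports 0x6c/0x68, clearing OBF first and restarting on OBF glitches. A minimal caller mirrors what olpc_init() does for the firmware revision; the wrapper name and error handling below are illustrative, and the sketch only builds in-tree, with EC_FIRMWARE_REV coming from <asm/olpc.h>.

	#include <linux/kernel.h>
	#include <asm/olpc.h>

	/* Read the one-byte EC firmware revision, as olpc_init() does. */
	static int example_read_ec_rev(unsigned char *rev)
	{
		int ret = olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, rev, 1);

		if (ret)
			pr_err("example: EC_FIRMWARE_REV failed: %d\n", ret);
		return ret;
	}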
diff --git a/arch/x86/kernel/olpc_ofw.c b/arch/x86/kernel/olpc_ofw.c
deleted file mode 100644
index 787320464379..000000000000
--- a/arch/x86/kernel/olpc_ofw.c
+++ /dev/null
@@ -1,112 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/module.h>
3#include <linux/init.h>
4#include <asm/page.h>
5#include <asm/setup.h>
6#include <asm/io.h>
7#include <asm/pgtable.h>
8#include <asm/olpc_ofw.h>
9
10/* address of OFW callback interface; will be NULL if OFW isn't found */
11static int (*olpc_ofw_cif)(int *);
12
13/* page dir entry containing OFW's pgdir table; filled in by head_32.S */
14u32 olpc_ofw_pgd __initdata;
15
16static DEFINE_SPINLOCK(ofw_lock);
17
18#define MAXARGS 10
19
20void __init setup_olpc_ofw_pgd(void)
21{
22 pgd_t *base, *ofw_pde;
23
24 if (!olpc_ofw_cif)
25 return;
26
27 /* fetch OFW's PDE */
28 base = early_ioremap(olpc_ofw_pgd, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD);
29 if (!base) {
30 printk(KERN_ERR "failed to remap OFW's pgd - disabling OFW!\n");
31 olpc_ofw_cif = NULL;
32 return;
33 }
34 ofw_pde = &base[OLPC_OFW_PDE_NR];
35
36 /* install OFW's PDE permanently into the kernel's pgtable */
37 set_pgd(&swapper_pg_dir[OLPC_OFW_PDE_NR], *ofw_pde);
38 /* implicit optimization barrier here due to uninline function return */
39
40 early_iounmap(base, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD);
41}
42
43int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res,
44 void **res)
45{
46 int ofw_args[MAXARGS + 3];
47 unsigned long flags;
48 int ret, i, *p;
49
50 BUG_ON(nr_args + nr_res > MAXARGS);
51
52 if (!olpc_ofw_cif)
53 return -EIO;
54
55 ofw_args[0] = (int)name;
56 ofw_args[1] = nr_args;
57 ofw_args[2] = nr_res;
58
59 p = &ofw_args[3];
60 for (i = 0; i < nr_args; i++, p++)
61 *p = (int)args[i];
62
63 /* call into ofw */
64 spin_lock_irqsave(&ofw_lock, flags);
65 ret = olpc_ofw_cif(ofw_args);
66 spin_unlock_irqrestore(&ofw_lock, flags);
67
68 if (!ret) {
69 for (i = 0; i < nr_res; i++, p++)
70 *((int *)res[i]) = *p;
71 }
72
73 return ret;
74}
75EXPORT_SYMBOL_GPL(__olpc_ofw);
76
77bool olpc_ofw_present(void)
78{
79 return olpc_ofw_cif != NULL;
80}
81EXPORT_SYMBOL_GPL(olpc_ofw_present);
82
83/* OFW cif _should_ be above this address */
84#define OFW_MIN 0xff000000
85
86/* OFW starts on a 1MB boundary */
87#define OFW_BOUND (1<<20)
88
89void __init olpc_ofw_detect(void)
90{
91 struct olpc_ofw_header *hdr = &boot_params.olpc_ofw_header;
92 unsigned long start;
93
94 /* ensure OFW booted us by checking for "OFW " string */
95 if (hdr->ofw_magic != OLPC_OFW_SIG)
96 return;
97
98 olpc_ofw_cif = (int (*)(int *))hdr->cif_handler;
99
100 if ((unsigned long)olpc_ofw_cif < OFW_MIN) {
101 printk(KERN_ERR "OFW detected, but cif has invalid address 0x%lx - disabling.\n",
102 (unsigned long)olpc_ofw_cif);
103 olpc_ofw_cif = NULL;
104 return;
105 }
106
107 /* determine where OFW starts in memory */
108 start = round_down((unsigned long)olpc_ofw_cif, OFW_BOUND);
109 printk(KERN_INFO "OFW detected in memory, cif @ 0x%lx (reserving top %ldMB)\n",
110 (unsigned long)olpc_ofw_cif, (-start) >> 20);
111 reserve_top_address(-start);
112}
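__olpc_ofw() marshals the service name, argument count and result count into a flat int array and calls into the firmware under a spinlock. Callers normally go through the olpc_ofw() wrapper macro from <asm/olpc_ofw.h> (not part of this hunk), as check_ofw_architecture() in olpc.c does. A sketch of fetching a property that way; the getprop calling convention (phandle, name, buffer, buflen -> proplen) follows that caller, and the function name here is hypothetical.

	#include <linux/kernel.h>
	#include <asm/olpc_ofw.h>

	/* Fetch the "architecture" property of the root node, as olpc.c does. */
	static int example_ofw_getprop(char *buf, size_t buflen, size_t *proplen)
	{
		const void *args[] = { NULL, "architecture", buf, (void *)buflen };
		void *res[] = { proplen };

		return olpc_ofw("getprop", args, res);
	}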
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index ba0f0ca9f280..c01ffa5b9b87 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -143,7 +143,7 @@ static void flush_gart(void)
143 143
144 spin_lock_irqsave(&iommu_bitmap_lock, flags); 144 spin_lock_irqsave(&iommu_bitmap_lock, flags);
145 if (need_flush) { 145 if (need_flush) {
146 k8_flush_garts(); 146 amd_flush_garts();
147 need_flush = false; 147 need_flush = false;
148 } 148 }
149 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 149 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -561,17 +561,17 @@ static void enable_gart_translations(void)
561{ 561{
562 int i; 562 int i;
563 563
564 if (!k8_northbridges.gart_supported) 564 if (!amd_nb_has_feature(AMD_NB_GART))
565 return; 565 return;
566 566
567 for (i = 0; i < k8_northbridges.num; i++) { 567 for (i = 0; i < amd_nb_num(); i++) {
568 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 568 struct pci_dev *dev = node_to_amd_nb(i)->misc;
569 569
570 enable_gart_translation(dev, __pa(agp_gatt_table)); 570 enable_gart_translation(dev, __pa(agp_gatt_table));
571 } 571 }
572 572
573 /* Flush the GART-TLB to remove stale entries */ 573 /* Flush the GART-TLB to remove stale entries */
574 k8_flush_garts(); 574 amd_flush_garts();
575} 575}
576 576
577/* 577/*
@@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev)
596 if (!fix_up_north_bridges) 596 if (!fix_up_north_bridges)
597 return; 597 return;
598 598
599 if (!k8_northbridges.gart_supported) 599 if (!amd_nb_has_feature(AMD_NB_GART))
600 return; 600 return;
601 601
602 pr_info("PCI-DMA: Restoring GART aperture settings\n"); 602 pr_info("PCI-DMA: Restoring GART aperture settings\n");
603 603
604 for (i = 0; i < k8_northbridges.num; i++) { 604 for (i = 0; i < amd_nb_num(); i++) {
605 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 605 struct pci_dev *dev = node_to_amd_nb(i)->misc;
606 606
607 /* 607 /*
608 * Don't enable translations just yet. That is the next 608 * Don't enable translations just yet. That is the next
@@ -644,7 +644,7 @@ static struct sys_device device_gart = {
644 * Private Northbridge GATT initialization in case we cannot use the 644 * Private Northbridge GATT initialization in case we cannot use the
645 * AGP driver for some reason. 645 * AGP driver for some reason.
646 */ 646 */
647static __init int init_k8_gatt(struct agp_kern_info *info) 647static __init int init_amd_gatt(struct agp_kern_info *info)
648{ 648{
649 unsigned aper_size, gatt_size, new_aper_size; 649 unsigned aper_size, gatt_size, new_aper_size;
650 unsigned aper_base, new_aper_base; 650 unsigned aper_base, new_aper_base;
@@ -656,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
656 656
657 aper_size = aper_base = info->aper_size = 0; 657 aper_size = aper_base = info->aper_size = 0;
658 dev = NULL; 658 dev = NULL;
659 for (i = 0; i < k8_northbridges.num; i++) { 659 for (i = 0; i < amd_nb_num(); i++) {
660 dev = k8_northbridges.nb_misc[i]; 660 dev = node_to_amd_nb(i)->misc;
661 new_aper_base = read_aperture(dev, &new_aper_size); 661 new_aper_base = read_aperture(dev, &new_aper_size);
662 if (!new_aper_base) 662 if (!new_aper_base)
663 goto nommu; 663 goto nommu;
@@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void)
725 if (!no_agp) 725 if (!no_agp)
726 return; 726 return;
727 727
728 if (!k8_northbridges.gart_supported) 728 if (!amd_nb_has_feature(AMD_NB_GART))
729 return; 729 return;
730 730
731 for (i = 0; i < k8_northbridges.num; i++) { 731 for (i = 0; i < amd_nb_num(); i++) {
732 u32 ctl; 732 u32 ctl;
733 733
734 dev = k8_northbridges.nb_misc[i]; 734 dev = node_to_amd_nb(i)->misc;
735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); 735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
736 736
737 ctl &= ~GARTEN; 737 ctl &= ~GARTEN;
@@ -749,14 +749,14 @@ int __init gart_iommu_init(void)
749 unsigned long scratch; 749 unsigned long scratch;
750 long i; 750 long i;
751 751
752 if (!k8_northbridges.gart_supported) 752 if (!amd_nb_has_feature(AMD_NB_GART))
753 return 0; 753 return 0;
754 754
755#ifndef CONFIG_AGP_AMD64 755#ifndef CONFIG_AGP_AMD64
756 no_agp = 1; 756 no_agp = 1;
757#else 757#else
758 /* Makefile puts PCI initialization via subsys_initcall first. */ 758 /* Makefile puts PCI initialization via subsys_initcall first. */
759 /* Add other K8 AGP bridge drivers here */ 759 /* Add other AMD AGP bridge drivers here */
760 no_agp = no_agp || 760 no_agp = no_agp ||
761 (agp_amd64_init() < 0) || 761 (agp_amd64_init() < 0) ||
762 (agp_copy_info(agp_bridge, &info) < 0); 762 (agp_copy_info(agp_bridge, &info) < 0);
@@ -765,7 +765,7 @@ int __init gart_iommu_init(void)
765 if (no_iommu || 765 if (no_iommu ||
766 (!force_iommu && max_pfn <= MAX_DMA32_PFN) || 766 (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
767 !gart_iommu_aperture || 767 !gart_iommu_aperture ||
768 (no_agp && init_k8_gatt(&info) < 0)) { 768 (no_agp && init_amd_gatt(&info) < 0)) {
769 if (max_pfn > MAX_DMA32_PFN) { 769 if (max_pfn > MAX_DMA32_PFN) {
770 pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); 770 pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
771 pr_warning("falling back to iommu=soft.\n"); 771 pr_warning("falling back to iommu=soft.\n");
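The mechanical part of this hunk is the rename from the old k8_northbridges globals to the amd_nb accessors; every "walk all northbridges" loop now follows the same pattern. A condensed sketch of that pattern is below; the config-space read is only an example, and the header names are as used elsewhere in this series.

	#include <linux/kernel.h>
	#include <linux/pci.h>
	#include <asm/amd_nb.h>
	#include <asm/gart.h>

	/* Iterate the AMD northbridge misc devices with the new accessors,
	 * reading the GART aperture control register from each as an example. */
	static void example_walk_amd_northbridges(void)
	{
		int i;

		if (!amd_nb_has_feature(AMD_NB_GART))
			return;

		for (i = 0; i < amd_nb_num(); i++) {
			struct pci_dev *dev = node_to_amd_nb(i)->misc;
			u32 ctl;

			pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
			pr_info("NB %d: GART aperture ctl %#x\n", i, ctl);
		}
	}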
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 70c4872cd8aa..45892dc4b72a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -801,7 +801,8 @@ void ptrace_disable(struct task_struct *child)
801static const struct user_regset_view user_x86_32_view; /* Initialized below. */ 801static const struct user_regset_view user_x86_32_view; /* Initialized below. */
802#endif 802#endif
803 803
804long arch_ptrace(struct task_struct *child, long request, long addr, long data) 804long arch_ptrace(struct task_struct *child, long request,
805 unsigned long addr, unsigned long data)
805{ 806{
806 int ret; 807 int ret;
807 unsigned long __user *datap = (unsigned long __user *)data; 808 unsigned long __user *datap = (unsigned long __user *)data;
@@ -812,8 +813,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
812 unsigned long tmp; 813 unsigned long tmp;
813 814
814 ret = -EIO; 815 ret = -EIO;
815 if ((addr & (sizeof(data) - 1)) || addr < 0 || 816 if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
816 addr >= sizeof(struct user))
817 break; 817 break;
818 818
819 tmp = 0; /* Default return condition */ 819 tmp = 0; /* Default return condition */
@@ -830,8 +830,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
830 830
831 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ 831 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
832 ret = -EIO; 832 ret = -EIO;
833 if ((addr & (sizeof(data) - 1)) || addr < 0 || 833 if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
834 addr >= sizeof(struct user))
835 break; 834 break;
836 835
837 if (addr < sizeof(struct user_regs_struct)) 836 if (addr < sizeof(struct user_regs_struct))
@@ -888,17 +887,17 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
888 887
889#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 888#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
890 case PTRACE_GET_THREAD_AREA: 889 case PTRACE_GET_THREAD_AREA:
891 if (addr < 0) 890 if ((int) addr < 0)
892 return -EIO; 891 return -EIO;
893 ret = do_get_thread_area(child, addr, 892 ret = do_get_thread_area(child, addr,
894 (struct user_desc __user *) data); 893 (struct user_desc __user *)data);
895 break; 894 break;
896 895
897 case PTRACE_SET_THREAD_AREA: 896 case PTRACE_SET_THREAD_AREA:
898 if (addr < 0) 897 if ((int) addr < 0)
899 return -EIO; 898 return -EIO;
900 ret = do_set_thread_area(child, addr, 899 ret = do_set_thread_area(child, addr,
901 (struct user_desc __user *) data, 0); 900 (struct user_desc __user *)data, 0);
902 break; 901 break;
903#endif 902#endif
904 903
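With addr now an unsigned long, the old "addr < 0" tests are dead code (always false), which is why they are dropped for PEEKUSR/POKEUSR and replaced by an (int) cast for the THREAD_AREA requests, where a negative value passed by a 32-bit tracer still has to be rejected. A tiny stand-alone demonstration of the difference:

	#include <stdio.h>

	int main(void)
	{
		unsigned long addr = (unsigned long)-4;	/* tracer passed -4 */

		/* Always false once addr is unsigned: */
		printf("addr < 0      -> %d\n", addr < 0);
		/* The cast recovers the intended sign check: */
		printf("(int)addr < 0 -> %d\n", (int)addr < 0);
		return 0;
	}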
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 239427ca02af..008b91eefa18 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -41,48 +41,11 @@ void pvclock_set_flags(u8 flags)
41 valid_flags = flags; 41 valid_flags = flags;
42} 42}
43 43
44/*
45 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
46 * yielding a 64-bit result.
47 */
48static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
49{
50 u64 product;
51#ifdef __i386__
52 u32 tmp1, tmp2;
53#endif
54
55 if (shift < 0)
56 delta >>= -shift;
57 else
58 delta <<= shift;
59
60#ifdef __i386__
61 __asm__ (
62 "mul %5 ; "
63 "mov %4,%%eax ; "
64 "mov %%edx,%4 ; "
65 "mul %5 ; "
66 "xor %5,%5 ; "
67 "add %4,%%eax ; "
68 "adc %5,%%edx ; "
69 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
70 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
71#elif defined(__x86_64__)
72 __asm__ (
73 "mul %%rdx ; shrd $32,%%rdx,%%rax"
74 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
75#else
76#error implement me!
77#endif
78
79 return product;
80}
81
82static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) 44static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
83{ 45{
84 u64 delta = native_read_tsc() - shadow->tsc_timestamp; 46 u64 delta = native_read_tsc() - shadow->tsc_timestamp;
85 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); 47 return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul,
48 shadow->tsc_shift);
86} 49}
87 50
88/* 51/*
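The scale_delta() removed here is the usual "multiply a 64-bit delta by a 32.32 fixed-point factor" helper; it survives as pvclock_scale_delta() in <asm/pvclock.h>. A portable model of the same arithmetic, using 128-bit math instead of the inline assembly, is sketched below; it assumes a compiler and target with unsigned __int128, and the mul_frac value in main() is a made-up example.

	#include <stdint.h>
	#include <stdio.h>

	/* delta is first shifted by tsc_shift, then multiplied by the 32-bit
	 * fraction mul_frac and divided by 2^32 - i.e. treated as a 32.32
	 * fixed-point multiply.  Same math as pvclock_scale_delta(). */
	static uint64_t scale_delta_model(uint64_t delta, uint32_t mul_frac, int shift)
	{
		if (shift < 0)
			delta >>= -shift;
		else
			delta <<= shift;

		return (uint64_t)(((unsigned __int128)delta * mul_frac) >> 32);
	}

	int main(void)
	{
		/* Example: convert 3,000,000 TSC cycles at 3 GHz to nanoseconds.
		 * mul_frac = 2^32 / 3 encodes "1/3 ns per cycle". */
		uint64_t ns = scale_delta_model(3000000, 0x55555555u, 0);

		printf("%llu ns\n", (unsigned long long)ns);	/* ~1,000,000 */
		return 0;
	}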
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 939b9e98245f..8bbe8c56916d 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -344,6 +344,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235,
344 vt8237_force_enable_hpet); 344 vt8237_force_enable_hpet);
345DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, 345DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
346 vt8237_force_enable_hpet); 346 vt8237_force_enable_hpet);
347DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_CX700,
348 vt8237_force_enable_hpet);
347 349
348static void ati_force_hpet_resume(void) 350static void ati_force_hpet_resume(void)
349{ 351{
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 7a4cf14223ba..c495aa8d4815 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -371,16 +371,10 @@ void machine_real_restart(const unsigned char *code, int length)
371 CMOS_WRITE(0x00, 0x8f); 371 CMOS_WRITE(0x00, 0x8f);
372 spin_unlock(&rtc_lock); 372 spin_unlock(&rtc_lock);
373 373
374 /* Remap the kernel at virtual address zero, as well as offset zero
375 from the kernel segment. This assumes the kernel segment starts at
376 virtual address PAGE_OFFSET. */
377 memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
378 sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
379
380 /* 374 /*
381 * Use `swapper_pg_dir' as our page directory. 375 * Switch back to the initial page table.
382 */ 376 */
383 load_cr3(swapper_pg_dir); 377 load_cr3(initial_page_table);
384 378
385 /* Write 0x1234 to absolute memory location 0x472. The BIOS reads 379 /* Write 0x1234 to absolute memory location 0x472. The BIOS reads
386 this on booting to tell it to "Bypass memory test (also warm 380 this on booting to tell it to "Bypass memory test (also warm
@@ -641,7 +635,7 @@ void native_machine_shutdown(void)
641 /* O.K Now that I'm on the appropriate processor, 635 /* O.K Now that I'm on the appropriate processor,
642 * stop all of the others. 636 * stop all of the others.
643 */ 637 */
644 smp_send_stop(); 638 stop_other_cpus();
645#endif 639#endif
646 640
647 lapic_shutdown(); 641 lapic_shutdown();
diff --git a/arch/x86/kernel/scx200_32.c b/arch/x86/kernel/scx200_32.c
deleted file mode 100644
index 7e004acbe526..000000000000
--- a/arch/x86/kernel/scx200_32.c
+++ /dev/null
@@ -1,131 +0,0 @@
1/*
2 * Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com>
3 *
4 * National Semiconductor SCx200 support.
5 */
6
7#include <linux/module.h>
8#include <linux/errno.h>
9#include <linux/kernel.h>
10#include <linux/init.h>
11#include <linux/mutex.h>
12#include <linux/pci.h>
13
14#include <linux/scx200.h>
15#include <linux/scx200_gpio.h>
16
17/* Verify that the configuration block really is there */
18#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))
19
20#define NAME "scx200"
21
22MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>");
23MODULE_DESCRIPTION("NatSemi SCx200 Driver");
24MODULE_LICENSE("GPL");
25
26unsigned scx200_gpio_base = 0;
27unsigned long scx200_gpio_shadow[2];
28
29unsigned scx200_cb_base = 0;
30
31static struct pci_device_id scx200_tbl[] = {
32 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
33 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
34 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) },
35 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) },
36 { },
37};
38MODULE_DEVICE_TABLE(pci,scx200_tbl);
39
40static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *);
41
42static struct pci_driver scx200_pci_driver = {
43 .name = "scx200",
44 .id_table = scx200_tbl,
45 .probe = scx200_probe,
46};
47
48static DEFINE_MUTEX(scx200_gpio_config_lock);
49
50static void __devinit scx200_init_shadow(void)
51{
52 int bank;
53
54 /* read the current values driven on the GPIO signals */
55 for (bank = 0; bank < 2; ++bank)
56 scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank);
57}
58
59static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
60{
61 unsigned base;
62
63 if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE ||
64 pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) {
65 base = pci_resource_start(pdev, 0);
66 printk(KERN_INFO NAME ": GPIO base 0x%x\n", base);
67
68 if (!request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO")) {
69 printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n");
70 return -EBUSY;
71 }
72
73 scx200_gpio_base = base;
74 scx200_init_shadow();
75
76 } else {
77 /* find the base of the Configuration Block */
78 if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) {
79 scx200_cb_base = SCx200_CB_BASE_FIXED;
80 } else {
81 pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base);
82 if (scx200_cb_probe(base)) {
83 scx200_cb_base = base;
84 } else {
85 printk(KERN_WARNING NAME ": Configuration Block not found\n");
86 return -ENODEV;
87 }
88 }
89 printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base);
90 }
91
92 return 0;
93}
94
95u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits)
96{
97 u32 config, new_config;
98
99 mutex_lock(&scx200_gpio_config_lock);
100
101 outl(index, scx200_gpio_base + 0x20);
102 config = inl(scx200_gpio_base + 0x24);
103
104 new_config = (config & mask) | bits;
105 outl(new_config, scx200_gpio_base + 0x24);
106
107 mutex_unlock(&scx200_gpio_config_lock);
108
109 return config;
110}
111
112static int __init scx200_init(void)
113{
114 printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n");
115
116 return pci_register_driver(&scx200_pci_driver);
117}
118
119static void __exit scx200_cleanup(void)
120{
121 pci_unregister_driver(&scx200_pci_driver);
122 release_region(scx200_gpio_base, SCx200_GPIO_SIZE);
123}
124
125module_init(scx200_init);
126module_exit(scx200_cleanup);
127
128EXPORT_SYMBOL(scx200_gpio_base);
129EXPORT_SYMBOL(scx200_gpio_shadow);
130EXPORT_SYMBOL(scx200_gpio_configure);
131EXPORT_SYMBOL(scx200_cb_base);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b8982e0fc0c2..0afb8c7e3803 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -694,12 +694,23 @@ static u64 __init get_max_mapped(void)
694void __init setup_arch(char **cmdline_p) 694void __init setup_arch(char **cmdline_p)
695{ 695{
696 int acpi = 0; 696 int acpi = 0;
697 int k8 = 0; 697 int amd = 0;
698 unsigned long flags; 698 unsigned long flags;
699 699
700#ifdef CONFIG_X86_32 700#ifdef CONFIG_X86_32
701 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 701 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
702 visws_early_detect(); 702 visws_early_detect();
703
704 /*
705 * copy kernel address range established so far and switch
706 * to the proper swapper page table
707 */
708 clone_pgd_range(swapper_pg_dir + KERNEL_PGD_BOUNDARY,
709 initial_page_table + KERNEL_PGD_BOUNDARY,
710 KERNEL_PGD_PTRS);
711
712 load_cr3(swapper_pg_dir);
713 __flush_tlb_all();
703#else 714#else
704 printk(KERN_INFO "Command line: %s\n", boot_command_line); 715 printk(KERN_INFO "Command line: %s\n", boot_command_line);
705#endif 716#endif
@@ -758,6 +769,8 @@ void __init setup_arch(char **cmdline_p)
758 769
759 x86_init.oem.arch_setup(); 770 x86_init.oem.arch_setup();
760 771
772 resource_alloc_from_bottom = 0;
773 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
761 setup_memory_map(); 774 setup_memory_map();
762 parse_setup_data(); 775 parse_setup_data();
763 /* update the e820_saved too */ 776 /* update the e820_saved too */
@@ -968,12 +981,12 @@ void __init setup_arch(char **cmdline_p)
968 acpi = acpi_numa_init(); 981 acpi = acpi_numa_init();
969#endif 982#endif
970 983
971#ifdef CONFIG_K8_NUMA 984#ifdef CONFIG_AMD_NUMA
972 if (!acpi) 985 if (!acpi)
973 k8 = !k8_numa_init(0, max_pfn); 986 amd = !amd_numa_init(0, max_pfn);
974#endif 987#endif
975 988
976 initmem_init(0, max_pfn, acpi, k8); 989 initmem_init(0, max_pfn, acpi, amd);
977 memblock_find_dma_reserve(); 990 memblock_find_dma_reserve();
978 dma32_reserve_bootmem(); 991 dma32_reserve_bootmem();
979 992
@@ -985,7 +998,12 @@ void __init setup_arch(char **cmdline_p)
985 paging_init(); 998 paging_init();
986 x86_init.paging.pagetable_setup_done(swapper_pg_dir); 999 x86_init.paging.pagetable_setup_done(swapper_pg_dir);
987 1000
988 setup_trampoline_page_table(); 1001#ifdef CONFIG_X86_32
1002 /* sync back kernel address range */
1003 clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
1004 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
1005 KERNEL_PGD_PTRS);
1006#endif
989 1007
990 tboot_probe(); 1008 tboot_probe();
991 1009
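The 32-bit boot path now starts out on initial_page_table (built in head_32.S) and only switches to swapper_pg_dir once the kernel-address-range entries have been copied across; clone_pgd_range() is essentially a memcpy of top-level page-directory entries, and the same copy is repeated in the other direction after paging_init(). A stand-alone model of that handoff, using non-PAE constants (1024 PGD entries, kernel half starting at entry 768) as an assumption; the real helper lives in <asm/pgtable.h>.

	#include <string.h>
	#include <stdint.h>
	#include <stdio.h>

	typedef struct { uint32_t pgd; } pgd_t;	/* stand-in for the kernel type */

	#define PTRS_PER_PGD		1024	/* 32-bit non-PAE layout */
	#define KERNEL_PGD_BOUNDARY	768	/* first entry above PAGE_OFFSET */
	#define KERNEL_PGD_PTRS		(PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)

	/* Model of clone_pgd_range(): copy 'count' top-level entries. */
	static void clone_pgd_range_model(pgd_t *dst, const pgd_t *src, int count)
	{
		memcpy(dst, src, count * sizeof(pgd_t));
	}

	int main(void)
	{
		static pgd_t initial_pt[PTRS_PER_PGD], swapper[PTRS_PER_PGD];

		initial_pt[KERNEL_PGD_BOUNDARY].pgd = 0x1234;	/* pretend mapping */

		/* What setup_arch() does before load_cr3(swapper_pg_dir): */
		clone_pgd_range_model(swapper + KERNEL_PGD_BOUNDARY,
				      initial_pt + KERNEL_PGD_BOUNDARY,
				      KERNEL_PGD_PTRS);

		printf("%#x\n", swapper[KERNEL_PGD_BOUNDARY].pgd);	/* 0x1234 */
		return 0;
	}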
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c
deleted file mode 100644
index dd4c281ffe57..000000000000
--- a/arch/x86/kernel/sfi.c
+++ /dev/null
@@ -1,120 +0,0 @@
1/*
2 * sfi.c - x86 architecture SFI support.
3 *
4 * Copyright (c) 2009, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 */
20
21#define KMSG_COMPONENT "SFI"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24#include <linux/acpi.h>
25#include <linux/init.h>
26#include <linux/sfi.h>
27#include <linux/io.h>
28
29#include <asm/io_apic.h>
30#include <asm/mpspec.h>
31#include <asm/setup.h>
32#include <asm/apic.h>
33
34#ifdef CONFIG_X86_LOCAL_APIC
35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
36
37static void __init mp_sfi_register_lapic_address(unsigned long address)
38{
39 mp_lapic_addr = address;
40
41 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
42 if (boot_cpu_physical_apicid == -1U)
43 boot_cpu_physical_apicid = read_apic_id();
44
45 pr_info("Boot CPU = %d\n", boot_cpu_physical_apicid);
46}
47
48/* All CPUs enumerated by SFI must be present and enabled */
49static void __cpuinit mp_sfi_register_lapic(u8 id)
50{
51 if (MAX_APICS - id <= 0) {
52 pr_warning("Processor #%d invalid (max %d)\n",
53 id, MAX_APICS);
54 return;
55 }
56
57 pr_info("registering lapic[%d]\n", id);
58
59 generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR)));
60}
61
62static int __init sfi_parse_cpus(struct sfi_table_header *table)
63{
64 struct sfi_table_simple *sb;
65 struct sfi_cpu_table_entry *pentry;
66 int i;
67 int cpu_num;
68
69 sb = (struct sfi_table_simple *)table;
70 cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry);
71 pentry = (struct sfi_cpu_table_entry *)sb->pentry;
72
73 for (i = 0; i < cpu_num; i++) {
74 mp_sfi_register_lapic(pentry->apic_id);
75 pentry++;
76 }
77
78 smp_found_config = 1;
79 return 0;
80}
81#endif /* CONFIG_X86_LOCAL_APIC */
82
83#ifdef CONFIG_X86_IO_APIC
84
85static int __init sfi_parse_ioapic(struct sfi_table_header *table)
86{
87 struct sfi_table_simple *sb;
88 struct sfi_apic_table_entry *pentry;
89 int i, num;
90
91 sb = (struct sfi_table_simple *)table;
92 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry);
93 pentry = (struct sfi_apic_table_entry *)sb->pentry;
94
95 for (i = 0; i < num; i++) {
96 mp_register_ioapic(i, pentry->phys_addr, gsi_top);
97 pentry++;
98 }
99
100 WARN(pic_mode, KERN_WARNING
101 "SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n");
102 pic_mode = 0;
103 return 0;
104}
105#endif /* CONFIG_X86_IO_APIC */
106
107/*
108 * sfi_platform_init(): register lapics & io-apics
109 */
110int __init sfi_platform_init(void)
111{
112#ifdef CONFIG_X86_LOCAL_APIC
113 mp_sfi_register_lapic_address(sfi_lapic_addr);
114 sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
115#endif
116#ifdef CONFIG_X86_IO_APIC
117 sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic);
118#endif
119 return 0;
120}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d801210945d6..513deac7228d 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(void)
159 irq_exit(); 159 irq_exit();
160} 160}
161 161
162static void native_smp_send_stop(void) 162static void native_stop_other_cpus(int wait)
163{ 163{
164 unsigned long flags; 164 unsigned long flags;
165 unsigned long wait; 165 unsigned long timeout;
166 166
167 if (reboot_force) 167 if (reboot_force)
168 return; 168 return;
@@ -179,9 +179,12 @@ static void native_smp_send_stop(void)
179 if (num_online_cpus() > 1) { 179 if (num_online_cpus() > 1) {
180 apic->send_IPI_allbutself(REBOOT_VECTOR); 180 apic->send_IPI_allbutself(REBOOT_VECTOR);
181 181
182 /* Don't wait longer than a second */ 182 /*
183 wait = USEC_PER_SEC; 183 * Don't wait longer than a second if the caller
184 while (num_online_cpus() > 1 && wait--) 184 * didn't ask us to wait.
185 */
186 timeout = USEC_PER_SEC;
187 while (num_online_cpus() > 1 && (wait || timeout--))
185 udelay(1); 188 udelay(1);
186 } 189 }
187 190
@@ -227,7 +230,7 @@ struct smp_ops smp_ops = {
227 .smp_prepare_cpus = native_smp_prepare_cpus, 230 .smp_prepare_cpus = native_smp_prepare_cpus,
228 .smp_cpus_done = native_smp_cpus_done, 231 .smp_cpus_done = native_smp_cpus_done,
229 232
230 .smp_send_stop = native_smp_send_stop, 233 .stop_other_cpus = native_stop_other_cpus,
231 .smp_send_reschedule = native_smp_send_reschedule, 234 .smp_send_reschedule = native_smp_send_reschedule,
232 235
233 .cpu_up = native_cpu_up, 236 .cpu_up = native_cpu_up,
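The smp.c hunk above does more than rename native_smp_send_stop() to native_stop_other_cpus(): it threads a 'wait' flag through the shutdown path, so a caller that needs the other CPUs to be truly offline can spin until num_online_cpus() drops to 1, while everyone else still gives up after roughly a second. A minimal sketch of how the new smp_ops hook could be wrapped for the two cases (the wrapper names and call sites below are illustrative assumptions, not taken from this diff):

	/* Sketch only: assumed wrappers around the renamed smp_ops hook. */
	static inline void smp_send_stop(void)
	{
		smp_ops.stop_other_cpus(0);	/* best effort, give up after ~1s */
	}

	static inline void stop_other_cpus(void)
	{
		smp_ops.stop_other_cpus(1);	/* wait until the others are really gone */
	}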
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index dfb50890b5b7..083e99d1b7df 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -299,22 +299,16 @@ notrace static void __cpuinit start_secondary(void *unused)
299 * fragile that we want to limit the things done here to the 299 * fragile that we want to limit the things done here to the
300 * most necessary things. 300 * most necessary things.
301 */ 301 */
302 cpu_init();
303 preempt_disable();
304 smp_callin();
302 305
303#ifdef CONFIG_X86_32 306#ifdef CONFIG_X86_32
304 /* 307 /* switch away from the initial page table */
305 * Switch away from the trampoline page-table
306 *
307 * Do this before cpu_init() because it needs to access per-cpu
308 * data which may not be mapped in the trampoline page-table.
309 */
310 load_cr3(swapper_pg_dir); 308 load_cr3(swapper_pg_dir);
311 __flush_tlb_all(); 309 __flush_tlb_all();
312#endif 310#endif
313 311
314 cpu_init();
315 preempt_disable();
316 smp_callin();
317
318 /* otherwise gcc will move up smp_processor_id before the cpu_init */ 312 /* otherwise gcc will move up smp_processor_id before the cpu_init */
319 barrier(); 313 barrier();
320 /* 314 /*
@@ -753,7 +747,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
753 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), 747 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
754 }; 748 };
755 749
756 INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle); 750 INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
757 751
758 alternatives_smp_switch(1); 752 alternatives_smp_switch(1);
759 753
@@ -785,7 +779,6 @@ do_rest:
785#ifdef CONFIG_X86_32 779#ifdef CONFIG_X86_32
786 /* Stack for startup_32 can be just as for start_secondary onwards */ 780 /* Stack for startup_32 can be just as for start_secondary onwards */
787 irq_ctx_init(cpu); 781 irq_ctx_init(cpu);
788 initial_page_table = __pa(&trampoline_pg_dir);
789#else 782#else
790 clear_tsk_thread_flag(c_idle.idle, TIF_FORK); 783 clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
791 initial_gs = per_cpu_offset(cpu); 784 initial_gs = per_cpu_offset(cpu);
@@ -934,7 +927,6 @@ int __cpuinit native_cpu_up(unsigned int cpu)
934 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; 927 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
935 928
936 err = do_boot_cpu(apicid, cpu); 929 err = do_boot_cpu(apicid, cpu);
937
938 if (err) { 930 if (err) {
939 pr_debug("do_boot_cpu failed %d\n", err); 931 pr_debug("do_boot_cpu failed %d\n", err);
940 return -EIO; 932 return -EIO;
@@ -1381,7 +1373,6 @@ void play_dead_common(void)
1381{ 1373{
1382 idle_task_exit(); 1374 idle_task_exit();
1383 reset_lazy_tlbstate(); 1375 reset_lazy_tlbstate();
1384 irq_ctx_exit(raw_smp_processor_id());
1385 c1e_remove_cpu(raw_smp_processor_id()); 1376 c1e_remove_cpu(raw_smp_processor_id());
1386 1377
1387 mb(); 1378 mb();
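Together with the setup.c change at the top of this series (cloning the kernel address range into initial_page_table) and the removal of setup_trampoline_page_table() further down, this smpboot.c hunk lets a secondary CPU run cpu_init(), preempt_disable() and smp_callin() while still on initial_page_table, and only then switch to swapper_pg_dir; the separate trampoline_pg_dir and its low mappings are no longer needed. For reference, clone_pgd_range() is just a copy of top-level page-table entries, roughly (a sketch of the asm/pgtable.h helper, not part of this diff):

	static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
	{
		/* copy 'count' pgd entries from src to dst */
		memcpy(dst, src, count * sizeof(pgd_t));
	}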
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
deleted file mode 100644
index 312ef0292815..000000000000
--- a/arch/x86/kernel/tlb_uv.c
+++ /dev/null
@@ -1,1655 +0,0 @@
1/*
2 * SGI UltraViolet TLB flush routines.
3 *
4 * (c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI.
5 *
6 * This code is released under the GNU General Public License version 2 or
7 * later.
8 */
9#include <linux/seq_file.h>
10#include <linux/proc_fs.h>
11#include <linux/debugfs.h>
12#include <linux/kernel.h>
13#include <linux/slab.h>
14
15#include <asm/mmu_context.h>
16#include <asm/uv/uv.h>
17#include <asm/uv/uv_mmrs.h>
18#include <asm/uv/uv_hub.h>
19#include <asm/uv/uv_bau.h>
20#include <asm/apic.h>
21#include <asm/idle.h>
22#include <asm/tsc.h>
23#include <asm/irq_vectors.h>
24#include <asm/timer.h>
25
26/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
27static int timeout_base_ns[] = {
28 20,
29 160,
30 1280,
31 10240,
32 81920,
33 655360,
34 5242880,
35 167772160
36};
37static int timeout_us;
38static int nobau;
39static int baudisabled;
40static spinlock_t disable_lock;
41static cycles_t congested_cycles;
42
43/* tunables: */
44static int max_bau_concurrent = MAX_BAU_CONCURRENT;
45static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT;
46static int plugged_delay = PLUGGED_DELAY;
47static int plugsb4reset = PLUGSB4RESET;
48static int timeoutsb4reset = TIMEOUTSB4RESET;
49static int ipi_reset_limit = IPI_RESET_LIMIT;
50static int complete_threshold = COMPLETE_THRESHOLD;
51static int congested_response_us = CONGESTED_RESPONSE_US;
52static int congested_reps = CONGESTED_REPS;
53static int congested_period = CONGESTED_PERIOD;
54static struct dentry *tunables_dir;
55static struct dentry *tunables_file;
56
57static int __init setup_nobau(char *arg)
58{
59 nobau = 1;
60 return 0;
61}
62early_param("nobau", setup_nobau);
63
64/* base pnode in this partition */
65static int uv_partition_base_pnode __read_mostly;
66/* position of pnode (which is nasid>>1): */
67static int uv_nshift __read_mostly;
68static unsigned long uv_mmask __read_mostly;
69
70static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
71static DEFINE_PER_CPU(struct bau_control, bau_control);
72static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
73
74/*
75 * Determine the first node on a uvhub. 'Nodes' are used for kernel
76 * memory allocation.
77 */
78static int __init uvhub_to_first_node(int uvhub)
79{
80 int node, b;
81
82 for_each_online_node(node) {
83 b = uv_node_to_blade_id(node);
84 if (uvhub == b)
85 return node;
86 }
87 return -1;
88}
89
90/*
91 * Determine the apicid of the first cpu on a uvhub.
92 */
93static int __init uvhub_to_first_apicid(int uvhub)
94{
95 int cpu;
96
97 for_each_present_cpu(cpu)
98 if (uvhub == uv_cpu_to_blade_id(cpu))
99 return per_cpu(x86_cpu_to_apicid, cpu);
100 return -1;
101}
102
103/*
104 * Free a software acknowledge hardware resource by clearing its Pending
105 * bit. This will return a reply to the sender.
106 * If the message has timed out, a reply has already been sent by the
107 * hardware but the resource has not been released. In that case our
108 * clear of the Timeout bit (as well) will free the resource. No reply will
109 * be sent (the hardware will only do one reply per message).
110 */
111static inline void uv_reply_to_message(struct msg_desc *mdp,
112 struct bau_control *bcp)
113{
114 unsigned long dw;
115 struct bau_payload_queue_entry *msg;
116
117 msg = mdp->msg;
118 if (!msg->canceled) {
119 dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) |
120 msg->sw_ack_vector;
121 uv_write_local_mmr(
122 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw);
123 }
124 msg->replied_to = 1;
125 msg->sw_ack_vector = 0;
126}
127
128/*
129 * Process the receipt of a RETRY message
130 */
131static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,
132 struct bau_control *bcp)
133{
134 int i;
135 int cancel_count = 0;
136 int slot2;
137 unsigned long msg_res;
138 unsigned long mmr = 0;
139 struct bau_payload_queue_entry *msg;
140 struct bau_payload_queue_entry *msg2;
141 struct ptc_stats *stat;
142
143 msg = mdp->msg;
144 stat = bcp->statp;
145 stat->d_retries++;
146 /*
147 * cancel any message from msg+1 to the retry itself
148 */
149 for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
150 if (msg2 > mdp->va_queue_last)
151 msg2 = mdp->va_queue_first;
152 if (msg2 == msg)
153 break;
154
155 /* same conditions for cancellation as uv_do_reset */
156 if ((msg2->replied_to == 0) && (msg2->canceled == 0) &&
157 (msg2->sw_ack_vector) && ((msg2->sw_ack_vector &
158 msg->sw_ack_vector) == 0) &&
159 (msg2->sending_cpu == msg->sending_cpu) &&
160 (msg2->msg_type != MSG_NOOP)) {
161 slot2 = msg2 - mdp->va_queue_first;
162 mmr = uv_read_local_mmr
163 (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
164 msg_res = msg2->sw_ack_vector;
165 /*
166 * This is a message retry; clear the resources held
167 * by the previous message only if they timed out.
168 * If it has not timed out we have an unexpected
169 * situation to report.
170 */
171 if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
172 /*
173 * is the resource timed out?
174 * make everyone ignore the cancelled message.
175 */
176 msg2->canceled = 1;
177 stat->d_canceled++;
178 cancel_count++;
179 uv_write_local_mmr(
180 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
181 (msg_res << UV_SW_ACK_NPENDING) |
182 msg_res);
183 }
184 }
185 }
186 if (!cancel_count)
187 stat->d_nocanceled++;
188}
189
190/*
191 * Do all the things a cpu should do for a TLB shootdown message.
192 * Other cpu's may come here at the same time for this message.
193 */
194static void uv_bau_process_message(struct msg_desc *mdp,
195 struct bau_control *bcp)
196{
197 int msg_ack_count;
198 short socket_ack_count = 0;
199 struct ptc_stats *stat;
200 struct bau_payload_queue_entry *msg;
201 struct bau_control *smaster = bcp->socket_master;
202
203 /*
204 * This must be a normal message, or retry of a normal message
205 */
206 msg = mdp->msg;
207 stat = bcp->statp;
208 if (msg->address == TLB_FLUSH_ALL) {
209 local_flush_tlb();
210 stat->d_alltlb++;
211 } else {
212 __flush_tlb_one(msg->address);
213 stat->d_onetlb++;
214 }
215 stat->d_requestee++;
216
217 /*
218 * One cpu on each uvhub has the additional job on a RETRY
219 * of releasing the resource held by the message that is
220 * being retried. That message is identified by sending
221 * cpu number.
222 */
223 if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master)
224 uv_bau_process_retry_msg(mdp, bcp);
225
226 /*
227 * This is a sw_ack message, so we have to reply to it.
228 * Count each responding cpu on the socket. This avoids
229 * pinging the count's cache line back and forth between
230 * the sockets.
231 */
232 socket_ack_count = atomic_add_short_return(1, (struct atomic_short *)
233 &smaster->socket_acknowledge_count[mdp->msg_slot]);
234 if (socket_ack_count == bcp->cpus_in_socket) {
235 /*
236 * Both sockets dump their completed count total into
237 * the message's count.
238 */
239 smaster->socket_acknowledge_count[mdp->msg_slot] = 0;
240 msg_ack_count = atomic_add_short_return(socket_ack_count,
241 (struct atomic_short *)&msg->acknowledge_count);
242
243 if (msg_ack_count == bcp->cpus_in_uvhub) {
244 /*
245 * All cpus in uvhub saw it; reply
246 */
247 uv_reply_to_message(mdp, bcp);
248 }
249 }
250
251 return;
252}
253
254/*
255 * Determine the first cpu on a uvhub.
256 */
257static int uvhub_to_first_cpu(int uvhub)
258{
259 int cpu;
260 for_each_present_cpu(cpu)
261 if (uvhub == uv_cpu_to_blade_id(cpu))
262 return cpu;
263 return -1;
264}
265
266/*
267 * Last resort when we get a large number of destination timeouts is
268 * to clear resources held by a given cpu.
269 * Do this with IPI so that all messages in the BAU message queue
270 * can be identified by their nonzero sw_ack_vector field.
271 *
272 * This is entered for a single cpu on the uvhub.
273 * The sender wants this uvhub to free a specific message's
274 * sw_ack resources.
275 */
276static void
277uv_do_reset(void *ptr)
278{
279 int i;
280 int slot;
281 int count = 0;
282 unsigned long mmr;
283 unsigned long msg_res;
284 struct bau_control *bcp;
285 struct reset_args *rap;
286 struct bau_payload_queue_entry *msg;
287 struct ptc_stats *stat;
288
289 bcp = &per_cpu(bau_control, smp_processor_id());
290 rap = (struct reset_args *)ptr;
291 stat = bcp->statp;
292 stat->d_resets++;
293
294 /*
295 * We're looking for the given sender, and
296 * will free its sw_ack resource.
297 * If all cpu's finally responded after the timeout, its
298 * message 'replied_to' was set.
299 */
300 for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
301 /* uv_do_reset: same conditions for cancellation as
302 uv_bau_process_retry_msg() */
303 if ((msg->replied_to == 0) &&
304 (msg->canceled == 0) &&
305 (msg->sending_cpu == rap->sender) &&
306 (msg->sw_ack_vector) &&
307 (msg->msg_type != MSG_NOOP)) {
308 /*
309 * make everyone else ignore this message
310 */
311 msg->canceled = 1;
312 slot = msg - bcp->va_queue_first;
313 count++;
314 /*
315 * only reset the resource if it is still pending
316 */
317 mmr = uv_read_local_mmr
318 (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
319 msg_res = msg->sw_ack_vector;
320 if (mmr & msg_res) {
321 stat->d_rcanceled++;
322 uv_write_local_mmr(
323 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
324 (msg_res << UV_SW_ACK_NPENDING) |
325 msg_res);
326 }
327 }
328 }
329 return;
330}
331
332/*
333 * Use IPI to get all target uvhubs to release resources held by
334 * a given sending cpu number.
335 */
336static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution,
337 int sender)
338{
339 int uvhub;
340 int cpu;
341 cpumask_t mask;
342 struct reset_args reset_args;
343
344 reset_args.sender = sender;
345
346 cpus_clear(mask);
347 /* find a single cpu for each uvhub in this distribution mask */
348 for (uvhub = 0;
349 uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE;
350 uvhub++) {
351 if (!bau_uvhub_isset(uvhub, distribution))
352 continue;
353 /* find a cpu for this uvhub */
354 cpu = uvhub_to_first_cpu(uvhub);
355 cpu_set(cpu, mask);
356 }
357 /* IPI all cpus; Preemption is already disabled */
358 smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1);
359 return;
360}
361
362static inline unsigned long
363cycles_2_us(unsigned long long cyc)
364{
365 unsigned long long ns;
366 unsigned long us;
367 ns = (cyc * per_cpu(cyc2ns, smp_processor_id()))
368 >> CYC2NS_SCALE_FACTOR;
369 us = ns / 1000;
370 return us;
371}
372
373/*
374 * wait for all cpus on this hub to finish their sends and go quiet
375 * leaves uvhub_quiesce set so that no new broadcasts are started by
376 * bau_flush_send_and_wait()
377 */
378static inline void
379quiesce_local_uvhub(struct bau_control *hmaster)
380{
381 atomic_add_short_return(1, (struct atomic_short *)
382 &hmaster->uvhub_quiesce);
383}
384
385/*
386 * mark this quiet-requestor as done
387 */
388static inline void
389end_uvhub_quiesce(struct bau_control *hmaster)
390{
391 atomic_add_short_return(-1, (struct atomic_short *)
392 &hmaster->uvhub_quiesce);
393}
394
395/*
396 * Wait for completion of a broadcast software ack message
397 * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP
398 */
399static int uv_wait_completion(struct bau_desc *bau_desc,
400 unsigned long mmr_offset, int right_shift, int this_cpu,
401 struct bau_control *bcp, struct bau_control *smaster, long try)
402{
403 unsigned long descriptor_status;
404 cycles_t ttime;
405 struct ptc_stats *stat = bcp->statp;
406 struct bau_control *hmaster;
407
408 hmaster = bcp->uvhub_master;
409
410 /* spin on the status MMR, waiting for it to go idle */
411 while ((descriptor_status = (((unsigned long)
412 uv_read_local_mmr(mmr_offset) >>
413 right_shift) & UV_ACT_STATUS_MASK)) !=
414 DESC_STATUS_IDLE) {
415 /*
416 * Our software ack messages may be blocked because there are
417 * no swack resources available. As long as none of them
418 * has timed out hardware will NACK our message and its
419 * state will stay IDLE.
420 */
421 if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
422 stat->s_stimeout++;
423 return FLUSH_GIVEUP;
424 } else if (descriptor_status ==
425 DESC_STATUS_DESTINATION_TIMEOUT) {
426 stat->s_dtimeout++;
427 ttime = get_cycles();
428
429 /*
430 * Our retries may be blocked by all destination
431 * swack resources being consumed, and a timeout
432 * pending. In that case hardware returns the
433 * ERROR that looks like a destination timeout.
434 */
435 if (cycles_2_us(ttime - bcp->send_message) <
436 timeout_us) {
437 bcp->conseccompletes = 0;
438 return FLUSH_RETRY_PLUGGED;
439 }
440
441 bcp->conseccompletes = 0;
442 return FLUSH_RETRY_TIMEOUT;
443 } else {
444 /*
445 * descriptor_status is still BUSY
446 */
447 cpu_relax();
448 }
449 }
450 bcp->conseccompletes++;
451 return FLUSH_COMPLETE;
452}
453
454static inline cycles_t
455sec_2_cycles(unsigned long sec)
456{
457 unsigned long ns;
458 cycles_t cyc;
459
460 ns = sec * 1000000000;
461 cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
462 return cyc;
463}
464
465/*
466 * conditionally add 1 to *v, unless *v is >= u
467 * return 0 if we cannot add 1 to *v because it is >= u
468 * return 1 if we can add 1 to *v because it is < u
469 * the add is atomic
470 *
471 * This is close to atomic_add_unless(), but this allows the 'u' value
472 * to be lowered below the current 'v'. atomic_add_unless can only stop
473 * on equal.
474 */
475static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
476{
477 spin_lock(lock);
478 if (atomic_read(v) >= u) {
479 spin_unlock(lock);
480 return 0;
481 }
482 atomic_inc(v);
483 spin_unlock(lock);
484 return 1;
485}
486
487/*
488 * Our retries are blocked by all destination swack resources being
489 * in use, and a timeout is pending. In that case hardware immediately
490 * returns the ERROR that looks like a destination timeout.
491 */
492static void
493destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp,
494 struct bau_control *hmaster, struct ptc_stats *stat)
495{
496 udelay(bcp->plugged_delay);
497 bcp->plugged_tries++;
498 if (bcp->plugged_tries >= bcp->plugsb4reset) {
499 bcp->plugged_tries = 0;
500 quiesce_local_uvhub(hmaster);
501 spin_lock(&hmaster->queue_lock);
502 uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu);
503 spin_unlock(&hmaster->queue_lock);
504 end_uvhub_quiesce(hmaster);
505 bcp->ipi_attempts++;
506 stat->s_resets_plug++;
507 }
508}
509
510static void
511destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp,
512 struct bau_control *hmaster, struct ptc_stats *stat)
513{
514 hmaster->max_bau_concurrent = 1;
515 bcp->timeout_tries++;
516 if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
517 bcp->timeout_tries = 0;
518 quiesce_local_uvhub(hmaster);
519 spin_lock(&hmaster->queue_lock);
520 uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu);
521 spin_unlock(&hmaster->queue_lock);
522 end_uvhub_quiesce(hmaster);
523 bcp->ipi_attempts++;
524 stat->s_resets_timeout++;
525 }
526}
527
528/*
529 * Completions are taking a very long time due to a congested numalink
530 * network.
531 */
532static void
533disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat)
534{
535 int tcpu;
536 struct bau_control *tbcp;
537
538 /* let only one cpu do this disabling */
539 spin_lock(&disable_lock);
540 if (!baudisabled && bcp->period_requests &&
541 ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
542 /* it becomes this cpu's job to turn on the use of the
543 BAU again */
544 baudisabled = 1;
545 bcp->set_bau_off = 1;
546 bcp->set_bau_on_time = get_cycles() +
547 sec_2_cycles(bcp->congested_period);
548 stat->s_bau_disabled++;
549 for_each_present_cpu(tcpu) {
550 tbcp = &per_cpu(bau_control, tcpu);
551 tbcp->baudisabled = 1;
552 }
553 }
554 spin_unlock(&disable_lock);
555}
556
557/**
558 * uv_flush_send_and_wait
559 *
560 * Send a broadcast and wait for it to complete.
561 *
562 * The flush_mask contains the cpus the broadcast is to be sent to including
563 * cpus that are on the local uvhub.
564 *
565 * Returns 0 if all flushing represented in the mask was done.
566 * Returns 1 if it gives up entirely and the original cpu mask is to be
567 * returned to the kernel.
568 */
569int uv_flush_send_and_wait(struct bau_desc *bau_desc,
570 struct cpumask *flush_mask, struct bau_control *bcp)
571{
572 int right_shift;
573 int completion_status = 0;
574 int seq_number = 0;
575 long try = 0;
576 int cpu = bcp->uvhub_cpu;
577 int this_cpu = bcp->cpu;
578 unsigned long mmr_offset;
579 unsigned long index;
580 cycles_t time1;
581 cycles_t time2;
582 cycles_t elapsed;
583 struct ptc_stats *stat = bcp->statp;
584 struct bau_control *smaster = bcp->socket_master;
585 struct bau_control *hmaster = bcp->uvhub_master;
586
587 if (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
588 &hmaster->active_descriptor_count,
589 hmaster->max_bau_concurrent)) {
590 stat->s_throttles++;
591 do {
592 cpu_relax();
593 } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
594 &hmaster->active_descriptor_count,
595 hmaster->max_bau_concurrent));
596 }
597 while (hmaster->uvhub_quiesce)
598 cpu_relax();
599
600 if (cpu < UV_CPUS_PER_ACT_STATUS) {
601 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
602 right_shift = cpu * UV_ACT_STATUS_SIZE;
603 } else {
604 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
605 right_shift =
606 ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
607 }
608 time1 = get_cycles();
609 do {
610 if (try == 0) {
611 bau_desc->header.msg_type = MSG_REGULAR;
612 seq_number = bcp->message_number++;
613 } else {
614 bau_desc->header.msg_type = MSG_RETRY;
615 stat->s_retry_messages++;
616 }
617 bau_desc->header.sequence = seq_number;
618 index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
619 bcp->uvhub_cpu;
620 bcp->send_message = get_cycles();
621 uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
622 try++;
623 completion_status = uv_wait_completion(bau_desc, mmr_offset,
624 right_shift, this_cpu, bcp, smaster, try);
625
626 if (completion_status == FLUSH_RETRY_PLUGGED) {
627 destination_plugged(bau_desc, bcp, hmaster, stat);
628 } else if (completion_status == FLUSH_RETRY_TIMEOUT) {
629 destination_timeout(bau_desc, bcp, hmaster, stat);
630 }
631 if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
632 bcp->ipi_attempts = 0;
633 completion_status = FLUSH_GIVEUP;
634 break;
635 }
636 cpu_relax();
637 } while ((completion_status == FLUSH_RETRY_PLUGGED) ||
638 (completion_status == FLUSH_RETRY_TIMEOUT));
639 time2 = get_cycles();
640 bcp->plugged_tries = 0;
641 bcp->timeout_tries = 0;
642 if ((completion_status == FLUSH_COMPLETE) &&
643 (bcp->conseccompletes > bcp->complete_threshold) &&
644 (hmaster->max_bau_concurrent <
645 hmaster->max_bau_concurrent_constant))
646 hmaster->max_bau_concurrent++;
647 while (hmaster->uvhub_quiesce)
648 cpu_relax();
649 atomic_dec(&hmaster->active_descriptor_count);
650 if (time2 > time1) {
651 elapsed = time2 - time1;
652 stat->s_time += elapsed;
653 if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
654 bcp->period_requests++;
655 bcp->period_time += elapsed;
656 if ((elapsed > congested_cycles) &&
657 (bcp->period_requests > bcp->congested_reps)) {
658 disable_for_congestion(bcp, stat);
659 }
660 }
661 } else
662 stat->s_requestor--;
663 if (completion_status == FLUSH_COMPLETE && try > 1)
664 stat->s_retriesok++;
665 else if (completion_status == FLUSH_GIVEUP) {
666 stat->s_giveup++;
667 return 1;
668 }
669 return 0;
670}
671
672/**
673 * uv_flush_tlb_others - globally purge translation cache of a virtual
674 * address or all TLB's
675 * @cpumask: mask of all cpu's in which the address is to be removed
676 * @mm: mm_struct containing virtual address range
677 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
678 * @cpu: the current cpu
679 *
680 * This is the entry point for initiating any UV global TLB shootdown.
681 *
682 * Purges the translation caches of all specified processors of the given
683 * virtual address, or purges all TLB's on specified processors.
684 *
685 * The caller has derived the cpumask from the mm_struct. This function
686 * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
687 *
688 * The cpumask is converted into a uvhubmask of the uvhubs containing
689 * those cpus.
690 *
691 * Note that this function should be called with preemption disabled.
692 *
693 * Returns NULL if all remote flushing was done.
694 * Returns pointer to cpumask if some remote flushing remains to be
695 * done. The returned pointer is valid till preemption is re-enabled.
696 */
697const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
698 struct mm_struct *mm,
699 unsigned long va, unsigned int cpu)
700{
701 int tcpu;
702 int uvhub;
703 int locals = 0;
704 int remotes = 0;
705 int hubs = 0;
706 struct bau_desc *bau_desc;
707 struct cpumask *flush_mask;
708 struct ptc_stats *stat;
709 struct bau_control *bcp;
710 struct bau_control *tbcp;
711
712 /* kernel was booted 'nobau' */
713 if (nobau)
714 return cpumask;
715
716 bcp = &per_cpu(bau_control, cpu);
717 stat = bcp->statp;
718
719 /* bau was disabled due to slow response */
720 if (bcp->baudisabled) {
721 /* the cpu that disabled it must re-enable it */
722 if (bcp->set_bau_off) {
723 if (get_cycles() >= bcp->set_bau_on_time) {
724 stat->s_bau_reenabled++;
725 baudisabled = 0;
726 for_each_present_cpu(tcpu) {
727 tbcp = &per_cpu(bau_control, tcpu);
728 tbcp->baudisabled = 0;
729 tbcp->period_requests = 0;
730 tbcp->period_time = 0;
731 }
732 }
733 }
734 return cpumask;
735 }
736
737 /*
738 * Each sending cpu has a per-cpu mask which it fills from the caller's
739 * cpu mask. All cpus are converted to uvhubs and copied to the
740 * activation descriptor.
741 */
742 flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
743 /* don't actually do a shootdown of the local cpu */
744 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
745 if (cpu_isset(cpu, *cpumask))
746 stat->s_ntargself++;
747
748 bau_desc = bcp->descriptor_base;
749 bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
750 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
751
752 /* cpu statistics */
753 for_each_cpu(tcpu, flush_mask) {
754 uvhub = uv_cpu_to_blade_id(tcpu);
755 bau_uvhub_set(uvhub, &bau_desc->distribution);
756 if (uvhub == bcp->uvhub)
757 locals++;
758 else
759 remotes++;
760 }
761 if ((locals + remotes) == 0)
762 return NULL;
763 stat->s_requestor++;
764 stat->s_ntargcpu += remotes + locals;
765 stat->s_ntargremotes += remotes;
766 stat->s_ntarglocals += locals;
767 remotes = bau_uvhub_weight(&bau_desc->distribution);
768
769 /* uvhub statistics */
770 hubs = bau_uvhub_weight(&bau_desc->distribution);
771 if (locals) {
772 stat->s_ntarglocaluvhub++;
773 stat->s_ntargremoteuvhub += (hubs - 1);
774 } else
775 stat->s_ntargremoteuvhub += hubs;
776 stat->s_ntarguvhub += hubs;
777 if (hubs >= 16)
778 stat->s_ntarguvhub16++;
779 else if (hubs >= 8)
780 stat->s_ntarguvhub8++;
781 else if (hubs >= 4)
782 stat->s_ntarguvhub4++;
783 else if (hubs >= 2)
784 stat->s_ntarguvhub2++;
785 else
786 stat->s_ntarguvhub1++;
787
788 bau_desc->payload.address = va;
789 bau_desc->payload.sending_cpu = cpu;
790
791 /*
792 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
793 * or 1 if it gave up and the original cpumask should be returned.
794 */
795 if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp))
796 return NULL;
797 else
798 return cpumask;
799}
800
801/*
802 * The BAU message interrupt comes here. (registered by set_intr_gate)
803 * See entry_64.S
804 *
805 * We received a broadcast assist message.
806 *
807 * Interrupts are disabled; this interrupt could represent
808 * the receipt of several messages.
809 *
810 * All cores/threads on this hub get this interrupt.
811 * The last one to see it does the software ack.
812 * (the resource will not be freed until noninterruptable cpus see this
813 * interrupt; hardware may timeout the s/w ack and reply ERROR)
814 */
815void uv_bau_message_interrupt(struct pt_regs *regs)
816{
817 int count = 0;
818 cycles_t time_start;
819 struct bau_payload_queue_entry *msg;
820 struct bau_control *bcp;
821 struct ptc_stats *stat;
822 struct msg_desc msgdesc;
823
824 time_start = get_cycles();
825 bcp = &per_cpu(bau_control, smp_processor_id());
826 stat = bcp->statp;
827 msgdesc.va_queue_first = bcp->va_queue_first;
828 msgdesc.va_queue_last = bcp->va_queue_last;
829 msg = bcp->bau_msg_head;
830 while (msg->sw_ack_vector) {
831 count++;
832 msgdesc.msg_slot = msg - msgdesc.va_queue_first;
833 msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1;
834 msgdesc.msg = msg;
835 uv_bau_process_message(&msgdesc, bcp);
836 msg++;
837 if (msg > msgdesc.va_queue_last)
838 msg = msgdesc.va_queue_first;
839 bcp->bau_msg_head = msg;
840 }
841 stat->d_time += (get_cycles() - time_start);
842 if (!count)
843 stat->d_nomsg++;
844 else if (count > 1)
845 stat->d_multmsg++;
846 ack_APIC_irq();
847}
848
849/*
850 * uv_enable_timeouts
851 *
852 * Each target uvhub (i.e. a uvhub that has no cpu's) needs to have
853 * shootdown message timeouts enabled. The timeout does not cause
854 * an interrupt, but causes an error message to be returned to
855 * the sender.
856 */
857static void uv_enable_timeouts(void)
858{
859 int uvhub;
860 int nuvhubs;
861 int pnode;
862 unsigned long mmr_image;
863
864 nuvhubs = uv_num_possible_blades();
865
866 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
867 if (!uv_blade_nr_possible_cpus(uvhub))
868 continue;
869
870 pnode = uv_blade_to_pnode(uvhub);
871 mmr_image =
872 uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
873 /*
874 * Set the timeout period and then lock it in, in three
875 * steps; captures and locks in the period.
876 *
877 * To program the period, the SOFT_ACK_MODE must be off.
878 */
879 mmr_image &= ~((unsigned long)1 <<
880 UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT);
881 uv_write_global_mmr64
882 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
883 /*
884 * Set the 4-bit period.
885 */
886 mmr_image &= ~((unsigned long)0xf <<
887 UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT);
888 mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
889 UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT);
890 uv_write_global_mmr64
891 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
892 /*
893 * Subsequent reversals of the timebase bit (3) cause an
894 * immediate timeout of one or all INTD resources as
895 * indicated in bits 2:0 (7 causes all of them to timeout).
896 */
897 mmr_image |= ((unsigned long)1 <<
898 UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT);
899 uv_write_global_mmr64
900 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
901 }
902}
903
904static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
905{
906 if (*offset < num_possible_cpus())
907 return offset;
908 return NULL;
909}
910
911static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
912{
913 (*offset)++;
914 if (*offset < num_possible_cpus())
915 return offset;
916 return NULL;
917}
918
919static void uv_ptc_seq_stop(struct seq_file *file, void *data)
920{
921}
922
923static inline unsigned long long
924microsec_2_cycles(unsigned long microsec)
925{
926 unsigned long ns;
927 unsigned long long cyc;
928
929 ns = microsec * 1000;
930 cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
931 return cyc;
932}
933
934/*
935 * Display the statistics thru /proc.
936 * 'data' points to the cpu number
937 */
938static int uv_ptc_seq_show(struct seq_file *file, void *data)
939{
940 struct ptc_stats *stat;
941 int cpu;
942
943 cpu = *(loff_t *)data;
944
945 if (!cpu) {
946 seq_printf(file,
947 "# cpu sent stime self locals remotes ncpus localhub ");
948 seq_printf(file,
949 "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
950 seq_printf(file,
951 "numuvhubs4 numuvhubs2 numuvhubs1 dto ");
952 seq_printf(file,
953 "retries rok resetp resett giveup sto bz throt ");
954 seq_printf(file,
955 "sw_ack recv rtime all ");
956 seq_printf(file,
957 "one mult none retry canc nocan reset rcan ");
958 seq_printf(file,
959 "disable enable\n");
960 }
961 if (cpu < num_possible_cpus() && cpu_online(cpu)) {
962 stat = &per_cpu(ptcstats, cpu);
963 /* source side statistics */
964 seq_printf(file,
965 "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
966 cpu, stat->s_requestor, cycles_2_us(stat->s_time),
967 stat->s_ntargself, stat->s_ntarglocals,
968 stat->s_ntargremotes, stat->s_ntargcpu,
969 stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
970 stat->s_ntarguvhub, stat->s_ntarguvhub16);
971 seq_printf(file, "%ld %ld %ld %ld %ld ",
972 stat->s_ntarguvhub8, stat->s_ntarguvhub4,
973 stat->s_ntarguvhub2, stat->s_ntarguvhub1,
974 stat->s_dtimeout);
975 seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
976 stat->s_retry_messages, stat->s_retriesok,
977 stat->s_resets_plug, stat->s_resets_timeout,
978 stat->s_giveup, stat->s_stimeout,
979 stat->s_busy, stat->s_throttles);
980
981 /* destination side statistics */
982 seq_printf(file,
983 "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
984 uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
985 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
986 stat->d_requestee, cycles_2_us(stat->d_time),
987 stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
988 stat->d_nomsg, stat->d_retries, stat->d_canceled,
989 stat->d_nocanceled, stat->d_resets,
990 stat->d_rcanceled);
991 seq_printf(file, "%ld %ld\n",
992 stat->s_bau_disabled, stat->s_bau_reenabled);
993 }
994
995 return 0;
996}
997
998/*
999 * Display the tunables thru debugfs
1000 */
1001static ssize_t tunables_read(struct file *file, char __user *userbuf,
1002 size_t count, loff_t *ppos)
1003{
1004 char buf[300];
1005 int ret;
1006
1007 ret = snprintf(buf, 300, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n",
1008 "max_bau_concurrent plugged_delay plugsb4reset",
1009 "timeoutsb4reset ipi_reset_limit complete_threshold",
1010 "congested_response_us congested_reps congested_period",
1011 max_bau_concurrent, plugged_delay, plugsb4reset,
1012 timeoutsb4reset, ipi_reset_limit, complete_threshold,
1013 congested_response_us, congested_reps, congested_period);
1014
1015 return simple_read_from_buffer(userbuf, count, ppos, buf, ret);
1016}
1017
1018/*
1019 * -1: reset the statistics
1020 * 0: display meaning of the statistics
1021 */
1022static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
1023 size_t count, loff_t *data)
1024{
1025 int cpu;
1026 long input_arg;
1027 char optstr[64];
1028 struct ptc_stats *stat;
1029
1030 if (count == 0 || count > sizeof(optstr))
1031 return -EINVAL;
1032 if (copy_from_user(optstr, user, count))
1033 return -EFAULT;
1034 optstr[count - 1] = '\0';
1035 if (strict_strtol(optstr, 10, &input_arg) < 0) {
1036 printk(KERN_DEBUG "%s is invalid\n", optstr);
1037 return -EINVAL;
1038 }
1039
1040 if (input_arg == 0) {
1041 printk(KERN_DEBUG "# cpu: cpu number\n");
1042 printk(KERN_DEBUG "Sender statistics:\n");
1043 printk(KERN_DEBUG
1044 "sent: number of shootdown messages sent\n");
1045 printk(KERN_DEBUG
1046 "stime: time spent sending messages\n");
1047 printk(KERN_DEBUG
1048 "numuvhubs: number of hubs targeted with shootdown\n");
1049 printk(KERN_DEBUG
1050 "numuvhubs16: number times 16 or more hubs targeted\n");
1051 printk(KERN_DEBUG
1052 "numuvhubs8: number times 8 or more hubs targeted\n");
1053 printk(KERN_DEBUG
1054 "numuvhubs4: number times 4 or more hubs targeted\n");
1055 printk(KERN_DEBUG
1056 "numuvhubs2: number times 2 or more hubs targeted\n");
1057 printk(KERN_DEBUG
1058 "numuvhubs1: number times 1 hub targeted\n");
1059 printk(KERN_DEBUG
1060 "numcpus: number of cpus targeted with shootdown\n");
1061 printk(KERN_DEBUG
1062 "dto: number of destination timeouts\n");
1063 printk(KERN_DEBUG
1064 "retries: destination timeout retries sent\n");
1065 printk(KERN_DEBUG
1066 "rok: destination timeouts successfully retried\n");
1067 printk(KERN_DEBUG
1068 "resetp: ipi-style resource resets for plugs\n");
1069 printk(KERN_DEBUG
1070 "resett: ipi-style resource resets for timeouts\n");
1071 printk(KERN_DEBUG
1072 "giveup: fall-backs to ipi-style shootdowns\n");
1073 printk(KERN_DEBUG
1074 "sto: number of source timeouts\n");
1075 printk(KERN_DEBUG
1076 "bz: number of stay-busy's\n");
1077 printk(KERN_DEBUG
1078 "throt: number times spun in throttle\n");
1079 printk(KERN_DEBUG "Destination side statistics:\n");
1080 printk(KERN_DEBUG
1081 "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
1082 printk(KERN_DEBUG
1083 "recv: shootdown messages received\n");
1084 printk(KERN_DEBUG
1085 "rtime: time spent processing messages\n");
1086 printk(KERN_DEBUG
1087 "all: shootdown all-tlb messages\n");
1088 printk(KERN_DEBUG
1089 "one: shootdown one-tlb messages\n");
1090 printk(KERN_DEBUG
1091 "mult: interrupts that found multiple messages\n");
1092 printk(KERN_DEBUG
1093 "none: interrupts that found no messages\n");
1094 printk(KERN_DEBUG
1095 "retry: number of retry messages processed\n");
1096 printk(KERN_DEBUG
1097 "canc: number messages canceled by retries\n");
1098 printk(KERN_DEBUG
1099 "nocan: number retries that found nothing to cancel\n");
1100 printk(KERN_DEBUG
1101 "reset: number of ipi-style reset requests processed\n");
1102 printk(KERN_DEBUG
1103 "rcan: number messages canceled by reset requests\n");
1104 printk(KERN_DEBUG
1105 "disable: number times use of the BAU was disabled\n");
1106 printk(KERN_DEBUG
1107 "enable: number times use of the BAU was re-enabled\n");
1108 } else if (input_arg == -1) {
1109 for_each_present_cpu(cpu) {
1110 stat = &per_cpu(ptcstats, cpu);
1111 memset(stat, 0, sizeof(struct ptc_stats));
1112 }
1113 }
1114
1115 return count;
1116}
1117
1118static int local_atoi(const char *name)
1119{
1120 int val = 0;
1121
1122 for (;; name++) {
1123 switch (*name) {
1124 case '0' ... '9':
1125 val = 10*val+(*name-'0');
1126 break;
1127 default:
1128 return val;
1129 }
1130 }
1131}
1132
1133/*
1134 * set the tunables
1135 * 0 values reset them to defaults
1136 */
1137static ssize_t tunables_write(struct file *file, const char __user *user,
1138 size_t count, loff_t *data)
1139{
1140 int cpu;
1141 int cnt = 0;
1142 int val;
1143 char *p;
1144 char *q;
1145 char instr[64];
1146 struct bau_control *bcp;
1147
1148 if (count == 0 || count > sizeof(instr)-1)
1149 return -EINVAL;
1150 if (copy_from_user(instr, user, count))
1151 return -EFAULT;
1152
1153 instr[count] = '\0';
1154 /* count the fields */
1155 p = instr + strspn(instr, WHITESPACE);
1156 q = p;
1157 for (; *p; p = q + strspn(q, WHITESPACE)) {
1158 q = p + strcspn(p, WHITESPACE);
1159 cnt++;
1160 if (q == p)
1161 break;
1162 }
1163 if (cnt != 9) {
1164 printk(KERN_INFO "bau tunable error: should be 9 numbers\n");
1165 return -EINVAL;
1166 }
1167
1168 p = instr + strspn(instr, WHITESPACE);
1169 q = p;
1170 for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) {
1171 q = p + strcspn(p, WHITESPACE);
1172 val = local_atoi(p);
1173 switch (cnt) {
1174 case 0:
1175 if (val == 0) {
1176 max_bau_concurrent = MAX_BAU_CONCURRENT;
1177 max_bau_concurrent_constant =
1178 MAX_BAU_CONCURRENT;
1179 continue;
1180 }
1181 bcp = &per_cpu(bau_control, smp_processor_id());
1182 if (val < 1 || val > bcp->cpus_in_uvhub) {
1183 printk(KERN_DEBUG
1184 "Error: BAU max concurrent %d is invalid\n",
1185 val);
1186 return -EINVAL;
1187 }
1188 max_bau_concurrent = val;
1189 max_bau_concurrent_constant = val;
1190 continue;
1191 case 1:
1192 if (val == 0)
1193 plugged_delay = PLUGGED_DELAY;
1194 else
1195 plugged_delay = val;
1196 continue;
1197 case 2:
1198 if (val == 0)
1199 plugsb4reset = PLUGSB4RESET;
1200 else
1201 plugsb4reset = val;
1202 continue;
1203 case 3:
1204 if (val == 0)
1205 timeoutsb4reset = TIMEOUTSB4RESET;
1206 else
1207 timeoutsb4reset = val;
1208 continue;
1209 case 4:
1210 if (val == 0)
1211 ipi_reset_limit = IPI_RESET_LIMIT;
1212 else
1213 ipi_reset_limit = val;
1214 continue;
1215 case 5:
1216 if (val == 0)
1217 complete_threshold = COMPLETE_THRESHOLD;
1218 else
1219 complete_threshold = val;
1220 continue;
1221 case 6:
1222 if (val == 0)
1223 congested_response_us = CONGESTED_RESPONSE_US;
1224 else
1225 congested_response_us = val;
1226 continue;
1227 case 7:
1228 if (val == 0)
1229 congested_reps = CONGESTED_REPS;
1230 else
1231 congested_reps = val;
1232 continue;
1233 case 8:
1234 if (val == 0)
1235 congested_period = CONGESTED_PERIOD;
1236 else
1237 congested_period = val;
1238 continue;
1239 }
1240 if (q == p)
1241 break;
1242 }
1243 for_each_present_cpu(cpu) {
1244 bcp = &per_cpu(bau_control, cpu);
1245 bcp->max_bau_concurrent = max_bau_concurrent;
1246 bcp->max_bau_concurrent_constant = max_bau_concurrent;
1247 bcp->plugged_delay = plugged_delay;
1248 bcp->plugsb4reset = plugsb4reset;
1249 bcp->timeoutsb4reset = timeoutsb4reset;
1250 bcp->ipi_reset_limit = ipi_reset_limit;
1251 bcp->complete_threshold = complete_threshold;
1252 bcp->congested_response_us = congested_response_us;
1253 bcp->congested_reps = congested_reps;
1254 bcp->congested_period = congested_period;
1255 }
1256 return count;
1257}
1258
1259static const struct seq_operations uv_ptc_seq_ops = {
1260 .start = uv_ptc_seq_start,
1261 .next = uv_ptc_seq_next,
1262 .stop = uv_ptc_seq_stop,
1263 .show = uv_ptc_seq_show
1264};
1265
1266static int uv_ptc_proc_open(struct inode *inode, struct file *file)
1267{
1268 return seq_open(file, &uv_ptc_seq_ops);
1269}
1270
1271static int tunables_open(struct inode *inode, struct file *file)
1272{
1273 return 0;
1274}
1275
1276static const struct file_operations proc_uv_ptc_operations = {
1277 .open = uv_ptc_proc_open,
1278 .read = seq_read,
1279 .write = uv_ptc_proc_write,
1280 .llseek = seq_lseek,
1281 .release = seq_release,
1282};
1283
1284static const struct file_operations tunables_fops = {
1285 .open = tunables_open,
1286 .read = tunables_read,
1287 .write = tunables_write,
1288};
1289
1290static int __init uv_ptc_init(void)
1291{
1292 struct proc_dir_entry *proc_uv_ptc;
1293
1294 if (!is_uv_system())
1295 return 0;
1296
1297 proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
1298 &proc_uv_ptc_operations);
1299 if (!proc_uv_ptc) {
1300 printk(KERN_ERR "unable to create %s proc entry\n",
1301 UV_PTC_BASENAME);
1302 return -EINVAL;
1303 }
1304
1305 tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL);
1306 if (!tunables_dir) {
1307 printk(KERN_ERR "unable to create debugfs directory %s\n",
1308 UV_BAU_TUNABLES_DIR);
1309 return -EINVAL;
1310 }
1311 tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
1312 tunables_dir, NULL, &tunables_fops);
1313 if (!tunables_file) {
1314 printk(KERN_ERR "unable to create debugfs file %s\n",
1315 UV_BAU_TUNABLES_FILE);
1316 return -EINVAL;
1317 }
1318 return 0;
1319}
1320
1321/*
1322 * initialize the sending side's sending buffers
1323 */
1324static void
1325uv_activation_descriptor_init(int node, int pnode)
1326{
1327 int i;
1328 int cpu;
1329 unsigned long pa;
1330 unsigned long m;
1331 unsigned long n;
1332 struct bau_desc *bau_desc;
1333 struct bau_desc *bd2;
1334 struct bau_control *bcp;
1335
1336 /*
1337 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
1338 * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub
1339 */
1340 bau_desc = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)*
1341 UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
1342 BUG_ON(!bau_desc);
1343
1344 pa = uv_gpa(bau_desc); /* need the real nasid*/
1345 n = pa >> uv_nshift;
1346 m = pa & uv_mmask;
1347
1348 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
1349 (n << UV_DESC_BASE_PNODE_SHIFT | m));
1350
1351 /*
1352 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
1353 * cpu even though we only use the first one; one descriptor can
1354 * describe a broadcast to 256 uv hubs.
1355 */
1356 for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR);
1357 i++, bd2++) {
1358 memset(bd2, 0, sizeof(struct bau_desc));
1359 bd2->header.sw_ack_flag = 1;
1360 /*
1361 * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub
1362 * in the partition. The bit map will indicate uvhub numbers,
1363 * which are 0-N in a partition. Pnodes are unique system-wide.
1364 */
1365 bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
1366 bd2->header.dest_subnodeid = 0x10; /* the LB */
1367 bd2->header.command = UV_NET_ENDPOINT_INTD;
1368 bd2->header.int_both = 1;
1369 /*
1370 * all others need to be set to zero:
1371 * fairness chaining multilevel count replied_to
1372 */
1373 }
1374 for_each_present_cpu(cpu) {
1375 if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
1376 continue;
1377 bcp = &per_cpu(bau_control, cpu);
1378 bcp->descriptor_base = bau_desc;
1379 }
1380}
1381
1382/*
1383 * initialize the destination side's receiving buffers
1384 * entered for each uvhub in the partition
1385 * - node is first node (kernel memory notion) on the uvhub
1386 * - pnode is the uvhub's physical identifier
1387 */
1388static void
1389uv_payload_queue_init(int node, int pnode)
1390{
1391 int pn;
1392 int cpu;
1393 char *cp;
1394 unsigned long pa;
1395 struct bau_payload_queue_entry *pqp;
1396 struct bau_payload_queue_entry *pqp_malloc;
1397 struct bau_control *bcp;
1398
1399 pqp = (struct bau_payload_queue_entry *) kmalloc_node(
1400 (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry),
1401 GFP_KERNEL, node);
1402 BUG_ON(!pqp);
1403 pqp_malloc = pqp;
1404
1405 cp = (char *)pqp + 31;
1406 pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
1407
1408 for_each_present_cpu(cpu) {
1409 if (pnode != uv_cpu_to_pnode(cpu))
1410 continue;
1411 /* for every cpu on this pnode: */
1412 bcp = &per_cpu(bau_control, cpu);
1413 bcp->va_queue_first = pqp;
1414 bcp->bau_msg_head = pqp;
1415 bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1);
1416 }
1417 /*
1418 * need the pnode of where the memory was really allocated
1419 */
1420 pa = uv_gpa(pqp);
1421 pn = pa >> uv_nshift;
1422 uv_write_global_mmr64(pnode,
1423 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
1424 ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
1425 uv_physnodeaddr(pqp));
1426 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
1427 uv_physnodeaddr(pqp));
1428 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
1429 (unsigned long)
1430 uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)));
1431 /* in effect, all msg_type's are set to MSG_NOOP */
1432 memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE);
1433}
1434
1435/*
1436 * Initialization of each UV hub's structures
1437 */
1438static void __init uv_init_uvhub(int uvhub, int vector)
1439{
1440 int node;
1441 int pnode;
1442 unsigned long apicid;
1443
1444 node = uvhub_to_first_node(uvhub);
1445 pnode = uv_blade_to_pnode(uvhub);
1446 uv_activation_descriptor_init(node, pnode);
1447 uv_payload_queue_init(node, pnode);
1448 /*
1449 * the below initialization can't be in firmware because the
1450 * messaging IRQ will be determined by the OS
1451 */
1452 apicid = uvhub_to_first_apicid(uvhub);
1453 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
1454 ((apicid << 32) | vector));
1455}
1456
1457/*
1458 * We will set BAU_MISC_CONTROL with a timeout period.
1459 * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT.
1460 * So the destination timeout period has to be calculated from them.
1461 */
1462static int
1463calculate_destination_timeout(void)
1464{
1465 unsigned long mmr_image;
1466 int mult1;
1467 int mult2;
1468 int index;
1469 int base;
1470 int ret;
1471 unsigned long ts_ns;
1472
1473 mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK;
1474 mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
1475 index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
1476 mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
1477 mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
1478 base = timeout_base_ns[index];
1479 ts_ns = base * mult1 * mult2;
1480 ret = ts_ns / 1000;
1481 return ret;
1482}
1483
1484/*
1485 * initialize the bau_control structure for each cpu
1486 */
1487static void __init uv_init_per_cpu(int nuvhubs)
1488{
1489 int i;
1490 int cpu;
1491 int pnode;
1492 int uvhub;
1493 int have_hmaster;
1494 short socket = 0;
1495 unsigned short socket_mask;
1496 unsigned char *uvhub_mask;
1497 struct bau_control *bcp;
1498 struct uvhub_desc *bdp;
1499 struct socket_desc *sdp;
1500 struct bau_control *hmaster = NULL;
1501 struct bau_control *smaster = NULL;
1502 struct socket_desc {
1503 short num_cpus;
1504 short cpu_number[16];
1505 };
1506 struct uvhub_desc {
1507 unsigned short socket_mask;
1508 short num_cpus;
1509 short uvhub;
1510 short pnode;
1511 struct socket_desc socket[2];
1512 };
1513 struct uvhub_desc *uvhub_descs;
1514
1515 timeout_us = calculate_destination_timeout();
1516
1517 uvhub_descs = (struct uvhub_desc *)
1518 kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
1519 memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
1520 uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
1521 for_each_present_cpu(cpu) {
1522 bcp = &per_cpu(bau_control, cpu);
1523 memset(bcp, 0, sizeof(struct bau_control));
1524 pnode = uv_cpu_hub_info(cpu)->pnode;
1525 uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
1526 *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
1527 bdp = &uvhub_descs[uvhub];
1528 bdp->num_cpus++;
1529 bdp->uvhub = uvhub;
1530 bdp->pnode = pnode;
1531 /* kludge: 'assuming' one node per socket, and assuming that
1532 disabling a socket just leaves a gap in node numbers */
1533 socket = (cpu_to_node(cpu) & 1);
1534 bdp->socket_mask |= (1 << socket);
1535 sdp = &bdp->socket[socket];
1536 sdp->cpu_number[sdp->num_cpus] = cpu;
1537 sdp->num_cpus++;
1538 }
1539 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
1540 if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
1541 continue;
1542 have_hmaster = 0;
1543 bdp = &uvhub_descs[uvhub];
1544 socket_mask = bdp->socket_mask;
1545 socket = 0;
1546 while (socket_mask) {
1547 if (!(socket_mask & 1))
1548 goto nextsocket;
1549 sdp = &bdp->socket[socket];
1550 for (i = 0; i < sdp->num_cpus; i++) {
1551 cpu = sdp->cpu_number[i];
1552 bcp = &per_cpu(bau_control, cpu);
1553 bcp->cpu = cpu;
1554 if (i == 0) {
1555 smaster = bcp;
1556 if (!have_hmaster) {
1557 have_hmaster++;
1558 hmaster = bcp;
1559 }
1560 }
1561 bcp->cpus_in_uvhub = bdp->num_cpus;
1562 bcp->cpus_in_socket = sdp->num_cpus;
1563 bcp->socket_master = smaster;
1564 bcp->uvhub = bdp->uvhub;
1565 bcp->uvhub_master = hmaster;
1566 bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
1567 blade_processor_id;
1568 }
1569nextsocket:
1570 socket++;
1571 socket_mask = (socket_mask >> 1);
1572 }
1573 }
1574 kfree(uvhub_descs);
1575 kfree(uvhub_mask);
1576 for_each_present_cpu(cpu) {
1577 bcp = &per_cpu(bau_control, cpu);
1578 bcp->baudisabled = 0;
1579 bcp->statp = &per_cpu(ptcstats, cpu);
1580 /* time interval to catch a hardware stay-busy bug */
1581 bcp->timeout_interval = microsec_2_cycles(2*timeout_us);
1582 bcp->max_bau_concurrent = max_bau_concurrent;
1583 bcp->max_bau_concurrent_constant = max_bau_concurrent;
1584 bcp->plugged_delay = plugged_delay;
1585 bcp->plugsb4reset = plugsb4reset;
1586 bcp->timeoutsb4reset = timeoutsb4reset;
1587 bcp->ipi_reset_limit = ipi_reset_limit;
1588 bcp->complete_threshold = complete_threshold;
1589 bcp->congested_response_us = congested_response_us;
1590 bcp->congested_reps = congested_reps;
1591 bcp->congested_period = congested_period;
1592 }
1593}
1594
1595/*
1596 * Initialization of BAU-related structures
1597 */
1598static int __init uv_bau_init(void)
1599{
1600 int uvhub;
1601 int pnode;
1602 int nuvhubs;
1603 int cur_cpu;
1604 int vector;
1605 unsigned long mmr;
1606
1607 if (!is_uv_system())
1608 return 0;
1609
1610 if (nobau)
1611 return 0;
1612
1613 for_each_possible_cpu(cur_cpu)
1614 zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
1615 GFP_KERNEL, cpu_to_node(cur_cpu));
1616
1617 uv_nshift = uv_hub_info->m_val;
1618 uv_mmask = (1UL << uv_hub_info->m_val) - 1;
1619 nuvhubs = uv_num_possible_blades();
1620 spin_lock_init(&disable_lock);
1621 congested_cycles = microsec_2_cycles(congested_response_us);
1622
1623 uv_init_per_cpu(nuvhubs);
1624
1625 uv_partition_base_pnode = 0x7fffffff;
1626 for (uvhub = 0; uvhub < nuvhubs; uvhub++)
1627 if (uv_blade_nr_possible_cpus(uvhub) &&
1628 (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
1629 uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
1630
1631 vector = UV_BAU_MESSAGE;
1632 for_each_possible_blade(uvhub)
1633 if (uv_blade_nr_possible_cpus(uvhub))
1634 uv_init_uvhub(uvhub, vector);
1635
1636 uv_enable_timeouts();
1637 alloc_intr_gate(vector, uv_bau_message_intr1);
1638
1639 for_each_possible_blade(uvhub) {
1640 if (uv_blade_nr_possible_cpus(uvhub)) {
1641 pnode = uv_blade_to_pnode(uvhub);
1642 /* INIT the bau */
1643 uv_write_global_mmr64(pnode,
1644 UVH_LB_BAU_SB_ACTIVATION_CONTROL,
1645 ((unsigned long)1 << 63));
1646 mmr = 1; /* should be 1 to broadcast to both sockets */
1647 uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST,
1648 mmr);
1649 }
1650 }
1651
1652 return 0;
1653}
1654core_initcall(uv_bau_init);
1655fs_initcall(uv_ptc_init);
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 4c3da5674e67..a375616d77f7 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -38,19 +38,3 @@ unsigned long __trampinit setup_trampoline(void)
38 memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); 38 memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
39 return virt_to_phys(trampoline_base); 39 return virt_to_phys(trampoline_base);
40} 40}
41
42void __init setup_trampoline_page_table(void)
43{
44#ifdef CONFIG_X86_32
45 /* Copy kernel address range */
46 clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY,
47 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
48 KERNEL_PGD_PTRS);
49
50 /* Initialize low mappings */
51 clone_pgd_range(trampoline_pg_dir,
52 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
53 min_t(unsigned long, KERNEL_PGD_PTRS,
54 KERNEL_PGD_BOUNDARY));
55#endif
56}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d43968503dd2..cb838ca42c96 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -575,6 +575,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
575 if (regs->flags & X86_VM_MASK) { 575 if (regs->flags & X86_VM_MASK) {
576 handle_vm86_trap((struct kernel_vm86_regs *) regs, 576 handle_vm86_trap((struct kernel_vm86_regs *) regs,
577 error_code, 1); 577 error_code, 1);
578 preempt_conditional_cli(regs);
578 return; 579 return;
579 } 580 }
580 581
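The one-line traps.c change balances the preempt_conditional_sti(regs) that do_debug() performs on entry: the vm86 early return used to skip the matching preempt_conditional_cli(regs), leaving the preempt count elevated. A hedged sketch of the resulting control flow, with the unrelated #DB handling elided and the surrounding details assumed:

	dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
	{
		preempt_conditional_sti(regs);		/* bumps the preempt count, may enable irqs */

		if (regs->flags & X86_VM_MASK) {
			handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
			preempt_conditional_cli(regs);	/* the added line: rebalance before returning */
			return;
		}

		/* ... normal debug-exception handling ... */

		preempt_conditional_cli(regs);		/* existing rebalance on the normal exit */
	}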
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
deleted file mode 100644
index 7b24460917d5..000000000000
--- a/arch/x86/kernel/uv_irq.c
+++ /dev/null
@@ -1,285 +0,0 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * SGI UV IRQ functions
7 *
8 * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
9 */
10
11#include <linux/module.h>
12#include <linux/rbtree.h>
13#include <linux/slab.h>
14#include <linux/irq.h>
15
16#include <asm/apic.h>
17#include <asm/uv/uv_irq.h>
18#include <asm/uv/uv_hub.h>
19
20/* MMR offset and pnode of hub sourcing interrupts for a given irq */
21struct uv_irq_2_mmr_pnode{
22 struct rb_node list;
23 unsigned long offset;
24 int pnode;
25 int irq;
26};
27
28static spinlock_t uv_irq_lock;
29static struct rb_root uv_irq_root;
30
31static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool);
32
33static void uv_noop(struct irq_data *data) { }
34
35static void uv_ack_apic(struct irq_data *data)
36{
37 ack_APIC_irq();
38}
39
40static struct irq_chip uv_irq_chip = {
41 .name = "UV-CORE",
42 .irq_mask = uv_noop,
43 .irq_unmask = uv_noop,
44 .irq_eoi = uv_ack_apic,
45 .irq_set_affinity = uv_set_irq_affinity,
46};
47
48/*
49 * Add offset and pnode information of the hub sourcing interrupts to the
50 * rb tree for a specific irq.
51 */
52static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
53{
54 struct rb_node **link = &uv_irq_root.rb_node;
55 struct rb_node *parent = NULL;
56 struct uv_irq_2_mmr_pnode *n;
57 struct uv_irq_2_mmr_pnode *e;
58 unsigned long irqflags;
59
60 n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
61 uv_blade_to_memory_nid(blade));
62 if (!n)
63 return -ENOMEM;
64
65 n->irq = irq;
66 n->offset = offset;
67 n->pnode = uv_blade_to_pnode(blade);
68 spin_lock_irqsave(&uv_irq_lock, irqflags);
69 /* Find the right place in the rbtree: */
70 while (*link) {
71 parent = *link;
72 e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
73
74 if (unlikely(irq == e->irq)) {
75 /* irq entry exists */
76 e->pnode = uv_blade_to_pnode(blade);
77 e->offset = offset;
78 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
79 kfree(n);
80 return 0;
81 }
82
83 if (irq < e->irq)
84 link = &(*link)->rb_left;
85 else
86 link = &(*link)->rb_right;
87 }
88
89 /* Insert the node into the rbtree. */
90 rb_link_node(&n->list, parent, link);
91 rb_insert_color(&n->list, &uv_irq_root);
92
93 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
94 return 0;
95}
96
97/* Retrieve offset and pnode information from the rb tree for a specific irq */
98int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
99{
100 struct uv_irq_2_mmr_pnode *e;
101 struct rb_node *n;
102 unsigned long irqflags;
103
104 spin_lock_irqsave(&uv_irq_lock, irqflags);
105 n = uv_irq_root.rb_node;
106 while (n) {
107 e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
108
109 if (e->irq == irq) {
110 *offset = e->offset;
111 *pnode = e->pnode;
112 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
113 return 0;
114 }
115
116 if (irq < e->irq)
117 n = n->rb_left;
118 else
119 n = n->rb_right;
120 }
121 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
122 return -1;
123}
124
125/*
126 * Re-target the irq to the specified CPU and enable the specified MMR located
127 * on the specified blade to allow the sending of MSIs to the specified CPU.
128 */
129static int
130arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
131 unsigned long mmr_offset, int limit)
132{
133 const struct cpumask *eligible_cpu = cpumask_of(cpu);
134 struct irq_cfg *cfg = get_irq_chip_data(irq);
135 unsigned long mmr_value;
136 struct uv_IO_APIC_route_entry *entry;
137 int mmr_pnode, err;
138
139 BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
140 sizeof(unsigned long));
141
142 err = assign_irq_vector(irq, cfg, eligible_cpu);
143 if (err != 0)
144 return err;
145
146 if (limit == UV_AFFINITY_CPU)
147 irq_set_status_flags(irq, IRQ_NO_BALANCING);
148 else
149 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
150
151 set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
152 irq_name);
153
154 mmr_value = 0;
155 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
156 entry->vector = cfg->vector;
157 entry->delivery_mode = apic->irq_delivery_mode;
158 entry->dest_mode = apic->irq_dest_mode;
159 entry->polarity = 0;
160 entry->trigger = 0;
161 entry->mask = 0;
162 entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
163
164 mmr_pnode = uv_blade_to_pnode(mmr_blade);
165 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
166
167 if (cfg->move_in_progress)
168 send_cleanup_vector(cfg);
169
170 return irq;
171}
172
173/*
174 * Disable the specified MMR located on the specified blade so that MSIs are
175 * no longer allowed to be sent.
176 */
177static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
178{
179 unsigned long mmr_value;
180 struct uv_IO_APIC_route_entry *entry;
181
182 BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
183 sizeof(unsigned long));
184
185 mmr_value = 0;
186 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
187 entry->mask = 1;
188
189 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
190}
191
192static int
193uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
194 bool force)
195{
196 struct irq_cfg *cfg = data->chip_data;
197 unsigned int dest;
198 unsigned long mmr_value, mmr_offset;
199 struct uv_IO_APIC_route_entry *entry;
200 int mmr_pnode;
201
202 if (__ioapic_set_affinity(data, mask, &dest))
203 return -1;
204
205 mmr_value = 0;
206 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
207
208 entry->vector = cfg->vector;
209 entry->delivery_mode = apic->irq_delivery_mode;
210 entry->dest_mode = apic->irq_dest_mode;
211 entry->polarity = 0;
212 entry->trigger = 0;
213 entry->mask = 0;
214 entry->dest = dest;
215
216 /* Get previously stored MMR and pnode of hub sourcing interrupts */
217 if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode))
218 return -1;
219
220 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
221
222 if (cfg->move_in_progress)
223 send_cleanup_vector(cfg);
224
225 return 0;
226}
227
228/*
229 * Set up a mapping of an available irq and vector, and enable the specified
230 * MMR that defines the MSI that is to be sent to the specified CPU when an
231 * interrupt is raised.
232 */
233int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
234 unsigned long mmr_offset, int limit)
235{
236 int irq, ret;
237
238 irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));
239
240 if (irq <= 0)
241 return -EBUSY;
242
243 ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
244 limit);
245 if (ret == irq)
246 uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
247 else
248 destroy_irq(irq);
249
250 return ret;
251}
252EXPORT_SYMBOL_GPL(uv_setup_irq);
253
254/*
255 * Tear down a mapping of an irq and vector, and disable the specified MMR that
256 * defined the MSI that was to be sent to the specified CPU when an interrupt
257 * was raised.
258 *
259 * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
260 */
261void uv_teardown_irq(unsigned int irq)
262{
263 struct uv_irq_2_mmr_pnode *e;
264 struct rb_node *n;
265 unsigned long irqflags;
266
267 spin_lock_irqsave(&uv_irq_lock, irqflags);
268 n = uv_irq_root.rb_node;
269 while (n) {
270 e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
271 if (e->irq == irq) {
272 arch_disable_uv_irq(e->pnode, e->offset);
273 rb_erase(n, &uv_irq_root);
274 kfree(e);
275 break;
276 }
277 if (irq < e->irq)
278 n = n->rb_left;
279 else
280 n = n->rb_right;
281 }
282 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
283 destroy_irq(irq);
284}
285EXPORT_SYMBOL_GPL(uv_teardown_irq);
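
For context, a minimal sketch of how a driver would have consumed the interface deleted above: allocate and program the MSI with uv_setup_irq(), attach a normal handler with request_irq(), and undo both on teardown. The names my_handler, my_dev, my_attach/my_detach and MY_MMR_OFFSET are placeholders, not part of this patch, and error handling is trimmed.

/* Usage sketch for the uv_irq interface above; placeholder names only. */
#include <linux/interrupt.h>
#include <asm/uv/uv_irq.h>

#define MY_MMR_OFFSET	0UL	/* hardware-specific MMR offset (placeholder) */

static irqreturn_t my_handler(int irq, void *dev_id)
{
	return IRQ_HANDLED;
}

static int my_attach(int cpu, int blade, void *my_dev)
{
	int irq = uv_setup_irq("my-uv-dev", cpu, blade, MY_MMR_OFFSET,
			       UV_AFFINITY_CPU);

	if (irq <= 0)
		return irq ? irq : -EBUSY;
	if (request_irq(irq, my_handler, 0, "my-uv-dev", my_dev)) {
		uv_teardown_irq(irq);
		return -EBUSY;
	}
	return irq;
}

static void my_detach(int irq, void *my_dev)
{
	free_irq(irq, my_dev);
	uv_teardown_irq(irq);	/* masks the MMR entry and frees the vector */
}
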
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
deleted file mode 100644
index 309c70fb7759..000000000000
--- a/arch/x86/kernel/uv_sysfs.c
+++ /dev/null
@@ -1,76 +0,0 @@
1/*
2 * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson
20 */
21
22#include <linux/sysdev.h>
23#include <asm/uv/bios.h>
24#include <asm/uv/uv.h>
25
26struct kobject *sgi_uv_kobj;
27
28static ssize_t partition_id_show(struct kobject *kobj,
29 struct kobj_attribute *attr, char *buf)
30{
31 return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id);
32}
33
34static ssize_t coherence_id_show(struct kobject *kobj,
35 struct kobj_attribute *attr, char *buf)
36{
37 return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
38}
39
40static struct kobj_attribute partition_id_attr =
41 __ATTR(partition_id, S_IRUGO, partition_id_show, NULL);
42
43static struct kobj_attribute coherence_id_attr =
44 __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL);
45
46
47static int __init sgi_uv_sysfs_init(void)
48{
49 unsigned long ret;
50
51 if (!is_uv_system())
52 return -ENODEV;
53
54 if (!sgi_uv_kobj)
55 sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
56 if (!sgi_uv_kobj) {
57 printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n");
58 return -EINVAL;
59 }
60
61 ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
62 if (ret) {
63 printk(KERN_WARNING "sysfs_create_file partition_id failed\n");
64 return ret;
65 }
66
67 ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
68 if (ret) {
69 printk(KERN_WARNING "sysfs_create_file coherence_id failed\n");
70 return ret;
71 }
72
73 return 0;
74}
75
76device_initcall(sgi_uv_sysfs_init);
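
The two attributes created above surface as read-only text files under /sys/firmware/sgi_uv/ (the kobject is added to firmware_kobj). A small userspace sketch of consuming one of them, with the path taken from the code above and error handling kept minimal:

/* Userspace sketch: read the partition_id attribute exported above. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/firmware/sgi_uv/partition_id", "r");
	long id;

	if (!f)
		return 1;
	if (fscanf(f, "%ld", &id) == 1)
		printf("SGI UV partition id: %ld\n", id);
	fclose(f);
	return 0;
}
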
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
deleted file mode 100644
index 56e421bc379b..000000000000
--- a/arch/x86/kernel/uv_time.c
+++ /dev/null
@@ -1,423 +0,0 @@
1/*
2 * SGI RTC clock/timer routines.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Dimitri Sivanich
20 */
21#include <linux/clockchips.h>
22#include <linux/slab.h>
23
24#include <asm/uv/uv_mmrs.h>
25#include <asm/uv/uv_hub.h>
26#include <asm/uv/bios.h>
27#include <asm/uv/uv.h>
28#include <asm/apic.h>
29#include <asm/cpu.h>
30
31#define RTC_NAME "sgi_rtc"
32
33static cycle_t uv_read_rtc(struct clocksource *cs);
34static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
35static void uv_rtc_timer_setup(enum clock_event_mode,
36 struct clock_event_device *);
37
38static struct clocksource clocksource_uv = {
39 .name = RTC_NAME,
40 .rating = 400,
41 .read = uv_read_rtc,
42 .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
43 .shift = 10,
44 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
45};
46
47static struct clock_event_device clock_event_device_uv = {
48 .name = RTC_NAME,
49 .features = CLOCK_EVT_FEAT_ONESHOT,
50 .shift = 20,
51 .rating = 400,
52 .irq = -1,
53 .set_next_event = uv_rtc_next_event,
54 .set_mode = uv_rtc_timer_setup,
55 .event_handler = NULL,
56};
57
58static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
59
60/* There is one of these allocated per node */
61struct uv_rtc_timer_head {
62 spinlock_t lock;
63 /* next cpu waiting for timer, local node relative: */
64 int next_cpu;
65 /* number of cpus on this node: */
66 int ncpus;
67 struct {
68 int lcpu; /* systemwide logical cpu number */
69 u64 expires; /* next timer expiration for this cpu */
70 } cpu[1];
71};
72
73/*
74 * Access to uv_rtc_timer_head via blade id.
75 */
76static struct uv_rtc_timer_head **blade_info __read_mostly;
77
78static int uv_rtc_evt_enable;
79
80/*
81 * Hardware interface routines
82 */
83
84/* Send IPIs to another node */
85static void uv_rtc_send_IPI(int cpu)
86{
87 unsigned long apicid, val;
88 int pnode;
89
90 apicid = cpu_physical_id(cpu);
91 pnode = uv_apicid_to_pnode(apicid);
92 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
93 (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
94 (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
95
96 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
97}
98
99/* Check for an RTC interrupt pending */
100static int uv_intr_pending(int pnode)
101{
102 return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
103 UVH_EVENT_OCCURRED0_RTC1_MASK;
104}
105
106/* Setup interrupt and return non-zero if early expiration occurred. */
107static int uv_setup_intr(int cpu, u64 expires)
108{
109 u64 val;
110 int pnode = uv_cpu_to_pnode(cpu);
111
112 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
113 UVH_RTC1_INT_CONFIG_M_MASK);
114 uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
115
116 uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
117 UVH_EVENT_OCCURRED0_RTC1_MASK);
118
119 val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
120 ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
121
122 /* Set configuration */
123 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
124 /* Initialize comparator value */
125 uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
126
127 if (uv_read_rtc(NULL) <= expires)
128 return 0;
129
130 return !uv_intr_pending(pnode);
131}
132
133/*
134 * Per-cpu timer tracking routines
135 */
136
137static __init void uv_rtc_deallocate_timers(void)
138{
139 int bid;
140
141 for_each_possible_blade(bid) {
142 kfree(blade_info[bid]);
143 }
144 kfree(blade_info);
145}
146
147/* Allocate per-node list of cpu timer expiration times. */
148static __init int uv_rtc_allocate_timers(void)
149{
150 int cpu;
151
152 blade_info = kmalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL);
153 if (!blade_info)
154 return -ENOMEM;
155 memset(blade_info, 0, uv_possible_blades * sizeof(void *));
156
157 for_each_present_cpu(cpu) {
158 int nid = cpu_to_node(cpu);
159 int bid = uv_cpu_to_blade_id(cpu);
160 int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
161 struct uv_rtc_timer_head *head = blade_info[bid];
162
163 if (!head) {
164 head = kmalloc_node(sizeof(struct uv_rtc_timer_head) +
165 (uv_blade_nr_possible_cpus(bid) *
166 2 * sizeof(u64)),
167 GFP_KERNEL, nid);
168 if (!head) {
169 uv_rtc_deallocate_timers();
170 return -ENOMEM;
171 }
172 spin_lock_init(&head->lock);
173 head->ncpus = uv_blade_nr_possible_cpus(bid);
174 head->next_cpu = -1;
175 blade_info[bid] = head;
176 }
177
178 head->cpu[bcpu].lcpu = cpu;
179 head->cpu[bcpu].expires = ULLONG_MAX;
180 }
181
182 return 0;
183}
184
185/* Find and set the next expiring timer. */
186static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode)
187{
188 u64 lowest = ULLONG_MAX;
189 int c, bcpu = -1;
190
191 head->next_cpu = -1;
192 for (c = 0; c < head->ncpus; c++) {
193 u64 exp = head->cpu[c].expires;
194 if (exp < lowest) {
195 bcpu = c;
196 lowest = exp;
197 }
198 }
199 if (bcpu >= 0) {
200 head->next_cpu = bcpu;
201 c = head->cpu[bcpu].lcpu;
202 if (uv_setup_intr(c, lowest))
203 /* If we didn't set it up in time, trigger */
204 uv_rtc_send_IPI(c);
205 } else {
206 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
207 UVH_RTC1_INT_CONFIG_M_MASK);
208 }
209}
210
211/*
212 * Set expiration time for current cpu.
213 *
214 * Returns -ETIME if we missed the expiration time.
215 */
216static int uv_rtc_set_timer(int cpu, u64 expires)
217{
218 int pnode = uv_cpu_to_pnode(cpu);
219 int bid = uv_cpu_to_blade_id(cpu);
220 struct uv_rtc_timer_head *head = blade_info[bid];
221 int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
222 u64 *t = &head->cpu[bcpu].expires;
223 unsigned long flags;
224 int next_cpu;
225
226 spin_lock_irqsave(&head->lock, flags);
227
228 next_cpu = head->next_cpu;
229 *t = expires;
230
231 /* Will this one be next to go off? */
232 if (next_cpu < 0 || bcpu == next_cpu ||
233 expires < head->cpu[next_cpu].expires) {
234 head->next_cpu = bcpu;
235 if (uv_setup_intr(cpu, expires)) {
236 *t = ULLONG_MAX;
237 uv_rtc_find_next_timer(head, pnode);
238 spin_unlock_irqrestore(&head->lock, flags);
239 return -ETIME;
240 }
241 }
242
243 spin_unlock_irqrestore(&head->lock, flags);
244 return 0;
245}
246
247/*
248 * Unset expiration time for current cpu.
249 *
250 * Returns 1 if this timer was pending.
251 */
252static int uv_rtc_unset_timer(int cpu, int force)
253{
254 int pnode = uv_cpu_to_pnode(cpu);
255 int bid = uv_cpu_to_blade_id(cpu);
256 struct uv_rtc_timer_head *head = blade_info[bid];
257 int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
258 u64 *t = &head->cpu[bcpu].expires;
259 unsigned long flags;
260 int rc = 0;
261
262 spin_lock_irqsave(&head->lock, flags);
263
264 if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
265 rc = 1;
266
267 if (rc) {
268 *t = ULLONG_MAX;
269 /* Was the hardware setup for this timer? */
270 if (head->next_cpu == bcpu)
271 uv_rtc_find_next_timer(head, pnode);
272 }
273
274 spin_unlock_irqrestore(&head->lock, flags);
275
276 return rc;
277}
278
279
280/*
281 * Kernel interface routines.
282 */
283
284/*
285 * Read the RTC.
286 *
287 * Starting with HUB rev 2.0, the UV RTC register is replicated across all
288 * cachelines of its own page. This allows faster simultaneous reads
289 * from a given socket.
290 */
291static cycle_t uv_read_rtc(struct clocksource *cs)
292{
293 unsigned long offset;
294
295 if (uv_get_min_hub_revision_id() == 1)
296 offset = 0;
297 else
298 offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
299
300 return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
301}
302
303/*
304 * Program the next event, relative to now
305 */
306static int uv_rtc_next_event(unsigned long delta,
307 struct clock_event_device *ced)
308{
309 int ced_cpu = cpumask_first(ced->cpumask);
310
311 return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL));
312}
313
314/*
315 * Setup the RTC timer in oneshot mode
316 */
317static void uv_rtc_timer_setup(enum clock_event_mode mode,
318 struct clock_event_device *evt)
319{
320 int ced_cpu = cpumask_first(evt->cpumask);
321
322 switch (mode) {
323 case CLOCK_EVT_MODE_PERIODIC:
324 case CLOCK_EVT_MODE_ONESHOT:
325 case CLOCK_EVT_MODE_RESUME:
326 /* Nothing to do here yet */
327 break;
328 case CLOCK_EVT_MODE_UNUSED:
329 case CLOCK_EVT_MODE_SHUTDOWN:
330 uv_rtc_unset_timer(ced_cpu, 1);
331 break;
332 }
333}
334
335static void uv_rtc_interrupt(void)
336{
337 int cpu = smp_processor_id();
338 struct clock_event_device *ced = &per_cpu(cpu_ced, cpu);
339
340 if (!ced || !ced->event_handler)
341 return;
342
343 if (uv_rtc_unset_timer(cpu, 0) != 1)
344 return;
345
346 ced->event_handler(ced);
347}
348
349static int __init uv_enable_evt_rtc(char *str)
350{
351 uv_rtc_evt_enable = 1;
352
353 return 1;
354}
355__setup("uvrtcevt", uv_enable_evt_rtc);
356
357static __init void uv_rtc_register_clockevents(struct work_struct *dummy)
358{
359 struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
360
361 *ced = clock_event_device_uv;
362 ced->cpumask = cpumask_of(smp_processor_id());
363 clockevents_register_device(ced);
364}
365
366static __init int uv_rtc_setup_clock(void)
367{
368 int rc;
369
370 if (!is_uv_system())
371 return -ENODEV;
372
373 clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
374 clocksource_uv.shift);
375
376 /* If single blade, prefer tsc */
377 if (uv_num_possible_blades() == 1)
378 clocksource_uv.rating = 250;
379
380 rc = clocksource_register(&clocksource_uv);
381 if (rc)
382 printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
383 else
384 printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n",
385 sn_rtc_cycles_per_second/(unsigned long)1E6);
386
387 if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback)
388 return rc;
389
390 /* Setup and register clockevents */
391 rc = uv_rtc_allocate_timers();
392 if (rc)
393 goto error;
394
395 x86_platform_ipi_callback = uv_rtc_interrupt;
396
397 clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second,
398 NSEC_PER_SEC, clock_event_device_uv.shift);
399
400 clock_event_device_uv.min_delta_ns = NSEC_PER_SEC /
401 sn_rtc_cycles_per_second;
402
403 clock_event_device_uv.max_delta_ns = clocksource_uv.mask *
404 (NSEC_PER_SEC / sn_rtc_cycles_per_second);
405
406 rc = schedule_on_each_cpu(uv_rtc_register_clockevents);
407 if (rc) {
408 x86_platform_ipi_callback = NULL;
409 uv_rtc_deallocate_timers();
410 goto error;
411 }
412
413 printk(KERN_INFO "UV RTC clockevents registered\n");
414
415 return 0;
416
417error:
418 clocksource_unregister(&clocksource_uv);
419 printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc);
420
421 return rc;
422}
423arch_initcall(uv_rtc_setup_clock);
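
One thing that is easy to get backwards in the setup code above: the clocksource mult converts RTC cycles to nanoseconds, while the clock_event mult converts nanoseconds back into cycles, so the two values are computed in opposite directions. A rough sketch of what clocksource_hz2mult() and div_sc() yield, with their rounding terms omitted (approx_hz2mult/approx_div_sc are illustrative names, not kernel functions):

/* Fixed-point sketch (rounding omitted):
 *   clocksource:  ns     = (cycles * cs_mult) >> cs_shift
 *   clock_event:  cycles = (ns * ce_mult)     >> ce_shift
 */
#include <linux/types.h>
#include <linux/time.h>		/* NSEC_PER_SEC */

static inline u32 approx_hz2mult(u32 hz, u32 shift)	/* ~clocksource_hz2mult() */
{
	return (u32)(((u64)NSEC_PER_SEC << shift) / hz);
}

static inline u32 approx_div_sc(u32 hz, u32 nsec, u32 shift)	/* ~div_sc() */
{
	return (u32)(((u64)hz << shift) / nsec);
}
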
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
deleted file mode 100644
index 3371bd053b89..000000000000
--- a/arch/x86/kernel/visws_quirks.c
+++ /dev/null
@@ -1,614 +0,0 @@
1/*
2 * SGI Visual Workstation support and quirks, unmaintained.
3 *
4 * Split out from setup.c by davej@suse.de
5 *
6 * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
7 *
8 * SGI Visual Workstation interrupt controller
9 *
10 * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
11 * which serves as the main interrupt controller in the system. Non-legacy
12 * hardware in the system uses this controller directly. Legacy devices
13 * are connected to the PIIX4 which in turn has its 8259(s) connected to
14 * a of the Cobalt APIC entry.
15 *
16 * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
17 *
18 * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
19 */
20#include <linux/interrupt.h>
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/smp.h>
24
25#include <asm/visws/cobalt.h>
26#include <asm/visws/piix4.h>
27#include <asm/io_apic.h>
28#include <asm/fixmap.h>
29#include <asm/reboot.h>
30#include <asm/setup.h>
31#include <asm/apic.h>
32#include <asm/e820.h>
33#include <asm/time.h>
34#include <asm/io.h>
35
36#include <linux/kernel_stat.h>
37
38#include <asm/i8259.h>
39#include <asm/irq_vectors.h>
40#include <asm/visws/lithium.h>
41
42#include <linux/sched.h>
43#include <linux/kernel.h>
44#include <linux/pci.h>
45#include <linux/pci_ids.h>
46
47extern int no_broadcast;
48
49char visws_board_type = -1;
50char visws_board_rev = -1;
51
52static void __init visws_time_init(void)
53{
54 printk(KERN_INFO "Starting Cobalt Timer system clock\n");
55
56 /* Set the countdown value */
57 co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
58
59 /* Start the timer */
60 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
61
62 /* Enable (unmask) the timer interrupt */
63 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
64
65 setup_default_timer_irq();
66}
67
68/* Replaces the default init_ISA_irqs in the generic setup */
69static void __init visws_pre_intr_init(void);
70
71/* Quirk for machine specific memory setup. */
72
73#define MB (1024 * 1024)
74
75unsigned long sgivwfb_mem_phys;
76unsigned long sgivwfb_mem_size;
77EXPORT_SYMBOL(sgivwfb_mem_phys);
78EXPORT_SYMBOL(sgivwfb_mem_size);
79
80long long mem_size __initdata = 0;
81
82static char * __init visws_memory_setup(void)
83{
84 long long gfx_mem_size = 8 * MB;
85
86 mem_size = boot_params.alt_mem_k;
87
88 if (!mem_size) {
89 printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n");
90 mem_size = 128 * MB;
91 }
92
93 /*
94 * this hardcodes the graphics memory to 8 MB
95 * it really should be sized dynamically (or at least
96 * set as a boot param)
97 */
98 if (!sgivwfb_mem_size) {
99 printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
100 sgivwfb_mem_size = 8 * MB;
101 }
102
103 /*
104 * Trim to nearest MB
105 */
106 sgivwfb_mem_size &= ~((1 << 20) - 1);
107 sgivwfb_mem_phys = mem_size - gfx_mem_size;
108
109 e820_add_region(0, LOWMEMSIZE(), E820_RAM);
110 e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
111 e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
112
113 return "PROM";
114}
115
116static void visws_machine_emergency_restart(void)
117{
118 /*
119 * Visual Workstations restart after this
120 * register is poked on the PIIX4
121 */
122 outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
123}
124
125static void visws_machine_power_off(void)
126{
127 unsigned short pm_status;
128/* extern unsigned int pci_bus0; */
129
130 while ((pm_status = inw(PMSTS_PORT)) & 0x100)
131 outw(pm_status, PMSTS_PORT);
132
133 outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
134
135 mdelay(10);
136
137#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
138 (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
139
140/* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */
141 outl(PIIX_SPECIAL_STOP, 0xCFC);
142}
143
144static void __init visws_get_smp_config(unsigned int early)
145{
146}
147
148/*
149 * The Visual Workstation is Intel MP compliant in the hardware
150 * sense, but it doesn't have a BIOS(-configuration table).
151 * No problem for Linux.
152 */
153
154static void __init MP_processor_info(struct mpc_cpu *m)
155{
156 int ver, logical_apicid;
157 physid_mask_t apic_cpus;
158
159 if (!(m->cpuflag & CPU_ENABLED))
160 return;
161
162 logical_apicid = m->apicid;
163 printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
164 m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
165 m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
166 (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver);
167
168 if (m->cpuflag & CPU_BOOTPROCESSOR)
169 boot_cpu_physical_apicid = m->apicid;
170
171 ver = m->apicver;
172 if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
173 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
174 m->apicid, MAX_APICS);
175 return;
176 }
177
178 apic->apicid_to_cpu_present(m->apicid, &apic_cpus);
179 physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
180 /*
181 * Validate version
182 */
183 if (ver == 0x0) {
184 printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
185 "fixing up to 0x10. (tell your hw vendor)\n",
186 m->apicid);
187 ver = 0x10;
188 }
189 apic_version[m->apicid] = ver;
190}
191
192static void __init visws_find_smp_config(void)
193{
194 struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
195 unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
196
197 if (ncpus > CO_CPU_MAX) {
198 printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
199 ncpus, mp);
200
201 ncpus = CO_CPU_MAX;
202 }
203
204 if (ncpus > setup_max_cpus)
205 ncpus = setup_max_cpus;
206
207#ifdef CONFIG_X86_LOCAL_APIC
208 smp_found_config = 1;
209#endif
210 while (ncpus--)
211 MP_processor_info(mp++);
212
213 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
214}
215
216static void visws_trap_init(void);
217
218void __init visws_early_detect(void)
219{
220 int raw;
221
222 visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
223 >> PIIX_GPI_BD_SHIFT;
224
225 if (visws_board_type < 0)
226 return;
227
228 /*
229 * Override the default platform setup functions
230 */
231 x86_init.resources.memory_setup = visws_memory_setup;
232 x86_init.mpparse.get_smp_config = visws_get_smp_config;
233 x86_init.mpparse.find_smp_config = visws_find_smp_config;
234 x86_init.irqs.pre_vector_init = visws_pre_intr_init;
235 x86_init.irqs.trap_init = visws_trap_init;
236 x86_init.timers.timer_init = visws_time_init;
237 x86_init.pci.init = pci_visws_init;
238 x86_init.pci.init_irq = x86_init_noop;
239
240 /*
241 * Install reboot quirks:
242 */
243 pm_power_off = visws_machine_power_off;
244 machine_ops.emergency_restart = visws_machine_emergency_restart;
245
246 /*
247 * Do not use broadcast IPIs:
248 */
249 no_broadcast = 0;
250
251#ifdef CONFIG_X86_IO_APIC
252 /*
253 * Turn off IO-APIC detection and initialization:
254 */
255 skip_ioapic_setup = 1;
256#endif
257
258 /*
259 * Get Board rev.
260 * First, we have to initialize the 307 part to allow us access
261 * to the GPIO registers. Let's map them at 0x0fc0 which is right
262 * after the PIIX4 PM section.
263 */
264 outb_p(SIO_DEV_SEL, SIO_INDEX);
265 outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */
266
267 outb_p(SIO_DEV_MSB, SIO_INDEX);
268 outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */
269
270 outb_p(SIO_DEV_LSB, SIO_INDEX);
271 outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */
272
273 outb_p(SIO_DEV_ENB, SIO_INDEX);
274 outb_p(1, SIO_DATA); /* Enable GPIO registers. */
275
276 /*
277 * Now, we have to map the power management section to write
278 * a bit which enables access to the GPIO registers.
279 * What lunatic came up with this shit?
280 */
281 outb_p(SIO_DEV_SEL, SIO_INDEX);
282 outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */
283
284 outb_p(SIO_DEV_MSB, SIO_INDEX);
285 outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */
286
287 outb_p(SIO_DEV_LSB, SIO_INDEX);
288 outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */
289
290 outb_p(SIO_DEV_ENB, SIO_INDEX);
291 outb_p(1, SIO_DATA); /* Enable PM registers. */
292
293 /*
294 * Now, write the PM register which enables the GPIO registers.
295 */
296 outb_p(SIO_PM_FER2, SIO_PM_INDEX);
297 outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
298
299 /*
300 * Now, initialize the GPIO registers.
301 * We want them all to be inputs which is the
302 * power on default, so let's leave them alone.
303 * So, let's just read the board rev!
304 */
305 raw = inb_p(SIO_GP_DATA1);
306 raw &= 0x7f; /* 7 bits of valid board revision ID. */
307
308 if (visws_board_type == VISWS_320) {
309 if (raw < 0x6) {
310 visws_board_rev = 4;
311 } else if (raw < 0xc) {
312 visws_board_rev = 5;
313 } else {
314 visws_board_rev = 6;
315 }
316 } else if (visws_board_type == VISWS_540) {
317 visws_board_rev = 2;
318 } else {
319 visws_board_rev = raw;
320 }
321
322 printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
323 (visws_board_type == VISWS_320 ? "320" :
324 (visws_board_type == VISWS_540 ? "540" :
325 "unknown")), visws_board_rev);
326}
327
328#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
329#define BCD (LI_INTB | LI_INTC | LI_INTD)
330#define ALLDEVS (A01234 | BCD)
331
332static __init void lithium_init(void)
333{
334 set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
335 set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
336
337 if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
338 (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
339 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
340/* panic("This machine is not SGI Visual Workstation 320/540"); */
341 }
342
343 if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
344 (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
345 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
346/* panic("This machine is not SGI Visual Workstation 320/540"); */
347 }
348
349 li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
350 li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
351}
352
353static __init void cobalt_init(void)
354{
355 /*
356 * On normal SMP PC this is used only with SMP, but we have to
357 * use it and set it up here to start the Cobalt clock
358 */
359 set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
360 setup_local_APIC();
361 printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
362 (unsigned int)apic_read(APIC_LVR),
363 (unsigned int)apic_read(APIC_ID));
364
365 set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
366 set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
367 printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
368 co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
369
370 /* Enable Cobalt APIC being careful to NOT change the ID! */
371 co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
372
373 printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
374 co_apic_read(CO_APIC_ID));
375}
376
377static void __init visws_trap_init(void)
378{
379 lithium_init();
380 cobalt_init();
381}
382
383/*
384 * IRQ controller / APIC support:
385 */
386
387static DEFINE_SPINLOCK(cobalt_lock);
388
389/*
390 * Set the given Cobalt APIC Redirection Table entry to point
391 * to the given IDT vector/index.
392 */
393static inline void co_apic_set(int entry, int irq)
394{
395 co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
396 co_apic_write(CO_APIC_HI(entry), 0);
397}
398
399/*
400 * Cobalt (IO)-APIC functions to handle PCI devices.
401 */
402static inline int co_apic_ide0_hack(void)
403{
404 extern char visws_board_type;
405 extern char visws_board_rev;
406
407 if (visws_board_type == VISWS_320 && visws_board_rev == 5)
408 return 5;
409 return CO_APIC_IDE0;
410}
411
412static int is_co_apic(unsigned int irq)
413{
414 if (IS_CO_APIC(irq))
415 return CO_APIC(irq);
416
417 switch (irq) {
418 case 0: return CO_APIC_CPU;
419 case CO_IRQ_IDE0: return co_apic_ide0_hack();
420 case CO_IRQ_IDE1: return CO_APIC_IDE1;
421 default: return -1;
422 }
423}
424
425
426/*
427 * This is the SGI Cobalt (IO-)APIC:
428 */
429static void enable_cobalt_irq(struct irq_data *data)
430{
431 co_apic_set(is_co_apic(data->irq), data->irq);
432}
433
434static void disable_cobalt_irq(struct irq_data *data)
435{
436 int entry = is_co_apic(data->irq);
437
438 co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
439 co_apic_read(CO_APIC_LO(entry));
440}
441
442static void ack_cobalt_irq(struct irq_data *data)
443{
444 unsigned long flags;
445
446 spin_lock_irqsave(&cobalt_lock, flags);
447 disable_cobalt_irq(data);
448 apic_write(APIC_EOI, APIC_EIO_ACK);
449 spin_unlock_irqrestore(&cobalt_lock, flags);
450}
451
452static struct irq_chip cobalt_irq_type = {
453 .name = "Cobalt-APIC",
454 .irq_enable = enable_cobalt_irq,
455 .irq_disable = disable_cobalt_irq,
456 .irq_ack = ack_cobalt_irq,
457};
458
459
460/*
461 * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
462 * -- not the manner expected by the code in i8259.c.
463 *
464 * there is a 'master' physical interrupt source that gets sent to
465 * the CPU. But in the chipset there are various 'virtual' interrupts
466 * waiting to be handled. We represent this to Linux through a 'master'
467 * interrupt controller type, and through a special virtual interrupt-
468 * controller. Device drivers only see the virtual interrupt sources.
469 */
470static unsigned int startup_piix4_master_irq(struct irq_data *data)
471{
472 legacy_pic->init(0);
473 enable_cobalt_irq(data);
474}
475
476static void end_piix4_master_irq(struct irq_data *data)
477{
478 unsigned long flags;
479
480 spin_lock_irqsave(&cobalt_lock, flags);
481 enable_cobalt_irq(data);
482 spin_unlock_irqrestore(&cobalt_lock, flags);
483}
484
485static struct irq_chip piix4_master_irq_type = {
486 .name = "PIIX4-master",
487 .irq_startup = startup_piix4_master_irq,
488 .irq_ack = ack_cobalt_irq,
489};
490
491static void pii4_mask(struct irq_data *data) { }
492
493static struct irq_chip piix4_virtual_irq_type = {
494 .name = "PIIX4-virtual",
495 .mask = pii4_mask,
496};
497
498/*
499 * PIIX4-8259 master/virtual functions to handle interrupt requests
500 * from legacy devices: floppy, parallel, serial, rtc.
501 *
502 * None of these get Cobalt APIC entries, neither do they have IDT
503 * entries. These interrupts are purely virtual and distributed from
504 * the 'master' interrupt source: CO_IRQ_8259.
505 *
506 * When the 8259 interrupts its handler figures out which of these
507 * devices is interrupting and dispatches to its handler.
508 *
509 * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
510 * enable_irq gets the right irq. This 'master' irq is never directly
511 * manipulated by any driver.
512 */
513static irqreturn_t piix4_master_intr(int irq, void *dev_id)
514{
515 unsigned long flags;
516 int realirq;
517
518 raw_spin_lock_irqsave(&i8259A_lock, flags);
519
520 /* Find out what's interrupting in the PIIX4 master 8259 */
521 outb(0x0c, 0x20); /* OCW3 Poll command */
522 realirq = inb(0x20);
523
524 /*
525 * Bit 7 == 0 means invalid/spurious
526 */
527 if (unlikely(!(realirq & 0x80)))
528 goto out_unlock;
529
530 realirq &= 7;
531
532 if (unlikely(realirq == 2)) {
533 outb(0x0c, 0xa0);
534 realirq = inb(0xa0);
535
536 if (unlikely(!(realirq & 0x80)))
537 goto out_unlock;
538
539 realirq = (realirq & 7) + 8;
540 }
541
542 /* mask and ack interrupt */
543 cached_irq_mask |= 1 << realirq;
544 if (unlikely(realirq > 7)) {
545 inb(0xa1);
546 outb(cached_slave_mask, 0xa1);
547 outb(0x60 + (realirq & 7), 0xa0);
548 outb(0x60 + 2, 0x20);
549 } else {
550 inb(0x21);
551 outb(cached_master_mask, 0x21);
552 outb(0x60 + realirq, 0x20);
553 }
554
555 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
556
557 /*
558 * handle this 'virtual interrupt' as a Cobalt one now.
559 */
560 generic_handle_irq(realirq);
561
562 return IRQ_HANDLED;
563
564out_unlock:
565 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
566 return IRQ_NONE;
567}
568
569static struct irqaction master_action = {
570 .handler = piix4_master_intr,
571 .name = "PIIX4-8259",
572};
573
574static struct irqaction cascade_action = {
575 .handler = no_action,
576 .name = "cascade",
577};
578
579static inline void set_piix4_virtual_irq_type(void)
580{
581 piix4_virtual_irq_type.enable = i8259A_chip.unmask;
582 piix4_virtual_irq_type.disable = i8259A_chip.mask;
583 piix4_virtual_irq_type.unmask = i8259A_chip.unmask;
584}
585
586static void __init visws_pre_intr_init(void)
587{
588 int i;
589
590 set_piix4_virtual_irq_type();
591
592 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
593 struct irq_chip *chip = NULL;
594
595 if (i == 0)
596 chip = &cobalt_irq_type;
597 else if (i == CO_IRQ_IDE0)
598 chip = &cobalt_irq_type;
599 else if (i == CO_IRQ_IDE1)
600 			chip = &cobalt_irq_type;
601 else if (i == CO_IRQ_8259)
602 chip = &piix4_master_irq_type;
603 else if (i < CO_IRQ_APIC0)
604 chip = &piix4_virtual_irq_type;
605 else if (IS_CO_APIC(i))
606 chip = &cobalt_irq_type;
607
608 if (chip)
609 set_irq_chip(i, chip);
610 }
611
612 setup_irq(CO_IRQ_8259, &master_action);
613 setup_irq(2, &cascade_action);
614}
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5ffb5622f793..61fb98519622 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -551,8 +551,14 @@ cannot_handle:
551int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) 551int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
552{ 552{
553 if (VMPI.is_vm86pus) { 553 if (VMPI.is_vm86pus) {
554 if ((trapno == 3) || (trapno == 1)) 554 if ((trapno == 3) || (trapno == 1)) {
555 return_to_32bit(regs, VM86_TRAP + (trapno << 8)); 555 KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
556 /* setting this flag forces the code in entry_32.S to
557 call save_v86_state() and change the stack pointer
558 to KVM86->regs32 */
559 set_thread_flag(TIF_IRET);
560 return 0;
561 }
556 do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); 562 do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
557 return 0; 563 return 0;
558 } 564 }
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 38e2b67807e1..e03530aebfd0 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -301,7 +301,7 @@ SECTIONS
301 } 301 }
302 302
303#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) 303#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
304 PERCPU(PAGE_SIZE) 304 PERCPU(THREAD_SIZE)
305#endif 305#endif
306 306
307 . = ALIGN(PAGE_SIZE); 307 . = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index cd6da6bf3eca..ceb2911aa439 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -6,10 +6,12 @@
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/ioport.h> 7#include <linux/ioport.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/pci.h>
9 10
10#include <asm/bios_ebda.h> 11#include <asm/bios_ebda.h>
11#include <asm/paravirt.h> 12#include <asm/paravirt.h>
12#include <asm/pci_x86.h> 13#include <asm/pci_x86.h>
14#include <asm/pci.h>
13#include <asm/mpspec.h> 15#include <asm/mpspec.h>
14#include <asm/setup.h> 16#include <asm/setup.h>
15#include <asm/apic.h> 17#include <asm/apic.h>
@@ -99,3 +101,8 @@ struct x86_platform_ops x86_platform = {
99}; 101};
100 102
101EXPORT_SYMBOL_GPL(x86_platform); 103EXPORT_SYMBOL_GPL(x86_platform);
104struct x86_msi_ops x86_msi = {
105 .setup_msi_irqs = native_setup_msi_irqs,
106 .teardown_msi_irq = native_teardown_msi_irq,
107 .teardown_msi_irqs = default_teardown_msi_irqs,
108};