Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile | 12
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 60
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 6
-rw-r--r--  arch/x86/kernel/alternative.c | 71
-rw-r--r--  arch/x86/kernel/amd_nb.c | 135
-rw-r--r--  arch/x86/kernel/aperture_64.c | 10
-rw-r--r--  arch/x86/kernel/apic/apic.c | 1
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 11
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 36
-rw-r--r--  arch/x86/kernel/apm_32.c | 5
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 4
-rw-r--r--  arch/x86/kernel/bios_uv.c | 215
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 1
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longrun.c | 4
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 4
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 148
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 1
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 26
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 216
-rw-r--r--  arch/x86/kernel/crash_dump_32.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 6
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 8
-rw-r--r--  arch/x86/kernel/efi.c | 613
-rw-r--r--  arch/x86/kernel/efi_32.c | 112
-rw-r--r--  arch/x86/kernel/efi_64.c | 114
-rw-r--r--  arch/x86/kernel/efi_stub_32.S | 123
-rw-r--r--  arch/x86/kernel/efi_stub_64.S | 116
-rw-r--r--  arch/x86/kernel/entry_32.S | 6
-rw-r--r--  arch/x86/kernel/entry_64.S | 20
-rw-r--r--  arch/x86/kernel/head32.c | 1
-rw-r--r--  arch/x86/kernel/head_32.S | 55
-rw-r--r--  arch/x86/kernel/hpet.c | 53
-rw-r--r--  arch/x86/kernel/irq_32.c | 25
-rw-r--r--  arch/x86/kernel/kdebugfs.c | 1
-rw-r--r--  arch/x86/kernel/kgdb.c | 14
-rw-r--r--  arch/x86/kernel/kvmclock.c | 6
-rw-r--r--  arch/x86/kernel/microcode_amd.c | 2
-rw-r--r--  arch/x86/kernel/microcode_core.c | 3
-rw-r--r--  arch/x86/kernel/microcode_intel.c | 2
-rw-r--r--  arch/x86/kernel/mmconf-fam10h_64.c | 7
-rw-r--r--  arch/x86/kernel/mrst.c | 311
-rw-r--r--  arch/x86/kernel/olpc-xo1.c | 140
-rw-r--r--  arch/x86/kernel/olpc.c | 281
-rw-r--r--  arch/x86/kernel/olpc_ofw.c | 112
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 34
-rw-r--r--  arch/x86/kernel/ptrace.c | 17
-rw-r--r--  arch/x86/kernel/pvclock.c | 41
-rw-r--r--  arch/x86/kernel/quirks.c | 2
-rw-r--r--  arch/x86/kernel/reboot.c | 12
-rw-r--r--  arch/x86/kernel/scx200_32.c | 131
-rw-r--r--  arch/x86/kernel/setup.c | 28
-rw-r--r--  arch/x86/kernel/sfi.c | 120
-rw-r--r--  arch/x86/kernel/smp.c | 15
-rw-r--r--  arch/x86/kernel/smpboot.c | 19
-rw-r--r--  arch/x86/kernel/tlb_uv.c | 1655
-rw-r--r--  arch/x86/kernel/trampoline.c | 16
-rw-r--r--  arch/x86/kernel/traps.c | 1
-rw-r--r--  arch/x86/kernel/uv_irq.c | 285
-rw-r--r--  arch/x86/kernel/uv_sysfs.c | 76
-rw-r--r--  arch/x86/kernel/uv_time.c | 423
-rw-r--r--  arch/x86/kernel/visws_quirks.c | 614
-rw-r--r--  arch/x86/kernel/vm86_32.c | 10
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 2
-rw-r--r--  arch/x86/kernel/x86_init.c | 7
67 files changed, 603 insertions, 6011 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2c833d8c4141..9e13763b6092 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,7 +36,6 @@ obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
36obj-y += time.o ioport.o ldt.o dumpstack.o 36obj-y += time.o ioport.o ldt.o dumpstack.o
37obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o 37obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
38obj-$(CONFIG_IRQ_WORK) += irq_work.o 38obj-$(CONFIG_IRQ_WORK) += irq_work.o
39obj-$(CONFIG_X86_VISWS) += visws_quirks.o
40obj-$(CONFIG_X86_32) += probe_roms_32.o 39obj-$(CONFIG_X86_32) += probe_roms_32.o
41obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 40obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
42obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 41obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
@@ -58,7 +57,6 @@ obj-$(CONFIG_INTEL_TXT) += tboot.o
58obj-$(CONFIG_STACKTRACE) += stacktrace.o 57obj-$(CONFIG_STACKTRACE) += stacktrace.o
59obj-y += cpu/ 58obj-y += cpu/
60obj-y += acpi/ 59obj-y += acpi/
61obj-$(CONFIG_SFI) += sfi.o
62obj-y += reboot.o 60obj-y += reboot.o
63obj-$(CONFIG_MCA) += mca_32.o 61obj-$(CONFIG_MCA) += mca_32.o
64obj-$(CONFIG_X86_MSR) += msr.o 62obj-$(CONFIG_X86_MSR) += msr.o
@@ -82,7 +80,6 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
82obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 80obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
83obj-$(CONFIG_KPROBES) += kprobes.o 81obj-$(CONFIG_KPROBES) += kprobes.o
84obj-$(CONFIG_MODULES) += module.o 82obj-$(CONFIG_MODULES) += module.o
85obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
86obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o 83obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
87obj-$(CONFIG_KGDB) += kgdb.o 84obj-$(CONFIG_KGDB) += kgdb.o
88obj-$(CONFIG_VM86) += vm86_32.o 85obj-$(CONFIG_VM86) += vm86_32.o
@@ -104,14 +101,6 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
104 101
105obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o 102obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
106 103
107obj-$(CONFIG_SCx200) += scx200.o
108scx200-y += scx200_32.o
109
110obj-$(CONFIG_OLPC) += olpc.o
111obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
112obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o
113obj-$(CONFIG_X86_MRST) += mrst.o
114
115microcode-y := microcode_core.o 104microcode-y := microcode_core.o
116microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o 105microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
117microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o 106microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
@@ -124,7 +113,6 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
124### 113###
125# 64 bit specific files 114# 64 bit specific files
126ifeq ($(CONFIG_X86_64),y) 115ifeq ($(CONFIG_X86_64),y)
127 obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
128 obj-$(CONFIG_AUDIT) += audit_64.o 116 obj-$(CONFIG_AUDIT) += audit_64.o
129 117
130 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o 118 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c05872aa3ce0..71232b941b6c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -513,35 +513,62 @@ int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
513 return 0; 513 return 0;
514} 514}
515 515
516/* 516static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
517 * success: return IRQ number (>=0) 517 int trigger, int polarity)
518 * failure: return < 0
519 */
520int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
521{ 518{
522 unsigned int irq;
523 unsigned int plat_gsi = gsi;
524
525#ifdef CONFIG_PCI 519#ifdef CONFIG_PCI
526 /* 520 /*
527 * Make sure all (legacy) PCI IRQs are set as level-triggered. 521 * Make sure all (legacy) PCI IRQs are set as level-triggered.
528 */ 522 */
529 if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { 523 if (trigger == ACPI_LEVEL_SENSITIVE)
530 if (trigger == ACPI_LEVEL_SENSITIVE) 524 eisa_set_level_irq(gsi);
531 eisa_set_level_irq(gsi);
532 }
533#endif 525#endif
534 526
527 return gsi;
528}
529
530static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
531 int trigger, int polarity)
532{
535#ifdef CONFIG_X86_IO_APIC 533#ifdef CONFIG_X86_IO_APIC
536 if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { 534 gsi = mp_register_gsi(dev, gsi, trigger, polarity);
537 plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
538 }
539#endif 535#endif
536
537 return gsi;
538}
539
540int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
541 int trigger, int polarity) = acpi_register_gsi_pic;
542
543/*
544 * success: return IRQ number (>=0)
545 * failure: return < 0
546 */
547int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
548{
549 unsigned int irq;
550 unsigned int plat_gsi = gsi;
551
552 plat_gsi = (*__acpi_register_gsi)(dev, gsi, trigger, polarity);
540 irq = gsi_to_irq(plat_gsi); 553 irq = gsi_to_irq(plat_gsi);
541 554
542 return irq; 555 return irq;
543} 556}
544 557
558void __init acpi_set_irq_model_pic(void)
559{
560 acpi_irq_model = ACPI_IRQ_MODEL_PIC;
561 __acpi_register_gsi = acpi_register_gsi_pic;
562 acpi_ioapic = 0;
563}
564
565void __init acpi_set_irq_model_ioapic(void)
566{
567 acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
568 __acpi_register_gsi = acpi_register_gsi_ioapic;
569 acpi_ioapic = 1;
570}
571
545/* 572/*
546 * ACPI based hotplug support for CPU 573 * ACPI based hotplug support for CPU
547 */ 574 */
@@ -1259,8 +1286,7 @@ static void __init acpi_process_madt(void)
1259 */ 1286 */
1260 error = acpi_parse_madt_ioapic_entries(); 1287 error = acpi_parse_madt_ioapic_entries();
1261 if (!error) { 1288 if (!error) {
1262 acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; 1289 acpi_set_irq_model_ioapic();
1263 acpi_ioapic = 1;
1264 1290
1265 smp_found_config = 1; 1291 smp_found_config = 1;
1266 } 1292 }
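
Note on the acpi/boot.c hunks above: the old acpi_register_gsi() branched on
acpi_irq_model at every call; the patch splits the PIC and IO-APIC paths into
acpi_register_gsi_pic()/acpi_register_gsi_ioapic() and dispatches through the
__acpi_register_gsi function pointer, which acpi_set_irq_model_pic() and
acpi_set_irq_model_ioapic() switch during MADT parsing. A minimal,
self-contained sketch of that dispatch pattern follows; the names and stub
bodies here are illustrative placeholders, not the kernel implementations:

/* sketch of the function-pointer dispatch used by __acpi_register_gsi */
#include <stdio.h>

struct device;                          /* opaque, stands in for the kernel type */

static int register_gsi_pic(struct device *dev, unsigned int gsi,
                            int trigger, int polarity)
{
        return (int)gsi;                /* PIC model: GSI maps 1:1 onto the IRQ */
}

static int register_gsi_ioapic(struct device *dev, unsigned int gsi,
                               int trigger, int polarity)
{
        return (int)gsi;                /* kernel would route via mp_register_gsi() */
}

/* default to the PIC variant, as the patch does with acpi_register_gsi_pic */
static int (*register_gsi)(struct device *, unsigned int, int, int) =
        register_gsi_pic;

int main(void)
{
        printf("PIC model: IRQ %d\n", register_gsi(NULL, 9, 0, 0));
        register_gsi = register_gsi_ioapic;     /* acpi_set_irq_model_ioapic() */
        printf("IO-APIC model: IRQ %d\n", register_gsi(NULL, 9, 0, 0));
        return 0;
}
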
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index e1252074ea40..69fd72aa5594 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -13,6 +13,10 @@
13#include <asm/segment.h> 13#include <asm/segment.h>
14#include <asm/desc.h> 14#include <asm/desc.h>
15 15
16#ifdef CONFIG_X86_32
17#include <asm/pgtable.h>
18#endif
19
16#include "realmode/wakeup.h" 20#include "realmode/wakeup.h"
17#include "sleep.h" 21#include "sleep.h"
18 22
@@ -91,7 +95,7 @@ int acpi_save_state_mem(void)
91 95
92#ifndef CONFIG_64BIT 96#ifndef CONFIG_64BIT
93 header->pmode_entry = (u32)&wakeup_pmode_return; 97 header->pmode_entry = (u32)&wakeup_pmode_return;
94 header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET); 98 header->pmode_cr3 = (u32)__pa(&initial_page_table);
95 saved_magic = 0x12345678; 99 saved_magic = 0x12345678;
96#else /* CONFIG_64BIT */ 100#else /* CONFIG_64BIT */
97 header->trampoline_segment = setup_trampoline() >> 4; 101 header->trampoline_segment = setup_trampoline() >> 4;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a36bb90aef53..5079f24c955a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -638,71 +638,32 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
638 atomic_set(&stop_machine_first, 1); 638 atomic_set(&stop_machine_first, 1);
639 wrote_text = 0; 639 wrote_text = 0;
640 /* Use __stop_machine() because the caller already got online_cpus. */ 640 /* Use __stop_machine() because the caller already got online_cpus. */
641 __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); 641 __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
642 return addr; 642 return addr;
643} 643}
644 644
645#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) 645#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
646 646
647unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; 647#ifdef CONFIG_X86_64
648unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
649#else
650unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
651#endif
648 652
649void __init arch_init_ideal_nop5(void) 653void __init arch_init_ideal_nop5(void)
650{ 654{
651 extern const unsigned char ftrace_test_p6nop[];
652 extern const unsigned char ftrace_test_nop5[];
653 extern const unsigned char ftrace_test_jmp[];
654 int faulted = 0;
655
656 /* 655 /*
657 * There is no good nop for all x86 archs. 656 * There is no good nop for all x86 archs. This selection
658 * We will default to using the P6_NOP5, but first we 657 * algorithm should be unified with the one in find_nop_table(),
659 * will test to make sure that the nop will actually 658 * but this should be good enough for now.
660 * work on this CPU. If it faults, we will then
661 * go to a lesser efficient 5 byte nop. If that fails
662 * we then just use a jmp as our nop. This isn't the most
663 * efficient nop, but we can not use a multi part nop
664 * since we would then risk being preempted in the middle
665 * of that nop, and if we enabled tracing then, it might
666 * cause a system crash.
667 * 659 *
668 * TODO: check the cpuid to determine the best nop. 660 * For cases other than the ones below, use the safe (as in
661 * always functional) defaults above.
669 */ 662 */
670 asm volatile ( 663#ifdef CONFIG_X86_64
671 "ftrace_test_jmp:" 664 /* Don't use these on 32 bits due to broken virtualizers */
672 "jmp ftrace_test_p6nop\n" 665 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
673 "nop\n" 666 memcpy(ideal_nop5, p6_nops[5], 5);
674 "nop\n" 667#endif
675 "nop\n" /* 2 byte jmp + 3 bytes */
676 "ftrace_test_p6nop:"
677 P6_NOP5
678 "jmp 1f\n"
679 "ftrace_test_nop5:"
680 ".byte 0x66,0x66,0x66,0x66,0x90\n"
681 "1:"
682 ".section .fixup, \"ax\"\n"
683 "2: movl $1, %0\n"
684 " jmp ftrace_test_nop5\n"
685 "3: movl $2, %0\n"
686 " jmp 1b\n"
687 ".previous\n"
688 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
689 _ASM_EXTABLE(ftrace_test_nop5, 3b)
690 : "=r"(faulted) : "0" (faulted));
691
692 switch (faulted) {
693 case 0:
694 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
695 memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
696 break;
697 case 1:
698 pr_info("converting mcount calls to 66 66 66 66 90\n");
699 memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
700 break;
701 case 2:
702 pr_info("converting mcount calls to jmp . + 5\n");
703 memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
704 break;
705 }
706
707} 668}
708#endif 669#endif
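
For reference, the alternative.c hunks above drop the runtime fault-and-fallback
NOP probe and hard-code the 5-byte NOP choices. The encodings involved (taken
from the removed pr_info strings and the new ideal_nop5 defaults) are:

        0f 1f 44 00 00     P6 NOP, copied from p6_nops[5] on 64-bit Intel CPUs
        66 66 66 66 90     default for CONFIG_X86_64
        3e 8d 74 26 00     default for 32-bit, which avoids the P6 NOP because
                           of broken virtualizers
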
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 8f6463d8ed0d..affacb5e0065 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -12,95 +12,116 @@
12 12
13static u32 *flush_words; 13static u32 *flush_words;
14 14
15struct pci_device_id k8_nb_ids[] = { 15struct pci_device_id amd_nb_misc_ids[] = {
16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, 16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, 18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
19 {} 19 {}
20}; 20};
21EXPORT_SYMBOL(k8_nb_ids); 21EXPORT_SYMBOL(amd_nb_misc_ids);
22 22
23struct k8_northbridge_info k8_northbridges; 23struct amd_northbridge_info amd_northbridges;
24EXPORT_SYMBOL(k8_northbridges); 24EXPORT_SYMBOL(amd_northbridges);
25 25
26static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) 26static struct pci_dev *next_northbridge(struct pci_dev *dev,
27 struct pci_device_id *ids)
27{ 28{
28 do { 29 do {
29 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); 30 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
30 if (!dev) 31 if (!dev)
31 break; 32 break;
32 } while (!pci_match_id(&k8_nb_ids[0], dev)); 33 } while (!pci_match_id(ids, dev));
33 return dev; 34 return dev;
34} 35}
35 36
36int cache_k8_northbridges(void) 37int amd_cache_northbridges(void)
37{ 38{
38 int i; 39 int i = 0;
39 struct pci_dev *dev; 40 struct amd_northbridge *nb;
41 struct pci_dev *misc;
40 42
41 if (k8_northbridges.num) 43 if (amd_nb_num())
42 return 0; 44 return 0;
43 45
44 dev = NULL; 46 misc = NULL;
45 while ((dev = next_k8_northbridge(dev)) != NULL) 47 while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
46 k8_northbridges.num++; 48 i++;
47 49
48 /* some CPU families (e.g. family 0x11) do not support GART */ 50 if (i == 0)
49 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || 51 return 0;
50 boot_cpu_data.x86 == 0x15)
51 k8_northbridges.gart_supported = 1;
52 52
53 k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) * 53 nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
54 sizeof(void *), GFP_KERNEL); 54 if (!nb)
55 if (!k8_northbridges.nb_misc)
56 return -ENOMEM; 55 return -ENOMEM;
57 56
58 if (!k8_northbridges.num) { 57 amd_northbridges.nb = nb;
59 k8_northbridges.nb_misc[0] = NULL; 58 amd_northbridges.num = i;
60 return 0;
61 }
62 59
63 if (k8_northbridges.gart_supported) { 60 misc = NULL;
64 flush_words = kmalloc(k8_northbridges.num * sizeof(u32), 61 for (i = 0; i != amd_nb_num(); i++) {
65 GFP_KERNEL); 62 node_to_amd_nb(i)->misc = misc =
66 if (!flush_words) { 63 next_northbridge(misc, amd_nb_misc_ids);
67 kfree(k8_northbridges.nb_misc); 64 }
68 return -ENOMEM; 65
69 } 66 /* some CPU families (e.g. family 0x11) do not support GART */
70 } 67 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
68 boot_cpu_data.x86 == 0x15)
69 amd_northbridges.flags |= AMD_NB_GART;
70
71 /*
72 * Some CPU families support L3 Cache Index Disable. There are some
73 * limitations because of E382 and E388 on family 0x10.
74 */
75 if (boot_cpu_data.x86 == 0x10 &&
76 boot_cpu_data.x86_model >= 0x8 &&
77 (boot_cpu_data.x86_model > 0x9 ||
78 boot_cpu_data.x86_mask >= 0x1))
79 amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
71 80
72 dev = NULL;
73 i = 0;
74 while ((dev = next_k8_northbridge(dev)) != NULL) {
75 k8_northbridges.nb_misc[i] = dev;
76 if (k8_northbridges.gart_supported)
77 pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
78 }
79 k8_northbridges.nb_misc[i] = NULL;
80 return 0; 81 return 0;
81} 82}
82EXPORT_SYMBOL_GPL(cache_k8_northbridges); 83EXPORT_SYMBOL_GPL(amd_cache_northbridges);
83 84
84/* Ignores subdevice/subvendor but as far as I can figure out 85/* Ignores subdevice/subvendor but as far as I can figure out
85 they're useless anyways */ 86 they're useless anyways */
86int __init early_is_k8_nb(u32 device) 87int __init early_is_amd_nb(u32 device)
87{ 88{
88 struct pci_device_id *id; 89 struct pci_device_id *id;
89 u32 vendor = device & 0xffff; 90 u32 vendor = device & 0xffff;
90 device >>= 16; 91 device >>= 16;
91 for (id = k8_nb_ids; id->vendor; id++) 92 for (id = amd_nb_misc_ids; id->vendor; id++)
92 if (vendor == id->vendor && device == id->device) 93 if (vendor == id->vendor && device == id->device)
93 return 1; 94 return 1;
94 return 0; 95 return 0;
95} 96}
96 97
97void k8_flush_garts(void) 98int amd_cache_gart(void)
99{
100 int i;
101
102 if (!amd_nb_has_feature(AMD_NB_GART))
103 return 0;
104
105 flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
106 if (!flush_words) {
107 amd_northbridges.flags &= ~AMD_NB_GART;
108 return -ENOMEM;
109 }
110
111 for (i = 0; i != amd_nb_num(); i++)
112 pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
113 &flush_words[i]);
114
115 return 0;
116}
117
118void amd_flush_garts(void)
98{ 119{
99 int flushed, i; 120 int flushed, i;
100 unsigned long flags; 121 unsigned long flags;
101 static DEFINE_SPINLOCK(gart_lock); 122 static DEFINE_SPINLOCK(gart_lock);
102 123
103 if (!k8_northbridges.gart_supported) 124 if (!amd_nb_has_feature(AMD_NB_GART))
104 return; 125 return;
105 126
106 /* Avoid races between AGP and IOMMU. In theory it's not needed 127 /* Avoid races between AGP and IOMMU. In theory it's not needed
@@ -109,16 +130,16 @@ void k8_flush_garts(void)
109 that it doesn't matter to serialize more. -AK */ 130 that it doesn't matter to serialize more. -AK */
110 spin_lock_irqsave(&gart_lock, flags); 131 spin_lock_irqsave(&gart_lock, flags);
111 flushed = 0; 132 flushed = 0;
112 for (i = 0; i < k8_northbridges.num; i++) { 133 for (i = 0; i < amd_nb_num(); i++) {
113 pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c, 134 pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
114 flush_words[i]|1); 135 flush_words[i] | 1);
115 flushed++; 136 flushed++;
116 } 137 }
117 for (i = 0; i < k8_northbridges.num; i++) { 138 for (i = 0; i < amd_nb_num(); i++) {
118 u32 w; 139 u32 w;
119 /* Make sure the hardware actually executed the flush*/ 140 /* Make sure the hardware actually executed the flush*/
120 for (;;) { 141 for (;;) {
121 pci_read_config_dword(k8_northbridges.nb_misc[i], 142 pci_read_config_dword(node_to_amd_nb(i)->misc,
122 0x9c, &w); 143 0x9c, &w);
123 if (!(w & 1)) 144 if (!(w & 1))
124 break; 145 break;
@@ -129,19 +150,23 @@ void k8_flush_garts(void)
129 if (!flushed) 150 if (!flushed)
130 printk("nothing to flush?\n"); 151 printk("nothing to flush?\n");
131} 152}
132EXPORT_SYMBOL_GPL(k8_flush_garts); 153EXPORT_SYMBOL_GPL(amd_flush_garts);
133 154
134static __init int init_k8_nbs(void) 155static __init int init_amd_nbs(void)
135{ 156{
136 int err = 0; 157 int err = 0;
137 158
138 err = cache_k8_northbridges(); 159 err = amd_cache_northbridges();
139 160
140 if (err < 0) 161 if (err < 0)
141 printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n"); 162 printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
163
164 if (amd_cache_gart() < 0)
165 printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
166 "GART support disabled.\n");
142 167
143 return err; 168 return err;
144} 169}
145 170
146/* This has to go after the PCI subsystem */ 171/* This has to go after the PCI subsystem */
147fs_initcall(init_k8_nbs); 172fs_initcall(init_amd_nbs);
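
The amd_nb.c rework above replaces the k8_northbridges array handling with a
cached amd_northbridges structure reached through amd_nb_num(),
node_to_amd_nb() and amd_nb_has_feature(). A hedged sketch of a consumer after
this change (the function name is hypothetical, error handling is omitted, and
it assumes the usual PCI and amd_nb declarations are in scope):

/* hypothetical consumer of the reworked AMD northbridge API shown above */
static void dump_gart_flush_words(void)
{
        u32 w;
        int i;

        if (!amd_nb_has_feature(AMD_NB_GART))
                return;                 /* CPU family without GART support */

        for (i = 0; i < amd_nb_num(); i++) {
                pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, &w);
                pr_info("NB %d: GART flush word 0x%08x\n", i, w);
        }
}
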
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index b3a16e8f0703..dcd7c83e1659 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -206,7 +206,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
206 * Do an PCI bus scan by hand because we're running before the PCI 206 * Do an PCI bus scan by hand because we're running before the PCI
207 * subsystem. 207 * subsystem.
208 * 208 *
209 * All K8 AGP bridges are AGPv3 compliant, so we can do this scan 209 * All AMD AGP bridges are AGPv3 compliant, so we can do this scan
210 * generically. It's probably overkill to always scan all slots because 210 * generically. It's probably overkill to always scan all slots because
211 * the AGP bridges should be always an own bus on the HT hierarchy, 211 * the AGP bridges should be always an own bus on the HT hierarchy,
212 * but do it here for future safety. 212 * but do it here for future safety.
@@ -303,7 +303,7 @@ void __init early_gart_iommu_check(void)
303 dev_limit = bus_dev_ranges[i].dev_limit; 303 dev_limit = bus_dev_ranges[i].dev_limit;
304 304
305 for (slot = dev_base; slot < dev_limit; slot++) { 305 for (slot = dev_base; slot < dev_limit; slot++) {
306 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 306 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
307 continue; 307 continue;
308 308
309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -358,7 +358,7 @@ void __init early_gart_iommu_check(void)
358 dev_limit = bus_dev_ranges[i].dev_limit; 358 dev_limit = bus_dev_ranges[i].dev_limit;
359 359
360 for (slot = dev_base; slot < dev_limit; slot++) { 360 for (slot = dev_base; slot < dev_limit; slot++) {
361 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 361 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
362 continue; 362 continue;
363 363
364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -400,7 +400,7 @@ int __init gart_iommu_hole_init(void)
400 dev_limit = bus_dev_ranges[i].dev_limit; 400 dev_limit = bus_dev_ranges[i].dev_limit;
401 401
402 for (slot = dev_base; slot < dev_limit; slot++) { 402 for (slot = dev_base; slot < dev_limit; slot++) {
403 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 403 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
404 continue; 404 continue;
405 405
406 iommu_detected = 1; 406 iommu_detected = 1;
@@ -518,7 +518,7 @@ out:
518 dev_base = bus_dev_ranges[i].dev_base; 518 dev_base = bus_dev_ranges[i].dev_base;
519 dev_limit = bus_dev_ranges[i].dev_limit; 519 dev_limit = bus_dev_ranges[i].dev_limit;
520 for (slot = dev_base; slot < dev_limit; slot++) { 520 for (slot = dev_base; slot < dev_limit; slot++) {
521 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 521 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
522 continue; 522 continue;
523 523
524 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); 524 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 463839645f9b..c48a64510844 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -52,7 +52,6 @@
52#include <asm/mce.h> 52#include <asm/mce.h>
53#include <asm/kvm_para.h> 53#include <asm/kvm_para.h>
54#include <asm/tsc.h> 54#include <asm/tsc.h>
55#include <asm/atomic.h>
56 55
57unsigned int num_processors; 56unsigned int num_processors;
58 57
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 4f026a632c95..4abf08aab3d4 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3113,7 +3113,7 @@ void destroy_irq(unsigned int irq)
3113 3113
3114 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); 3114 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
3115 3115
3116 if (intr_remapping_enabled) 3116 if (irq_remapped(cfg))
3117 free_irte(irq); 3117 free_irte(irq);
3118 raw_spin_lock_irqsave(&vector_lock, flags); 3118 raw_spin_lock_irqsave(&vector_lock, flags);
3119 __clear_irq_vector(irq, cfg); 3119 __clear_irq_vector(irq, cfg);
@@ -3335,7 +3335,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3335 return 0; 3335 return 0;
3336} 3336}
3337 3337
3338int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3338int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3339{ 3339{
3340 int node, ret, sub_handle, index = 0; 3340 int node, ret, sub_handle, index = 0;
3341 unsigned int irq, irq_want; 3341 unsigned int irq, irq_want;
@@ -3393,7 +3393,7 @@ error:
3393 return ret; 3393 return ret;
3394} 3394}
3395 3395
3396void arch_teardown_msi_irq(unsigned int irq) 3396void native_teardown_msi_irq(unsigned int irq)
3397{ 3397{
3398 destroy_irq(irq); 3398 destroy_irq(irq);
3399} 3399}
@@ -3654,6 +3654,11 @@ static void __init probe_nr_irqs_gsi(void)
3654 printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); 3654 printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
3655} 3655}
3656 3656
3657int get_nr_irqs_gsi(void)
3658{
3659 return nr_irqs_gsi;
3660}
3661
3657#ifdef CONFIG_SPARSE_IRQ 3662#ifdef CONFIG_SPARSE_IRQ
3658int __init arch_probe_nr_irqs(void) 3663int __init arch_probe_nr_irqs(void)
3659{ 3664{
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f744f54cb248..194539aea175 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV APIC functions (note: not an Intel compatible APIC) 6 * SGI UV APIC functions (note: not an Intel compatible APIC)
7 * 7 *
8 * Copyright (C) 2007-2009 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10#include <linux/cpumask.h> 10#include <linux/cpumask.h>
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
@@ -41,6 +41,7 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
41 41
42static enum uv_system_type uv_system_type; 42static enum uv_system_type uv_system_type;
43static u64 gru_start_paddr, gru_end_paddr; 43static u64 gru_start_paddr, gru_end_paddr;
44static union uvh_apicid uvh_apicid;
44int uv_min_hub_revision_id; 45int uv_min_hub_revision_id;
45EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); 46EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
46static DEFINE_SPINLOCK(uv_nmi_lock); 47static DEFINE_SPINLOCK(uv_nmi_lock);
@@ -70,12 +71,27 @@ static int early_get_nodeid(void)
70 return node_id.s.node_id; 71 return node_id.s.node_id;
71} 72}
72 73
74static void __init early_get_apic_pnode_shift(void)
75{
76 unsigned long *mmr;
77
78 mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr));
79 uvh_apicid.v = *mmr;
80 early_iounmap(mmr, sizeof(*mmr));
81 if (!uvh_apicid.v)
82 /*
83 * Old bios, use default value
84 */
85 uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
86}
87
73static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 88static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
74{ 89{
75 int nodeid; 90 int nodeid;
76 91
77 if (!strcmp(oem_id, "SGI")) { 92 if (!strcmp(oem_id, "SGI")) {
78 nodeid = early_get_nodeid(); 93 nodeid = early_get_nodeid();
94 early_get_apic_pnode_shift();
79 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; 95 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
80 x86_platform.nmi_init = uv_nmi_init; 96 x86_platform.nmi_init = uv_nmi_init;
81 if (!strcmp(oem_table_id, "UVL")) 97 if (!strcmp(oem_table_id, "UVL"))
@@ -84,7 +100,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
84 uv_system_type = UV_X2APIC; 100 uv_system_type = UV_X2APIC;
85 else if (!strcmp(oem_table_id, "UVH")) { 101 else if (!strcmp(oem_table_id, "UVH")) {
86 __get_cpu_var(x2apic_extra_bits) = 102 __get_cpu_var(x2apic_extra_bits) =
87 nodeid << (UV_APIC_PNODE_SHIFT - 1); 103 nodeid << (uvh_apicid.s.pnode_shift - 1);
88 uv_system_type = UV_NON_UNIQUE_APIC; 104 uv_system_type = UV_NON_UNIQUE_APIC;
89 return 1; 105 return 1;
90 } 106 }
@@ -363,14 +379,14 @@ struct redir_addr {
363#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 379#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
364 380
365static __initdata struct redir_addr redir_addrs[] = { 381static __initdata struct redir_addr redir_addrs[] = {
366 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG}, 382 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR},
367 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG}, 383 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR},
368 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG}, 384 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR},
369}; 385};
370 386
371static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) 387static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
372{ 388{
373 union uvh_si_alias0_overlay_config_u alias; 389 union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
374 union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; 390 union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
375 int i; 391 int i;
376 392
@@ -644,7 +660,7 @@ void uv_nmi_init(void)
644 660
645void __init uv_system_init(void) 661void __init uv_system_init(void)
646{ 662{
647 union uvh_si_addr_map_config_u m_n_config; 663 union uvh_rh_gam_config_mmr_u m_n_config;
648 union uvh_node_id_u node_id; 664 union uvh_node_id_u node_id;
649 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; 665 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
650 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; 666 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
@@ -654,7 +670,7 @@ void __init uv_system_init(void)
654 670
655 map_low_mmrs(); 671 map_low_mmrs();
656 672
657 m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); 673 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
658 m_val = m_n_config.s.m_skt; 674 m_val = m_n_config.s.m_skt;
659 n_val = m_n_config.s.n_skt; 675 n_val = m_n_config.s.n_skt;
660 mmr_base = 676 mmr_base =
@@ -716,6 +732,10 @@ void __init uv_system_init(void)
716 int apicid = per_cpu(x86_cpu_to_apicid, cpu); 732 int apicid = per_cpu(x86_cpu_to_apicid, cpu);
717 733
718 nid = cpu_to_node(cpu); 734 nid = cpu_to_node(cpu);
735 /*
736 * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
737 */
738 uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
719 pnode = uv_apicid_to_pnode(apicid); 739 pnode = uv_apicid_to_pnode(apicid);
720 blade = boot_pnode_to_blade(pnode); 740 blade = boot_pnode_to_blade(pnode);
721 lcpu = uv_blade_info[blade].nr_possible_cpus; 741 lcpu = uv_blade_info[blade].nr_possible_cpus;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 4c9c67bf09b7..0e4f24c2a746 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -189,8 +189,8 @@
189 * Intel Order Number 241704-001. Microsoft Part Number 781-110-X01. 189 * Intel Order Number 241704-001. Microsoft Part Number 781-110-X01.
190 * 190 *
191 * [This document is available free from Intel by calling 800.628.8686 (fax 191 * [This document is available free from Intel by calling 800.628.8686 (fax
192 * 916.356.6100) or 800.548.4725; or via anonymous ftp from 192 * 916.356.6100) or 800.548.4725; or from
193 * ftp://ftp.intel.com/pub/IAL/software_specs/apmv11.doc. It is also 193 * http://www.microsoft.com/whdc/archive/amp_12.mspx It is also
194 * available from Microsoft by calling 206.882.8080.] 194 * available from Microsoft by calling 206.882.8080.]
195 * 195 *
196 * APM 1.2 Reference: 196 * APM 1.2 Reference:
@@ -1926,6 +1926,7 @@ static const struct file_operations apm_bios_fops = {
1926 .unlocked_ioctl = do_ioctl, 1926 .unlocked_ioctl = do_ioctl,
1927 .open = do_open, 1927 .open = do_open,
1928 .release = do_release, 1928 .release = do_release,
1929 .llseek = noop_llseek,
1929}; 1930};
1930 1931
1931static struct miscdevice apm_device = { 1932static struct miscdevice apm_device = {
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index dfdbf6403895..1a4088dda37a 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -99,9 +99,7 @@ void foo(void)
99 99
100 DEFINE(PAGE_SIZE_asm, PAGE_SIZE); 100 DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
101 DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT); 101 DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
102 DEFINE(PTRS_PER_PTE, PTRS_PER_PTE); 102 DEFINE(THREAD_SIZE_asm, THREAD_SIZE);
103 DEFINE(PTRS_PER_PMD, PTRS_PER_PMD);
104 DEFINE(PTRS_PER_PGD, PTRS_PER_PGD);
105 103
106 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); 104 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
107 105
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
deleted file mode 100644
index 8bc57baaa9ad..000000000000
--- a/arch/x86/kernel/bios_uv.c
+++ /dev/null
@@ -1,215 +0,0 @@
1/*
2 * BIOS run time interface routines.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson <rja@sgi.com>
20 */
21
22#include <linux/efi.h>
23#include <asm/efi.h>
24#include <linux/io.h>
25#include <asm/uv/bios.h>
26#include <asm/uv/uv_hub.h>
27
28static struct uv_systab uv_systab;
29
30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
31{
32 struct uv_systab *tab = &uv_systab;
33 s64 ret;
34
35 if (!tab->function)
36 /*
37 * BIOS does not support UV systab
38 */
39 return BIOS_STATUS_UNIMPLEMENTED;
40
41 ret = efi_call6((void *)__va(tab->function), (u64)which,
42 a1, a2, a3, a4, a5);
43 return ret;
44}
45EXPORT_SYMBOL_GPL(uv_bios_call);
46
47s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
48 u64 a4, u64 a5)
49{
50 unsigned long bios_flags;
51 s64 ret;
52
53 local_irq_save(bios_flags);
54 ret = uv_bios_call(which, a1, a2, a3, a4, a5);
55 local_irq_restore(bios_flags);
56
57 return ret;
58}
59
60s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
61 u64 a4, u64 a5)
62{
63 s64 ret;
64
65 preempt_disable();
66 ret = uv_bios_call(which, a1, a2, a3, a4, a5);
67 preempt_enable();
68
69 return ret;
70}
71
72
73long sn_partition_id;
74EXPORT_SYMBOL_GPL(sn_partition_id);
75long sn_coherency_id;
76EXPORT_SYMBOL_GPL(sn_coherency_id);
77long sn_region_size;
78EXPORT_SYMBOL_GPL(sn_region_size);
79long system_serial_number;
80EXPORT_SYMBOL_GPL(system_serial_number);
81int uv_type;
82EXPORT_SYMBOL_GPL(uv_type);
83
84
85s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
86 long *region, long *ssn)
87{
88 s64 ret;
89 u64 v0, v1;
90 union partition_info_u part;
91
92 ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc,
93 (u64)(&v0), (u64)(&v1), 0, 0);
94 if (ret != BIOS_STATUS_SUCCESS)
95 return ret;
96
97 part.val = v0;
98 if (uvtype)
99 *uvtype = part.hub_version;
100 if (partid)
101 *partid = part.partition_id;
102 if (coher)
103 *coher = part.coherence_id;
104 if (region)
105 *region = part.region_size;
106 if (ssn)
107 *ssn = v1;
108 return ret;
109}
110EXPORT_SYMBOL_GPL(uv_bios_get_sn_info);
111
112int
113uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size,
114 unsigned long *intr_mmr_offset)
115{
116 u64 watchlist;
117 s64 ret;
118
119 /*
120 * bios returns watchlist number or negative error number.
121 */
122 ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr,
123 mq_size, (u64)intr_mmr_offset,
124 (u64)&watchlist, 0);
125 if (ret < BIOS_STATUS_SUCCESS)
126 return ret;
127
128 return watchlist;
129}
130EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc);
131
132int
133uv_bios_mq_watchlist_free(int blade, int watchlist_num)
134{
135 return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE,
136 blade, watchlist_num, 0, 0, 0);
137}
138EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free);
139
140s64
141uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms)
142{
143 return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len,
144 perms, 0, 0);
145}
146EXPORT_SYMBOL_GPL(uv_bios_change_memprotect);
147
148s64
149uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
150{
151 s64 ret;
152
153 ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
154 (u64)addr, buf, (u64)len, 0);
155 return ret;
156}
157EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa);
158
159s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
160{
161 return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
162 (u64)ticks_per_second, 0, 0, 0);
163}
164EXPORT_SYMBOL_GPL(uv_bios_freq_base);
165
166/*
167 * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target
168 * @decode: true to enable target, false to disable target
169 * @domain: PCI domain number
170 * @bus: PCI bus number
171 *
172 * Returns:
173 * 0: Success
174 * -EINVAL: Invalid domain or bus number
175 * -ENOSYS: Capability not available
176 * -EBUSY: Legacy VGA I/O cannot be retargeted at this time
177 */
178int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
179{
180 return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET,
181 (u64)decode, (u64)domain, (u64)bus, 0, 0);
182}
183EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
184
185
186#ifdef CONFIG_EFI
187void uv_bios_init(void)
188{
189 struct uv_systab *tab;
190
191 if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
192 (efi.uv_systab == (unsigned long)NULL)) {
193 printk(KERN_CRIT "No EFI UV System Table.\n");
194 uv_systab.function = (unsigned long)NULL;
195 return;
196 }
197
198 tab = (struct uv_systab *)ioremap(efi.uv_systab,
199 sizeof(struct uv_systab));
200 if (strncmp(tab->signature, "UVST", 4) != 0)
201 printk(KERN_ERR "bad signature in UV system table!");
202
203 /*
204 * Copy table to permanent spot for later use.
205 */
206 memcpy(&uv_systab, tab, sizeof(struct uv_systab));
207 iounmap(tab);
208
209 printk(KERN_INFO "EFI UV System Table Revision %d\n",
210 uv_systab.revision);
211}
212#else /* !CONFIG_EFI */
213
214void uv_bios_init(void) { }
215#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index cd8da247dda1..a2baafb2fe6d 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -701,6 +701,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
701 per_cpu(acfreq_data, policy->cpu) = NULL; 701 per_cpu(acfreq_data, policy->cpu) = NULL;
702 acpi_processor_unregister_performance(data->acpi_data, 702 acpi_processor_unregister_performance(data->acpi_data,
703 policy->cpu); 703 policy->cpu);
704 kfree(data->freq_table);
704 kfree(data); 705 kfree(data);
705 } 706 }
706 707
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index 733093d60436..141abebc4516 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -393,7 +393,7 @@ static struct cpufreq_driver nforce2_driver = {
393 * Detects nForce2 A2 and C1 stepping 393 * Detects nForce2 A2 and C1 stepping
394 * 394 *
395 */ 395 */
396static unsigned int nforce2_detect_chipset(void) 396static int nforce2_detect_chipset(void)
397{ 397{
398 nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 398 nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
399 PCI_DEVICE_ID_NVIDIA_NFORCE2, 399 PCI_DEVICE_ID_NVIDIA_NFORCE2,
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index fc09f142d94d..d9f51367666b 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -35,7 +35,7 @@ static unsigned int longrun_low_freq, longrun_high_freq;
35 * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS 35 * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
36 * and MSR_TMTA_LONGRUN_CTRL 36 * and MSR_TMTA_LONGRUN_CTRL
37 */ 37 */
38static void __init longrun_get_policy(struct cpufreq_policy *policy) 38static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy)
39{ 39{
40 u32 msr_lo, msr_hi; 40 u32 msr_lo, msr_hi;
41 41
@@ -165,7 +165,7 @@ static unsigned int longrun_get(unsigned int cpu)
165 * TMTA rules: 165 * TMTA rules:
166 * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) 166 * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
167 */ 167 */
168static unsigned int __cpuinit longrun_determine_freqs(unsigned int *low_freq, 168static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
169 unsigned int *high_freq) 169 unsigned int *high_freq)
170{ 170{
171 u32 msr_lo, msr_hi; 171 u32 msr_lo, msr_hi;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 695f17731e23..d16c2c53d6bf 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -284,9 +284,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
284 /* Don't do the funky fallback heuristics the AMD version employs 284 /* Don't do the funky fallback heuristics the AMD version employs
285 for now. */ 285 for now. */
286 node = apicid_to_node[apicid]; 286 node = apicid_to_node[apicid];
287 if (node == NUMA_NO_NODE) 287 if (node == NUMA_NO_NODE || !node_online(node)) {
288 node = first_node(node_online_map);
289 else if (!node_online(node)) {
290 /* reuse the value from init_cpu_to_node() */ 288 /* reuse the value from init_cpu_to_node() */
291 node = cpu_to_node(cpu); 289 node = cpu_to_node(cpu);
292 } 290 }
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 12cd823c8d03..9ecf81f9b90f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx {
149}; 149};
150 150
151struct amd_l3_cache { 151struct amd_l3_cache {
152 struct pci_dev *dev; 152 struct amd_northbridge *nb;
153 bool can_disable;
154 unsigned indices; 153 unsigned indices;
155 u8 subcaches[4]; 154 u8 subcaches[4];
156}; 155};
@@ -311,14 +310,12 @@ struct _cache_attr {
311/* 310/*
312 * L3 cache descriptors 311 * L3 cache descriptors
313 */ 312 */
314static struct amd_l3_cache **__cpuinitdata l3_caches;
315
316static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) 313static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
317{ 314{
318 unsigned int sc0, sc1, sc2, sc3; 315 unsigned int sc0, sc1, sc2, sc3;
319 u32 val = 0; 316 u32 val = 0;
320 317
321 pci_read_config_dword(l3->dev, 0x1C4, &val); 318 pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
322 319
323 /* calculate subcache sizes */ 320 /* calculate subcache sizes */
324 l3->subcaches[0] = sc0 = !(val & BIT(0)); 321 l3->subcaches[0] = sc0 = !(val & BIT(0));
@@ -327,49 +324,17 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
327 l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); 324 l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
328 325
329 l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; 326 l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
327 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
330} 328}
331 329
332static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) 330static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
333{ 331 int index)
334 struct amd_l3_cache *l3;
335 struct pci_dev *dev = node_to_k8_nb_misc(node);
336
337 l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
338 if (!l3) {
339 printk(KERN_WARNING "Error allocating L3 struct\n");
340 return NULL;
341 }
342
343 l3->dev = dev;
344
345 amd_calc_l3_indices(l3);
346
347 return l3;
348}
349
350static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
351 int index)
352{ 332{
333 static struct amd_l3_cache *__cpuinitdata l3_caches;
353 int node; 334 int node;
354 335
355 if (boot_cpu_data.x86 != 0x10) 336 /* only for L3, and not in virtualized environments */
356 return; 337 if (index < 3 || amd_nb_num() == 0)
357
358 if (index < 3)
359 return;
360
361 /* see errata #382 and #388 */
362 if (boot_cpu_data.x86_model < 0x8)
363 return;
364
365 if ((boot_cpu_data.x86_model == 0x8 ||
366 boot_cpu_data.x86_model == 0x9)
367 &&
368 boot_cpu_data.x86_mask < 0x1)
369 return;
370
371 /* not in virtualized environments */
372 if (k8_northbridges.num == 0)
373 return; 338 return;
374 339
375 /* 340 /*
@@ -377,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
377 * never freed but this is done only on shutdown so it doesn't matter. 342 * never freed but this is done only on shutdown so it doesn't matter.
378 */ 343 */
379 if (!l3_caches) { 344 if (!l3_caches) {
380 int size = k8_northbridges.num * sizeof(struct amd_l3_cache *); 345 int size = amd_nb_num() * sizeof(struct amd_l3_cache);
381 346
382 l3_caches = kzalloc(size, GFP_ATOMIC); 347 l3_caches = kzalloc(size, GFP_ATOMIC);
383 if (!l3_caches) 348 if (!l3_caches)
@@ -386,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
386 351
387 node = amd_get_nb_id(smp_processor_id()); 352 node = amd_get_nb_id(smp_processor_id());
388 353
389 if (!l3_caches[node]) { 354 if (!l3_caches[node].nb) {
390 l3_caches[node] = amd_init_l3_cache(node); 355 l3_caches[node].nb = node_to_amd_nb(node);
391 l3_caches[node]->can_disable = true; 356 amd_calc_l3_indices(&l3_caches[node]);
392 } 357 }
393 358
394 WARN_ON(!l3_caches[node]); 359 this_leaf->l3 = &l3_caches[node];
395
396 this_leaf->l3 = l3_caches[node];
397} 360}
398 361
399/* 362/*
@@ -407,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
407{ 370{
408 unsigned int reg = 0; 371 unsigned int reg = 0;
409 372
410 pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg); 373 pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
411 374
412 /* check whether this slot is activated already */ 375 /* check whether this slot is activated already */
413 if (reg & (3UL << 30)) 376 if (reg & (3UL << 30))
@@ -421,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
421{ 384{
422 int index; 385 int index;
423 386
424 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 387 if (!this_leaf->l3 ||
388 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
425 return -EINVAL; 389 return -EINVAL;
426 390
427 index = amd_get_l3_disable_slot(this_leaf->l3, slot); 391 index = amd_get_l3_disable_slot(this_leaf->l3, slot);
@@ -456,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
456 if (!l3->subcaches[i]) 420 if (!l3->subcaches[i])
457 continue; 421 continue;
458 422
459 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); 423 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
460 424
461 /* 425 /*
462 * We need to WBINVD on a core on the node containing the L3 426 * We need to WBINVD on a core on the node containing the L3
@@ -466,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
466 wbinvd_on_cpu(cpu); 430 wbinvd_on_cpu(cpu);
467 431
468 reg |= BIT(31); 432 reg |= BIT(31);
469 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); 433 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
470 } 434 }
471} 435}
472 436
@@ -523,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
523 if (!capable(CAP_SYS_ADMIN)) 487 if (!capable(CAP_SYS_ADMIN))
524 return -EPERM; 488 return -EPERM;
525 489
526 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 490 if (!this_leaf->l3 ||
491 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
527 return -EINVAL; 492 return -EINVAL;
528 493
529 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); 494 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
@@ -544,7 +509,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
544#define STORE_CACHE_DISABLE(slot) \ 509#define STORE_CACHE_DISABLE(slot) \
545static ssize_t \ 510static ssize_t \
546store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ 511store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
547 const char *buf, size_t count) \ 512 const char *buf, size_t count) \
548{ \ 513{ \
549 return store_cache_disable(this_leaf, buf, count, slot); \ 514 return store_cache_disable(this_leaf, buf, count, slot); \
550} 515}
@@ -557,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
557 show_cache_disable_1, store_cache_disable_1); 522 show_cache_disable_1, store_cache_disable_1);
558 523
559#else /* CONFIG_AMD_NB */ 524#else /* CONFIG_AMD_NB */
560static void __cpuinit 525#define amd_init_l3_cache(x, y)
561amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
562{
563};
564#endif /* CONFIG_AMD_NB */ 526#endif /* CONFIG_AMD_NB */
565 527
566static int 528static int
@@ -574,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
574 536
575 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { 537 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
576 amd_cpuid4(index, &eax, &ebx, &ecx); 538 amd_cpuid4(index, &eax, &ebx, &ecx);
577 amd_check_l3_disable(this_leaf, index); 539 amd_init_l3_cache(this_leaf, index);
578 } else { 540 } else {
579 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); 541 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
580 } 542 }
@@ -982,30 +944,48 @@ define_one_ro(size);
982define_one_ro(shared_cpu_map); 944define_one_ro(shared_cpu_map);
983define_one_ro(shared_cpu_list); 945define_one_ro(shared_cpu_list);
984 946
985#define DEFAULT_SYSFS_CACHE_ATTRS \
986 &type.attr, \
987 &level.attr, \
988 &coherency_line_size.attr, \
989 &physical_line_partition.attr, \
990 &ways_of_associativity.attr, \
991 &number_of_sets.attr, \
992 &size.attr, \
993 &shared_cpu_map.attr, \
994 &shared_cpu_list.attr
995
996static struct attribute *default_attrs[] = { 947static struct attribute *default_attrs[] = {
997 DEFAULT_SYSFS_CACHE_ATTRS, 948 &type.attr,
949 &level.attr,
950 &coherency_line_size.attr,
951 &physical_line_partition.attr,
952 &ways_of_associativity.attr,
953 &number_of_sets.attr,
954 &size.attr,
955 &shared_cpu_map.attr,
956 &shared_cpu_list.attr,
998 NULL 957 NULL
999}; 958};
1000 959
1001static struct attribute *default_l3_attrs[] = {
1002 DEFAULT_SYSFS_CACHE_ATTRS,
1003#ifdef CONFIG_AMD_NB 960#ifdef CONFIG_AMD_NB
1004 &cache_disable_0.attr, 961static struct attribute ** __cpuinit amd_l3_attrs(void)
1005 &cache_disable_1.attr, 962{
963 static struct attribute **attrs;
964 int n;
965
966 if (attrs)
967 return attrs;
968
969 n = sizeof (default_attrs) / sizeof (struct attribute *);
970
971 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
972 n += 2;
973
974 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
975 if (attrs == NULL)
976 return attrs = default_attrs;
977
978 for (n = 0; default_attrs[n]; n++)
979 attrs[n] = default_attrs[n];
980
981 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
982 attrs[n++] = &cache_disable_0.attr;
983 attrs[n++] = &cache_disable_1.attr;
984 }
985
986 return attrs;
987}
1006#endif 988#endif
1007 NULL
1008};
1009 989
1010static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) 990static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1011{ 991{
@@ -1116,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1116 1096
1117 this_leaf = CPUID4_INFO_IDX(cpu, i); 1097 this_leaf = CPUID4_INFO_IDX(cpu, i);
1118 1098
1119 if (this_leaf->l3 && this_leaf->l3->can_disable) 1099 ktype_cache.default_attrs = default_attrs;
1120 ktype_cache.default_attrs = default_l3_attrs; 1100#ifdef CONFIG_AMD_NB
1121 else 1101 if (this_leaf->l3)
1122 ktype_cache.default_attrs = default_attrs; 1102 ktype_cache.default_attrs = amd_l3_attrs();
1123 1103#endif
1124 retval = kobject_init_and_add(&(this_object->kobj), 1104 retval = kobject_init_and_add(&(this_object->kobj),
1125 &ktype_cache, 1105 &ktype_cache,
1126 per_cpu(ici_cache_kobject, cpu), 1106 per_cpu(ici_cache_kobject, cpu),
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 8a85dd1b1aa1..1e8d66c1336a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -192,6 +192,7 @@ static const struct file_operations severities_coverage_fops = {
192 .release = seq_release, 192 .release = seq_release,
193 .read = seq_read, 193 .read = seq_read,
194 .write = severities_coverage_write, 194 .write = severities_coverage_write,
195 .llseek = seq_lseek,
195}; 196};
196 197
197static int __init severities_debugfs_init(void) 198static int __init severities_debugfs_init(void)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ed41562909fe..7a35b72d7c03 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1665,6 +1665,7 @@ struct file_operations mce_chrdev_ops = {
1665 .read = mce_read, 1665 .read = mce_read,
1666 .poll = mce_poll, 1666 .poll = mce_poll,
1667 .unlocked_ioctl = mce_ioctl, 1667 .unlocked_ioctl = mce_ioctl,
1668 .llseek = no_llseek,
1668}; 1669};
1669EXPORT_SYMBOL_GPL(mce_chrdev_ops); 1670EXPORT_SYMBOL_GPL(mce_chrdev_ops);
1670 1671
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fe73c1844a9a..ed6310183efb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -49,7 +49,6 @@ static unsigned long
49copy_from_user_nmi(void *to, const void __user *from, unsigned long n) 49copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
50{ 50{
51 unsigned long offset, addr = (unsigned long)from; 51 unsigned long offset, addr = (unsigned long)from;
52 int type = in_nmi() ? KM_NMI : KM_IRQ0;
53 unsigned long size, len = 0; 52 unsigned long size, len = 0;
54 struct page *page; 53 struct page *page;
55 void *map; 54 void *map;
@@ -63,9 +62,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
63 offset = addr & (PAGE_SIZE - 1); 62 offset = addr & (PAGE_SIZE - 1);
64 size = min(PAGE_SIZE - offset, n - len); 63 size = min(PAGE_SIZE - offset, n - len);
65 64
66 map = kmap_atomic(page, type); 65 map = kmap_atomic(page);
67 memcpy(to, map+offset, size); 66 memcpy(to, map+offset, size);
68 kunmap_atomic(map, type); 67 kunmap_atomic(map);
69 put_page(page); 68 put_page(page);
70 69
71 len += size; 70 len += size;
@@ -238,6 +237,7 @@ struct x86_pmu {
238 * Intel DebugStore bits 237 * Intel DebugStore bits
239 */ 238 */
240 int bts, pebs; 239 int bts, pebs;
240 int bts_active, pebs_active;
241 int pebs_record_size; 241 int pebs_record_size;
242 void (*drain_pebs)(struct pt_regs *regs); 242 void (*drain_pebs)(struct pt_regs *regs);
243 struct event_constraint *pebs_constraints; 243 struct event_constraint *pebs_constraints;
@@ -381,7 +381,7 @@ static void release_pmc_hardware(void) {}
381 381
382#endif 382#endif
383 383
384static int reserve_ds_buffers(void); 384static void reserve_ds_buffers(void);
385static void release_ds_buffers(void); 385static void release_ds_buffers(void);
386 386
387static void hw_perf_event_destroy(struct perf_event *event) 387static void hw_perf_event_destroy(struct perf_event *event)
@@ -478,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event)
478 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && 478 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
479 (hwc->sample_period == 1)) { 479 (hwc->sample_period == 1)) {
480 /* BTS is not supported by this architecture. */ 480 /* BTS is not supported by this architecture. */
481 if (!x86_pmu.bts) 481 if (!x86_pmu.bts_active)
482 return -EOPNOTSUPP; 482 return -EOPNOTSUPP;
483 483
484 /* BTS is currently only allowed for user-mode. */ 484 /* BTS is currently only allowed for user-mode. */
@@ -497,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
497 int precise = 0; 497 int precise = 0;
498 498
499 /* Support for constant skid */ 499 /* Support for constant skid */
500 if (x86_pmu.pebs) 500 if (x86_pmu.pebs_active) {
501 precise++; 501 precise++;
502 502
503 /* Support for IP fixup */ 503 /* Support for IP fixup */
504 if (x86_pmu.lbr_nr) 504 if (x86_pmu.lbr_nr)
505 precise++; 505 precise++;
506 }
506 507
507 if (event->attr.precise_ip > precise) 508 if (event->attr.precise_ip > precise)
508 return -EOPNOTSUPP; 509 return -EOPNOTSUPP;
@@ -544,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event)
544 if (atomic_read(&active_events) == 0) { 545 if (atomic_read(&active_events) == 0) {
545 if (!reserve_pmc_hardware()) 546 if (!reserve_pmc_hardware())
546 err = -EBUSY; 547 err = -EBUSY;
547 else { 548 else
548 err = reserve_ds_buffers(); 549 reserve_ds_buffers();
549 if (err)
550 release_pmc_hardware();
551 }
552 } 550 }
553 if (!err) 551 if (!err)
554 atomic_inc(&active_events); 552 atomic_inc(&active_events);
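Among the perf_event.c changes above, copy_from_user_nmi() is switched to the stack-based kmap_atomic() API, which no longer takes a KM_* slot argument. A small sketch of that usage, assuming only that the page has already been pinned:

#include <linux/highmem.h>
#include <linux/string.h>

static void copy_from_page(void *dst, struct page *page,
			   unsigned long offset, size_t len)
{
	void *map = kmap_atomic(page);	/* was kmap_atomic(page, KM_NMI) */

	memcpy(dst, map + offset, len);
	kunmap_atomic(map);		/* was kunmap_atomic(map, KM_NMI) */
}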
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 46d58448c3af..e421b8cd6944 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -280,11 +280,11 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
280 struct amd_nb *nb; 280 struct amd_nb *nb;
281 int i; 281 int i;
282 282
283 nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); 283 nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO,
284 cpu_to_node(cpu));
284 if (!nb) 285 if (!nb)
285 return NULL; 286 return NULL;
286 287
287 memset(nb, 0, sizeof(*nb));
288 nb->nb_id = nb_id; 288 nb->nb_id = nb_id;
289 289
290 /* 290 /*
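The amd_alloc_nb() hunk folds kmalloc() plus memset() into a single zeroed, NUMA-local allocation. A sketch of the same idiom with a hypothetical structure; kzalloc_node() would be equivalent to passing __GFP_ZERO here.

#include <linux/slab.h>
#include <linux/topology.h>

struct example_nb {
	int nb_id;
	/* ... */
};

static struct example_nb *example_alloc_nb(int cpu, int nb_id)
{
	struct example_nb *nb;

	/* zeroed memory from the node that owns this CPU, in one call */
	nb = kmalloc_node(sizeof(*nb), GFP_KERNEL | __GFP_ZERO,
			  cpu_to_node(cpu));
	if (!nb)
		return NULL;

	nb->nb_id = nb_id;
	return nb;
}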
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 4977f9c400e5..b7dcd9f2b8a0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu)
74 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); 74 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
75} 75}
76 76
77static int alloc_pebs_buffer(int cpu)
78{
79 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
80 int node = cpu_to_node(cpu);
81 int max, thresh = 1; /* always use a single PEBS record */
82 void *buffer;
83
84 if (!x86_pmu.pebs)
85 return 0;
86
87 buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
88 if (unlikely(!buffer))
89 return -ENOMEM;
90
91 max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
92
93 ds->pebs_buffer_base = (u64)(unsigned long)buffer;
94 ds->pebs_index = ds->pebs_buffer_base;
95 ds->pebs_absolute_maximum = ds->pebs_buffer_base +
96 max * x86_pmu.pebs_record_size;
97
98 ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
99 thresh * x86_pmu.pebs_record_size;
100
101 return 0;
102}
103
104static void release_pebs_buffer(int cpu)
105{
106 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
107
108 if (!ds || !x86_pmu.pebs)
109 return;
110
111 kfree((void *)(unsigned long)ds->pebs_buffer_base);
112 ds->pebs_buffer_base = 0;
113}
114
115static int alloc_bts_buffer(int cpu)
116{
117 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
118 int node = cpu_to_node(cpu);
119 int max, thresh;
120 void *buffer;
121
122 if (!x86_pmu.bts)
123 return 0;
124
125 buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
126 if (unlikely(!buffer))
127 return -ENOMEM;
128
129 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
130 thresh = max / 16;
131
132 ds->bts_buffer_base = (u64)(unsigned long)buffer;
133 ds->bts_index = ds->bts_buffer_base;
134 ds->bts_absolute_maximum = ds->bts_buffer_base +
135 max * BTS_RECORD_SIZE;
136 ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
137 thresh * BTS_RECORD_SIZE;
138
139 return 0;
140}
141
142static void release_bts_buffer(int cpu)
143{
144 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
145
146 if (!ds || !x86_pmu.bts)
147 return;
148
149 kfree((void *)(unsigned long)ds->bts_buffer_base);
150 ds->bts_buffer_base = 0;
151}
152
153static int alloc_ds_buffer(int cpu)
154{
155 int node = cpu_to_node(cpu);
156 struct debug_store *ds;
157
158 ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
159 if (unlikely(!ds))
160 return -ENOMEM;
161
162 per_cpu(cpu_hw_events, cpu).ds = ds;
163
164 return 0;
165}
166
167static void release_ds_buffer(int cpu)
168{
169 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
170
171 if (!ds)
172 return;
173
174 per_cpu(cpu_hw_events, cpu).ds = NULL;
175 kfree(ds);
176}
177
77static void release_ds_buffers(void) 178static void release_ds_buffers(void)
78{ 179{
79 int cpu; 180 int cpu;
@@ -82,93 +183,77 @@ static void release_ds_buffers(void)
82 return; 183 return;
83 184
84 get_online_cpus(); 185 get_online_cpus();
85
86 for_each_online_cpu(cpu) 186 for_each_online_cpu(cpu)
87 fini_debug_store_on_cpu(cpu); 187 fini_debug_store_on_cpu(cpu);
88 188
89 for_each_possible_cpu(cpu) { 189 for_each_possible_cpu(cpu) {
90 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 190 release_pebs_buffer(cpu);
91 191 release_bts_buffer(cpu);
92 if (!ds) 192 release_ds_buffer(cpu);
93 continue;
94
95 per_cpu(cpu_hw_events, cpu).ds = NULL;
96
97 kfree((void *)(unsigned long)ds->pebs_buffer_base);
98 kfree((void *)(unsigned long)ds->bts_buffer_base);
99 kfree(ds);
100 } 193 }
101
102 put_online_cpus(); 194 put_online_cpus();
103} 195}
104 196
105static int reserve_ds_buffers(void) 197static void reserve_ds_buffers(void)
106{ 198{
107 int cpu, err = 0; 199 int bts_err = 0, pebs_err = 0;
200 int cpu;
201
202 x86_pmu.bts_active = 0;
203 x86_pmu.pebs_active = 0;
108 204
109 if (!x86_pmu.bts && !x86_pmu.pebs) 205 if (!x86_pmu.bts && !x86_pmu.pebs)
110 return 0; 206 return;
207
208 if (!x86_pmu.bts)
209 bts_err = 1;
210
211 if (!x86_pmu.pebs)
212 pebs_err = 1;
111 213
112 get_online_cpus(); 214 get_online_cpus();
113 215
114 for_each_possible_cpu(cpu) { 216 for_each_possible_cpu(cpu) {
115 struct debug_store *ds; 217 if (alloc_ds_buffer(cpu)) {
116 void *buffer; 218 bts_err = 1;
117 int max, thresh; 219 pebs_err = 1;
220 }
221
222 if (!bts_err && alloc_bts_buffer(cpu))
223 bts_err = 1;
118 224
119 err = -ENOMEM; 225 if (!pebs_err && alloc_pebs_buffer(cpu))
120 ds = kzalloc(sizeof(*ds), GFP_KERNEL); 226 pebs_err = 1;
121 if (unlikely(!ds)) 227
228 if (bts_err && pebs_err)
122 break; 229 break;
123 per_cpu(cpu_hw_events, cpu).ds = ds; 230 }
124
125 if (x86_pmu.bts) {
126 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
127 if (unlikely(!buffer))
128 break;
129
130 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
131 thresh = max / 16;
132
133 ds->bts_buffer_base = (u64)(unsigned long)buffer;
134 ds->bts_index = ds->bts_buffer_base;
135 ds->bts_absolute_maximum = ds->bts_buffer_base +
136 max * BTS_RECORD_SIZE;
137 ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
138 thresh * BTS_RECORD_SIZE;
139 }
140 231
141 if (x86_pmu.pebs) { 232 if (bts_err) {
142 buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); 233 for_each_possible_cpu(cpu)
143 if (unlikely(!buffer)) 234 release_bts_buffer(cpu);
144 break; 235 }
145
146 max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
147
148 ds->pebs_buffer_base = (u64)(unsigned long)buffer;
149 ds->pebs_index = ds->pebs_buffer_base;
150 ds->pebs_absolute_maximum = ds->pebs_buffer_base +
151 max * x86_pmu.pebs_record_size;
152 /*
153 * Always use single record PEBS
154 */
155 ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
156 x86_pmu.pebs_record_size;
157 }
158 236
159 err = 0; 237 if (pebs_err) {
238 for_each_possible_cpu(cpu)
239 release_pebs_buffer(cpu);
160 } 240 }
161 241
162 if (err) 242 if (bts_err && pebs_err) {
163 release_ds_buffers(); 243 for_each_possible_cpu(cpu)
164 else { 244 release_ds_buffer(cpu);
245 } else {
246 if (x86_pmu.bts && !bts_err)
247 x86_pmu.bts_active = 1;
248
249 if (x86_pmu.pebs && !pebs_err)
250 x86_pmu.pebs_active = 1;
251
165 for_each_online_cpu(cpu) 252 for_each_online_cpu(cpu)
166 init_debug_store_on_cpu(cpu); 253 init_debug_store_on_cpu(cpu);
167 } 254 }
168 255
169 put_online_cpus(); 256 put_online_cpus();
170
171 return err;
172} 257}
173 258
174/* 259/*
@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void)
233 if (!event) 318 if (!event)
234 return 0; 319 return 0;
235 320
236 if (!ds) 321 if (!x86_pmu.bts_active)
237 return 0; 322 return 0;
238 323
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 324 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
503 struct pebs_record_core *at, *top; 588 struct pebs_record_core *at, *top;
504 int n; 589 int n;
505 590
506 if (!ds || !x86_pmu.pebs) 591 if (!x86_pmu.pebs_active)
507 return; 592 return;
508 593
509 at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; 594 at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
545 u64 status = 0; 630 u64 status = 0;
546 int bit, n; 631 int bit, n;
547 632
548 if (!ds || !x86_pmu.pebs) 633 if (!x86_pmu.pebs_active)
549 return; 634 return;
550 635
551 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; 636 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
@@ -630,9 +715,8 @@ static void intel_ds_init(void)
630 715
631#else /* CONFIG_CPU_SUP_INTEL */ 716#else /* CONFIG_CPU_SUP_INTEL */
632 717
633static int reserve_ds_buffers(void) 718static void reserve_ds_buffers(void)
634{ 719{
635 return 0;
636} 720}
637 721
638static void release_ds_buffers(void) 722static void release_ds_buffers(void)
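The rewritten reserve_ds_buffers() above tracks BTS and PEBS allocation failures independently, so one feature can stay usable when the other cannot get memory. A condensed sketch of that pattern, with hypothetical alloc_a()/alloc_b() helpers standing in for the BTS/PEBS buffer allocators:

#include <linux/cpumask.h>

static int a_active, b_active;		/* stand-ins for bts_active/pebs_active */

static int alloc_a(int cpu) { return 0; }	/* 0 on success */
static int alloc_b(int cpu) { return 0; }
static void release_a(int cpu) { }
static void release_b(int cpu) { }

static void reserve_both(void)
{
	int a_err = 0, b_err = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		if (!a_err && alloc_a(cpu))
			a_err = 1;
		if (!b_err && alloc_b(cpu))
			b_err = 1;
		if (a_err && b_err)
			break;
	}

	/* release only what failed; the surviving feature stays active */
	if (a_err)
		for_each_possible_cpu(cpu)
			release_a(cpu);
	if (b_err)
		for_each_possible_cpu(cpu)
			release_b(cpu);

	a_active = !a_err;
	b_active = !b_err;
}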
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 67414550c3cc..d5cd13945d5a 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -61,7 +61,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
61 if (!is_crashed_pfn_valid(pfn)) 61 if (!is_crashed_pfn_valid(pfn))
62 return -EFAULT; 62 return -EFAULT;
63 63
64 vaddr = kmap_atomic_pfn(pfn, KM_PTE0); 64 vaddr = kmap_atomic_pfn(pfn);
65 65
66 if (!userbuf) { 66 if (!userbuf) {
67 memcpy(buf, (vaddr + offset), csize); 67 memcpy(buf, (vaddr + offset), csize);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 0f6376ffa2d9..1bc7f75a5bda 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -82,11 +82,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
82 if (kstack_end(stack)) 82 if (kstack_end(stack))
83 break; 83 break;
84 if (i && ((i % STACKSLOTS_PER_LINE) == 0)) 84 if (i && ((i % STACKSLOTS_PER_LINE) == 0))
85 printk("\n%s", log_lvl); 85 printk(KERN_CONT "\n");
86 printk(" %08lx", *stack++); 86 printk(KERN_CONT " %08lx", *stack++);
87 touch_nmi_watchdog(); 87 touch_nmi_watchdog();
88 } 88 }
89 printk("\n"); 89 printk(KERN_CONT "\n");
90 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 90 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
91} 91}
92 92
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 57a21f11c791..6a340485249a 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -265,20 +265,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
265 if (stack >= irq_stack && stack <= irq_stack_end) { 265 if (stack >= irq_stack && stack <= irq_stack_end) {
266 if (stack == irq_stack_end) { 266 if (stack == irq_stack_end) {
267 stack = (unsigned long *) (irq_stack_end[-1]); 267 stack = (unsigned long *) (irq_stack_end[-1]);
268 printk(" <EOI> "); 268 printk(KERN_CONT " <EOI> ");
269 } 269 }
270 } else { 270 } else {
271 if (((long) stack & (THREAD_SIZE-1)) == 0) 271 if (((long) stack & (THREAD_SIZE-1)) == 0)
272 break; 272 break;
273 } 273 }
274 if (i && ((i % STACKSLOTS_PER_LINE) == 0)) 274 if (i && ((i % STACKSLOTS_PER_LINE) == 0))
275 printk("\n%s", log_lvl); 275 printk(KERN_CONT "\n");
276 printk(" %016lx", *stack++); 276 printk(KERN_CONT " %016lx", *stack++);
277 touch_nmi_watchdog(); 277 touch_nmi_watchdog();
278 } 278 }
279 preempt_enable(); 279 preempt_enable();
280 280
281 printk("\n"); 281 printk(KERN_CONT "\n");
282 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 282 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
283} 283}
284 284
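Both dumpstack hunks above change continuation fragments of a stack dump to use KERN_CONT, so only the first fragment carries a log level. A short sketch of the idiom:

#include <linux/kernel.h>

static void print_words(const unsigned long *stack, int n)
{
	int i;

	printk(KERN_DEBUG "stack:");		/* level only on the first piece */
	for (i = 0; i < n; i++)
		printk(KERN_CONT " %08lx", stack[i]);
	printk(KERN_CONT "\n");			/* terminate the logical line */
}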
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
deleted file mode 100644
index 0fe27d7c6258..000000000000
--- a/arch/x86/kernel/efi.c
+++ /dev/null
@@ -1,613 +0,0 @@
1/*
2 * Common EFI (Extensible Firmware Interface) support functions
3 * Based on Extensible Firmware Interface Specification version 1.0
4 *
5 * Copyright (C) 1999 VA Linux Systems
6 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
7 * Copyright (C) 1999-2002 Hewlett-Packard Co.
8 * David Mosberger-Tang <davidm@hpl.hp.com>
9 * Stephane Eranian <eranian@hpl.hp.com>
10 * Copyright (C) 2005-2008 Intel Co.
11 * Fenghua Yu <fenghua.yu@intel.com>
12 * Bibo Mao <bibo.mao@intel.com>
13 * Chandramouli Narayanan <mouli@linux.intel.com>
14 * Huang Ying <ying.huang@intel.com>
15 *
16 * Copied from efi_32.c to eliminate the duplicated code between EFI
17 * 32/64 support code. --ying 2007-10-26
18 *
19 * All EFI Runtime Services are not implemented yet as EFI only
20 * supports physical mode addressing on SoftSDV. This is to be fixed
21 * in a future version. --drummond 1999-07-20
22 *
23 * Implemented EFI runtime services and virtual mode calls. --davidm
24 *
25 * Goutham Rao: <goutham.rao@intel.com>
26 * Skip non-WB memory and ignore empty memory ranges.
27 */
28
29#include <linux/kernel.h>
30#include <linux/init.h>
31#include <linux/efi.h>
32#include <linux/bootmem.h>
33#include <linux/memblock.h>
34#include <linux/spinlock.h>
35#include <linux/uaccess.h>
36#include <linux/time.h>
37#include <linux/io.h>
38#include <linux/reboot.h>
39#include <linux/bcd.h>
40
41#include <asm/setup.h>
42#include <asm/efi.h>
43#include <asm/time.h>
44#include <asm/cacheflush.h>
45#include <asm/tlbflush.h>
46#include <asm/x86_init.h>
47
48#define EFI_DEBUG 1
49#define PFX "EFI: "
50
51int efi_enabled;
52EXPORT_SYMBOL(efi_enabled);
53
54struct efi efi;
55EXPORT_SYMBOL(efi);
56
57struct efi_memory_map memmap;
58
59static struct efi efi_phys __initdata;
60static efi_system_table_t efi_systab __initdata;
61
62static int __init setup_noefi(char *arg)
63{
64 efi_enabled = 0;
65 return 0;
66}
67early_param("noefi", setup_noefi);
68
69int add_efi_memmap;
70EXPORT_SYMBOL(add_efi_memmap);
71
72static int __init setup_add_efi_memmap(char *arg)
73{
74 add_efi_memmap = 1;
75 return 0;
76}
77early_param("add_efi_memmap", setup_add_efi_memmap);
78
79
80static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
81{
82 return efi_call_virt2(get_time, tm, tc);
83}
84
85static efi_status_t virt_efi_set_time(efi_time_t *tm)
86{
87 return efi_call_virt1(set_time, tm);
88}
89
90static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
91 efi_bool_t *pending,
92 efi_time_t *tm)
93{
94 return efi_call_virt3(get_wakeup_time,
95 enabled, pending, tm);
96}
97
98static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
99{
100 return efi_call_virt2(set_wakeup_time,
101 enabled, tm);
102}
103
104static efi_status_t virt_efi_get_variable(efi_char16_t *name,
105 efi_guid_t *vendor,
106 u32 *attr,
107 unsigned long *data_size,
108 void *data)
109{
110 return efi_call_virt5(get_variable,
111 name, vendor, attr,
112 data_size, data);
113}
114
115static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
116 efi_char16_t *name,
117 efi_guid_t *vendor)
118{
119 return efi_call_virt3(get_next_variable,
120 name_size, name, vendor);
121}
122
123static efi_status_t virt_efi_set_variable(efi_char16_t *name,
124 efi_guid_t *vendor,
125 unsigned long attr,
126 unsigned long data_size,
127 void *data)
128{
129 return efi_call_virt5(set_variable,
130 name, vendor, attr,
131 data_size, data);
132}
133
134static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
135{
136 return efi_call_virt1(get_next_high_mono_count, count);
137}
138
139static void virt_efi_reset_system(int reset_type,
140 efi_status_t status,
141 unsigned long data_size,
142 efi_char16_t *data)
143{
144 efi_call_virt4(reset_system, reset_type, status,
145 data_size, data);
146}
147
148static efi_status_t virt_efi_set_virtual_address_map(
149 unsigned long memory_map_size,
150 unsigned long descriptor_size,
151 u32 descriptor_version,
152 efi_memory_desc_t *virtual_map)
153{
154 return efi_call_virt4(set_virtual_address_map,
155 memory_map_size, descriptor_size,
156 descriptor_version, virtual_map);
157}
158
159static efi_status_t __init phys_efi_set_virtual_address_map(
160 unsigned long memory_map_size,
161 unsigned long descriptor_size,
162 u32 descriptor_version,
163 efi_memory_desc_t *virtual_map)
164{
165 efi_status_t status;
166
167 efi_call_phys_prelog();
168 status = efi_call_phys4(efi_phys.set_virtual_address_map,
169 memory_map_size, descriptor_size,
170 descriptor_version, virtual_map);
171 efi_call_phys_epilog();
172 return status;
173}
174
175static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
176 efi_time_cap_t *tc)
177{
178 efi_status_t status;
179
180 efi_call_phys_prelog();
181 status = efi_call_phys2(efi_phys.get_time, tm, tc);
182 efi_call_phys_epilog();
183 return status;
184}
185
186int efi_set_rtc_mmss(unsigned long nowtime)
187{
188 int real_seconds, real_minutes;
189 efi_status_t status;
190 efi_time_t eft;
191 efi_time_cap_t cap;
192
193 status = efi.get_time(&eft, &cap);
194 if (status != EFI_SUCCESS) {
195 printk(KERN_ERR "Oops: efitime: can't read time!\n");
196 return -1;
197 }
198
199 real_seconds = nowtime % 60;
200 real_minutes = nowtime / 60;
201 if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
202 real_minutes += 30;
203 real_minutes %= 60;
204 eft.minute = real_minutes;
205 eft.second = real_seconds;
206
207 status = efi.set_time(&eft);
208 if (status != EFI_SUCCESS) {
209 printk(KERN_ERR "Oops: efitime: can't write time!\n");
210 return -1;
211 }
212 return 0;
213}
214
215unsigned long efi_get_time(void)
216{
217 efi_status_t status;
218 efi_time_t eft;
219 efi_time_cap_t cap;
220
221 status = efi.get_time(&eft, &cap);
222 if (status != EFI_SUCCESS)
223 printk(KERN_ERR "Oops: efitime: can't read time!\n");
224
225 return mktime(eft.year, eft.month, eft.day, eft.hour,
226 eft.minute, eft.second);
227}
228
229/*
230 * Tell the kernel about the EFI memory map. This might include
231 * more than the max 128 entries that can fit in the e820 legacy
232 * (zeropage) memory map.
233 */
234
235static void __init do_add_efi_memmap(void)
236{
237 void *p;
238
239 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
240 efi_memory_desc_t *md = p;
241 unsigned long long start = md->phys_addr;
242 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
243 int e820_type;
244
245 switch (md->type) {
246 case EFI_LOADER_CODE:
247 case EFI_LOADER_DATA:
248 case EFI_BOOT_SERVICES_CODE:
249 case EFI_BOOT_SERVICES_DATA:
250 case EFI_CONVENTIONAL_MEMORY:
251 if (md->attribute & EFI_MEMORY_WB)
252 e820_type = E820_RAM;
253 else
254 e820_type = E820_RESERVED;
255 break;
256 case EFI_ACPI_RECLAIM_MEMORY:
257 e820_type = E820_ACPI;
258 break;
259 case EFI_ACPI_MEMORY_NVS:
260 e820_type = E820_NVS;
261 break;
262 case EFI_UNUSABLE_MEMORY:
263 e820_type = E820_UNUSABLE;
264 break;
265 default:
266 /*
267 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
268 * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
269 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
270 */
271 e820_type = E820_RESERVED;
272 break;
273 }
274 e820_add_region(start, size, e820_type);
275 }
276 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
277}
278
279void __init efi_memblock_x86_reserve_range(void)
280{
281 unsigned long pmap;
282
283#ifdef CONFIG_X86_32
284 pmap = boot_params.efi_info.efi_memmap;
285#else
286 pmap = (boot_params.efi_info.efi_memmap |
287 ((__u64)boot_params.efi_info.efi_memmap_hi<<32));
288#endif
289 memmap.phys_map = (void *)pmap;
290 memmap.nr_map = boot_params.efi_info.efi_memmap_size /
291 boot_params.efi_info.efi_memdesc_size;
292 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
293 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
294 memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size,
295 "EFI memmap");
296}
297
298#if EFI_DEBUG
299static void __init print_efi_memmap(void)
300{
301 efi_memory_desc_t *md;
302 void *p;
303 int i;
304
305 for (p = memmap.map, i = 0;
306 p < memmap.map_end;
307 p += memmap.desc_size, i++) {
308 md = p;
309 printk(KERN_INFO PFX "mem%02u: type=%u, attr=0x%llx, "
310 "range=[0x%016llx-0x%016llx) (%lluMB)\n",
311 i, md->type, md->attribute, md->phys_addr,
312 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
313 (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
314 }
315}
316#endif /* EFI_DEBUG */
317
318void __init efi_init(void)
319{
320 efi_config_table_t *config_tables;
321 efi_runtime_services_t *runtime;
322 efi_char16_t *c16;
323 char vendor[100] = "unknown";
324 int i = 0;
325 void *tmp;
326
327#ifdef CONFIG_X86_32
328 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
329#else
330 efi_phys.systab = (efi_system_table_t *)
331 (boot_params.efi_info.efi_systab |
332 ((__u64)boot_params.efi_info.efi_systab_hi<<32));
333#endif
334
335 efi.systab = early_ioremap((unsigned long)efi_phys.systab,
336 sizeof(efi_system_table_t));
337 if (efi.systab == NULL)
338 printk(KERN_ERR "Couldn't map the EFI system table!\n");
339 memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t));
340 early_iounmap(efi.systab, sizeof(efi_system_table_t));
341 efi.systab = &efi_systab;
342
343 /*
344 * Verify the EFI Table
345 */
346 if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
347 printk(KERN_ERR "EFI system table signature incorrect!\n");
348 if ((efi.systab->hdr.revision >> 16) == 0)
349 printk(KERN_ERR "Warning: EFI system table version "
350 "%d.%02d, expected 1.00 or greater!\n",
351 efi.systab->hdr.revision >> 16,
352 efi.systab->hdr.revision & 0xffff);
353
354 /*
355 * Show what we know for posterity
356 */
357 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
358 if (c16) {
359 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
360 vendor[i] = *c16++;
361 vendor[i] = '\0';
362 } else
363 printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
364 early_iounmap(tmp, 2);
365
366 printk(KERN_INFO "EFI v%u.%.02u by %s\n",
367 efi.systab->hdr.revision >> 16,
368 efi.systab->hdr.revision & 0xffff, vendor);
369
370 /*
371 * Let's see what config tables the firmware passed to us.
372 */
373 config_tables = early_ioremap(
374 efi.systab->tables,
375 efi.systab->nr_tables * sizeof(efi_config_table_t));
376 if (config_tables == NULL)
377 printk(KERN_ERR "Could not map EFI Configuration Table!\n");
378
379 printk(KERN_INFO);
380 for (i = 0; i < efi.systab->nr_tables; i++) {
381 if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) {
382 efi.mps = config_tables[i].table;
383 printk(" MPS=0x%lx ", config_tables[i].table);
384 } else if (!efi_guidcmp(config_tables[i].guid,
385 ACPI_20_TABLE_GUID)) {
386 efi.acpi20 = config_tables[i].table;
387 printk(" ACPI 2.0=0x%lx ", config_tables[i].table);
388 } else if (!efi_guidcmp(config_tables[i].guid,
389 ACPI_TABLE_GUID)) {
390 efi.acpi = config_tables[i].table;
391 printk(" ACPI=0x%lx ", config_tables[i].table);
392 } else if (!efi_guidcmp(config_tables[i].guid,
393 SMBIOS_TABLE_GUID)) {
394 efi.smbios = config_tables[i].table;
395 printk(" SMBIOS=0x%lx ", config_tables[i].table);
396#ifdef CONFIG_X86_UV
397 } else if (!efi_guidcmp(config_tables[i].guid,
398 UV_SYSTEM_TABLE_GUID)) {
399 efi.uv_systab = config_tables[i].table;
400 printk(" UVsystab=0x%lx ", config_tables[i].table);
401#endif
402 } else if (!efi_guidcmp(config_tables[i].guid,
403 HCDP_TABLE_GUID)) {
404 efi.hcdp = config_tables[i].table;
405 printk(" HCDP=0x%lx ", config_tables[i].table);
406 } else if (!efi_guidcmp(config_tables[i].guid,
407 UGA_IO_PROTOCOL_GUID)) {
408 efi.uga = config_tables[i].table;
409 printk(" UGA=0x%lx ", config_tables[i].table);
410 }
411 }
412 printk("\n");
413 early_iounmap(config_tables,
414 efi.systab->nr_tables * sizeof(efi_config_table_t));
415
416 /*
417 * Check out the runtime services table. We need to map
418 * the runtime services table so that we can grab the physical
419 * address of several of the EFI runtime functions, needed to
420 * set the firmware into virtual mode.
421 */
422 runtime = early_ioremap((unsigned long)efi.systab->runtime,
423 sizeof(efi_runtime_services_t));
424 if (runtime != NULL) {
425 /*
426 * We will only need *early* access to the following
427 * two EFI runtime services before set_virtual_address_map
428 * is invoked.
429 */
430 efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
431 efi_phys.set_virtual_address_map =
432 (efi_set_virtual_address_map_t *)
433 runtime->set_virtual_address_map;
434 /*
435 * Make efi_get_time can be called before entering
436 * virtual mode.
437 */
438 efi.get_time = phys_efi_get_time;
439 } else
440 printk(KERN_ERR "Could not map the EFI runtime service "
441 "table!\n");
442 early_iounmap(runtime, sizeof(efi_runtime_services_t));
443
444 /* Map the EFI memory map */
445 memmap.map = early_ioremap((unsigned long)memmap.phys_map,
446 memmap.nr_map * memmap.desc_size);
447 if (memmap.map == NULL)
448 printk(KERN_ERR "Could not map the EFI memory map!\n");
449 memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
450
451 if (memmap.desc_size != sizeof(efi_memory_desc_t))
452 printk(KERN_WARNING
453 "Kernel-defined memdesc doesn't match the one from EFI!\n");
454
455 if (add_efi_memmap)
456 do_add_efi_memmap();
457
458#ifdef CONFIG_X86_32
459 x86_platform.get_wallclock = efi_get_time;
460 x86_platform.set_wallclock = efi_set_rtc_mmss;
461#endif
462
463 /* Setup for EFI runtime service */
464 reboot_type = BOOT_EFI;
465
466#if EFI_DEBUG
467 print_efi_memmap();
468#endif
469}
470
471static void __init runtime_code_page_mkexec(void)
472{
473 efi_memory_desc_t *md;
474 void *p;
475 u64 addr, npages;
476
477 /* Make EFI runtime service code area executable */
478 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
479 md = p;
480
481 if (md->type != EFI_RUNTIME_SERVICES_CODE)
482 continue;
483
484 addr = md->virt_addr;
485 npages = md->num_pages;
486 memrange_efi_to_native(&addr, &npages);
487 set_memory_x(addr, npages);
488 }
489}
490
491/*
492 * This function will switch the EFI runtime services to virtual mode.
493 * Essentially, look through the EFI memmap and map every region that
494 * has the runtime attribute bit set in its memory descriptor and update
495 * that memory descriptor with the virtual address obtained from ioremap().
496 * This enables the runtime services to be called without having to
497 * thunk back into physical mode for every invocation.
498 */
499void __init efi_enter_virtual_mode(void)
500{
501 efi_memory_desc_t *md;
502 efi_status_t status;
503 unsigned long size;
504 u64 end, systab, addr, npages, end_pfn;
505 void *p, *va;
506
507 efi.systab = NULL;
508 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
509 md = p;
510 if (!(md->attribute & EFI_MEMORY_RUNTIME))
511 continue;
512
513 size = md->num_pages << EFI_PAGE_SHIFT;
514 end = md->phys_addr + size;
515
516 end_pfn = PFN_UP(end);
517 if (end_pfn <= max_low_pfn_mapped
518 || (end_pfn > (1UL << (32 - PAGE_SHIFT))
519 && end_pfn <= max_pfn_mapped))
520 va = __va(md->phys_addr);
521 else
522 va = efi_ioremap(md->phys_addr, size, md->type);
523
524 md->virt_addr = (u64) (unsigned long) va;
525
526 if (!va) {
527 printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
528 (unsigned long long)md->phys_addr);
529 continue;
530 }
531
532 if (!(md->attribute & EFI_MEMORY_WB)) {
533 addr = md->virt_addr;
534 npages = md->num_pages;
535 memrange_efi_to_native(&addr, &npages);
536 set_memory_uc(addr, npages);
537 }
538
539 systab = (u64) (unsigned long) efi_phys.systab;
540 if (md->phys_addr <= systab && systab < end) {
541 systab += md->virt_addr - md->phys_addr;
542 efi.systab = (efi_system_table_t *) (unsigned long) systab;
543 }
544 }
545
546 BUG_ON(!efi.systab);
547
548 status = phys_efi_set_virtual_address_map(
549 memmap.desc_size * memmap.nr_map,
550 memmap.desc_size,
551 memmap.desc_version,
552 memmap.phys_map);
553
554 if (status != EFI_SUCCESS) {
555 printk(KERN_ALERT "Unable to switch EFI into virtual mode "
556 "(status=%lx)!\n", status);
557 panic("EFI call to SetVirtualAddressMap() failed!");
558 }
559
560 /*
561 * Now that EFI is in virtual mode, update the function
562 * pointers in the runtime service table to the new virtual addresses.
563 *
564 * Call EFI services through wrapper functions.
565 */
566 efi.get_time = virt_efi_get_time;
567 efi.set_time = virt_efi_set_time;
568 efi.get_wakeup_time = virt_efi_get_wakeup_time;
569 efi.set_wakeup_time = virt_efi_set_wakeup_time;
570 efi.get_variable = virt_efi_get_variable;
571 efi.get_next_variable = virt_efi_get_next_variable;
572 efi.set_variable = virt_efi_set_variable;
573 efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
574 efi.reset_system = virt_efi_reset_system;
575 efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
576 if (__supported_pte_mask & _PAGE_NX)
577 runtime_code_page_mkexec();
578 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
579 memmap.map = NULL;
580}
581
582/*
583 * Convenience functions to obtain memory types and attributes
584 */
585u32 efi_mem_type(unsigned long phys_addr)
586{
587 efi_memory_desc_t *md;
588 void *p;
589
590 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
591 md = p;
592 if ((md->phys_addr <= phys_addr) &&
593 (phys_addr < (md->phys_addr +
594 (md->num_pages << EFI_PAGE_SHIFT))))
595 return md->type;
596 }
597 return 0;
598}
599
600u64 efi_mem_attributes(unsigned long phys_addr)
601{
602 efi_memory_desc_t *md;
603 void *p;
604
605 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
606 md = p;
607 if ((md->phys_addr <= phys_addr) &&
608 (phys_addr < (md->phys_addr +
609 (md->num_pages << EFI_PAGE_SHIFT))))
610 return md->attribute;
611 }
612 return 0;
613}
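The removed efi.c above repeatedly walks the firmware memory map in desc_size strides rather than indexing an array, because the firmware's descriptor size may exceed sizeof(efi_memory_desc_t). A small sketch of that walk, assuming a populated struct efi_memory_map:

#include <linux/efi.h>

static u64 count_efi_pages(struct efi_memory_map *map)
{
	efi_memory_desc_t *md;
	void *p;
	u64 pages = 0;

	/* stride by desc_size: firmware descriptors may be larger than
	 * the efi_memory_desc_t the kernel knows about */
	for (p = map->map; p < map->map_end; p += map->desc_size) {
		md = p;
		pages += md->num_pages;
	}
	return pages;
}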
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
deleted file mode 100644
index 5cab48ee61a4..000000000000
--- a/arch/x86/kernel/efi_32.c
+++ /dev/null
@@ -1,112 +0,0 @@
1/*
2 * Extensible Firmware Interface
3 *
4 * Based on Extensible Firmware Interface Specification version 1.0
5 *
6 * Copyright (C) 1999 VA Linux Systems
7 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
8 * Copyright (C) 1999-2002 Hewlett-Packard Co.
9 * David Mosberger-Tang <davidm@hpl.hp.com>
10 * Stephane Eranian <eranian@hpl.hp.com>
11 *
12 * All EFI Runtime Services are not implemented yet as EFI only
13 * supports physical mode addressing on SoftSDV. This is to be fixed
14 * in a future version. --drummond 1999-07-20
15 *
16 * Implemented EFI runtime services and virtual mode calls. --davidm
17 *
18 * Goutham Rao: <goutham.rao@intel.com>
19 * Skip non-WB memory and ignore empty memory ranges.
20 */
21
22#include <linux/kernel.h>
23#include <linux/types.h>
24#include <linux/ioport.h>
25#include <linux/efi.h>
26
27#include <asm/io.h>
28#include <asm/page.h>
29#include <asm/pgtable.h>
30#include <asm/tlbflush.h>
31#include <asm/efi.h>
32
33/*
34 * To make EFI call EFI runtime service in physical addressing mode we need
35 * prelog/epilog before/after the invocation to disable interrupt, to
36 * claim EFI runtime service handler exclusively and to duplicate a memory in
37 * low memory space say 0 - 3G.
38 */
39
40static unsigned long efi_rt_eflags;
41static pgd_t efi_bak_pg_dir_pointer[2];
42
43void efi_call_phys_prelog(void)
44{
45 unsigned long cr4;
46 unsigned long temp;
47 struct desc_ptr gdt_descr;
48
49 local_irq_save(efi_rt_eflags);
50
51 /*
52 * If I don't have PAE, I should just duplicate two entries in page
53 * directory. If I have PAE, I just need to duplicate one entry in
54 * page directory.
55 */
56 cr4 = read_cr4_safe();
57
58 if (cr4 & X86_CR4_PAE) {
59 efi_bak_pg_dir_pointer[0].pgd =
60 swapper_pg_dir[pgd_index(0)].pgd;
61 swapper_pg_dir[0].pgd =
62 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
63 } else {
64 efi_bak_pg_dir_pointer[0].pgd =
65 swapper_pg_dir[pgd_index(0)].pgd;
66 efi_bak_pg_dir_pointer[1].pgd =
67 swapper_pg_dir[pgd_index(0x400000)].pgd;
68 swapper_pg_dir[pgd_index(0)].pgd =
69 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
70 temp = PAGE_OFFSET + 0x400000;
71 swapper_pg_dir[pgd_index(0x400000)].pgd =
72 swapper_pg_dir[pgd_index(temp)].pgd;
73 }
74
75 /*
76 * After the lock is released, the original page table is restored.
77 */
78 __flush_tlb_all();
79
80 gdt_descr.address = __pa(get_cpu_gdt_table(0));
81 gdt_descr.size = GDT_SIZE - 1;
82 load_gdt(&gdt_descr);
83}
84
85void efi_call_phys_epilog(void)
86{
87 unsigned long cr4;
88 struct desc_ptr gdt_descr;
89
90 gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
91 gdt_descr.size = GDT_SIZE - 1;
92 load_gdt(&gdt_descr);
93
94 cr4 = read_cr4_safe();
95
96 if (cr4 & X86_CR4_PAE) {
97 swapper_pg_dir[pgd_index(0)].pgd =
98 efi_bak_pg_dir_pointer[0].pgd;
99 } else {
100 swapper_pg_dir[pgd_index(0)].pgd =
101 efi_bak_pg_dir_pointer[0].pgd;
102 swapper_pg_dir[pgd_index(0x400000)].pgd =
103 efi_bak_pg_dir_pointer[1].pgd;
104 }
105
106 /*
107 * After the lock is released, the original page table is restored.
108 */
109 __flush_tlb_all();
110
111 local_irq_restore(efi_rt_eflags);
112}
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
deleted file mode 100644
index ac0621a7ac3d..000000000000
--- a/arch/x86/kernel/efi_64.c
+++ /dev/null
@@ -1,114 +0,0 @@
1/*
2 * x86_64 specific EFI support functions
3 * Based on Extensible Firmware Interface Specification version 1.0
4 *
5 * Copyright (C) 2005-2008 Intel Co.
6 * Fenghua Yu <fenghua.yu@intel.com>
7 * Bibo Mao <bibo.mao@intel.com>
8 * Chandramouli Narayanan <mouli@linux.intel.com>
9 * Huang Ying <ying.huang@intel.com>
10 *
11 * Code to convert EFI to E820 map has been implemented in elilo bootloader
12 * based on a EFI patch by Edgar Hucek. Based on the E820 map, the page table
13 * is setup appropriately for EFI runtime code.
14 * - mouli 06/14/2007.
15 *
16 */
17
18#include <linux/kernel.h>
19#include <linux/init.h>
20#include <linux/mm.h>
21#include <linux/types.h>
22#include <linux/spinlock.h>
23#include <linux/bootmem.h>
24#include <linux/ioport.h>
25#include <linux/module.h>
26#include <linux/efi.h>
27#include <linux/uaccess.h>
28#include <linux/io.h>
29#include <linux/reboot.h>
30
31#include <asm/setup.h>
32#include <asm/page.h>
33#include <asm/e820.h>
34#include <asm/pgtable.h>
35#include <asm/tlbflush.h>
36#include <asm/proto.h>
37#include <asm/efi.h>
38#include <asm/cacheflush.h>
39#include <asm/fixmap.h>
40
41static pgd_t save_pgd __initdata;
42static unsigned long efi_flags __initdata;
43
44static void __init early_mapping_set_exec(unsigned long start,
45 unsigned long end,
46 int executable)
47{
48 unsigned long num_pages;
49
50 start &= PMD_MASK;
51 end = (end + PMD_SIZE - 1) & PMD_MASK;
52 num_pages = (end - start) >> PAGE_SHIFT;
53 if (executable)
54 set_memory_x((unsigned long)__va(start), num_pages);
55 else
56 set_memory_nx((unsigned long)__va(start), num_pages);
57}
58
59static void __init early_runtime_code_mapping_set_exec(int executable)
60{
61 efi_memory_desc_t *md;
62 void *p;
63
64 if (!(__supported_pte_mask & _PAGE_NX))
65 return;
66
67 /* Make EFI runtime service code area executable */
68 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
69 md = p;
70 if (md->type == EFI_RUNTIME_SERVICES_CODE) {
71 unsigned long end;
72 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
73 early_mapping_set_exec(md->phys_addr, end, executable);
74 }
75 }
76}
77
78void __init efi_call_phys_prelog(void)
79{
80 unsigned long vaddress;
81
82 early_runtime_code_mapping_set_exec(1);
83 local_irq_save(efi_flags);
84 vaddress = (unsigned long)__va(0x0UL);
85 save_pgd = *pgd_offset_k(0x0UL);
86 set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
87 __flush_tlb_all();
88}
89
90void __init efi_call_phys_epilog(void)
91{
92 /*
93 * After the lock is released, the original page table is restored.
94 */
95 set_pgd(pgd_offset_k(0x0UL), save_pgd);
96 __flush_tlb_all();
97 local_irq_restore(efi_flags);
98 early_runtime_code_mapping_set_exec(0);
99}
100
101void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
102 u32 type)
103{
104 unsigned long last_map_pfn;
105
106 if (type == EFI_MEMORY_MAPPED_IO)
107 return ioremap(phys_addr, size);
108
109 last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
110 if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
111 return NULL;
112
113 return (void __iomem *)__va(phys_addr);
114}
diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S
deleted file mode 100644
index fbe66e626c09..000000000000
--- a/arch/x86/kernel/efi_stub_32.S
+++ /dev/null
@@ -1,123 +0,0 @@
1/*
2 * EFI call stub for IA32.
3 *
4 * This stub allows us to make EFI calls in physical mode with interrupts
5 * turned off.
6 */
7
8#include <linux/linkage.h>
9#include <asm/page_types.h>
10
11/*
12 * efi_call_phys(void *, ...) is a function with variable parameters.
13 * All the callers of this function assure that all the parameters are 4-bytes.
14 */
15
16/*
17 * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
18 * So we'd better save all of them at the beginning of this function and restore
19 * at the end no matter how many we use, because we can not assure EFI runtime
20 * service functions will comply with gcc calling convention, too.
21 */
22
23.text
24ENTRY(efi_call_phys)
25 /*
26 * 0. The function can only be called in Linux kernel. So CS has been
27 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
28 * the values of these registers are the same. And, the corresponding
29 * GDT entries are identical. So I will do nothing about segment reg
30 * and GDT, but change GDT base register in prelog and epilog.
31 */
32
33 /*
34 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
35 * But to make it smoothly switch from virtual mode to flat mode.
36 * The mapping of lower virtual memory has been created in prelog and
37 * epilog.
38 */
39 movl $1f, %edx
40 subl $__PAGE_OFFSET, %edx
41 jmp *%edx
421:
43
44 /*
45 * 2. Now on the top of stack is the return
46 * address in the caller of efi_call_phys(), then parameter 1,
47 * parameter 2, ..., param n. To make things easy, we save the return
48 * address of efi_call_phys in a global variable.
49 */
50 popl %edx
51 movl %edx, saved_return_addr
52 /* get the function pointer into ECX*/
53 popl %ecx
54 movl %ecx, efi_rt_function_ptr
55 movl $2f, %edx
56 subl $__PAGE_OFFSET, %edx
57 pushl %edx
58
59 /*
60 * 3. Clear PG bit in %CR0.
61 */
62 movl %cr0, %edx
63 andl $0x7fffffff, %edx
64 movl %edx, %cr0
65 jmp 1f
661:
67
68 /*
69 * 4. Adjust stack pointer.
70 */
71 subl $__PAGE_OFFSET, %esp
72
73 /*
74 * 5. Call the physical function.
75 */
76 jmp *%ecx
77
782:
79 /*
80 * 6. After EFI runtime service returns, control will return to
81 * following instruction. We'd better readjust stack pointer first.
82 */
83 addl $__PAGE_OFFSET, %esp
84
85 /*
86 * 7. Restore PG bit
87 */
88 movl %cr0, %edx
89 orl $0x80000000, %edx
90 movl %edx, %cr0
91 jmp 1f
921:
93 /*
94 * 8. Now restore the virtual mode from flat mode by
95 * adding EIP with PAGE_OFFSET.
96 */
97 movl $1f, %edx
98 jmp *%edx
991:
100
101 /*
102 * 9. Balance the stack. And because EAX contain the return value,
103 * we'd better not clobber it.
104 */
105 leal efi_rt_function_ptr, %edx
106 movl (%edx), %ecx
107 pushl %ecx
108
109 /*
110 * 10. Push the saved return address onto the stack and return.
111 */
112 leal saved_return_addr, %edx
113 movl (%edx), %ecx
114 pushl %ecx
115 ret
116ENDPROC(efi_call_phys)
117.previous
118
119.data
120saved_return_addr:
121 .long 0
122efi_rt_function_ptr:
123 .long 0
diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S
deleted file mode 100644
index 4c07ccab8146..000000000000
--- a/arch/x86/kernel/efi_stub_64.S
+++ /dev/null
@@ -1,116 +0,0 @@
1/*
2 * Function calling ABI conversion from Linux to EFI for x86_64
3 *
4 * Copyright (C) 2007 Intel Corp
5 * Bibo Mao <bibo.mao@intel.com>
6 * Huang Ying <ying.huang@intel.com>
7 */
8
9#include <linux/linkage.h>
10
11#define SAVE_XMM \
12 mov %rsp, %rax; \
13 subq $0x70, %rsp; \
14 and $~0xf, %rsp; \
15 mov %rax, (%rsp); \
16 mov %cr0, %rax; \
17 clts; \
18 mov %rax, 0x8(%rsp); \
19 movaps %xmm0, 0x60(%rsp); \
20 movaps %xmm1, 0x50(%rsp); \
21 movaps %xmm2, 0x40(%rsp); \
22 movaps %xmm3, 0x30(%rsp); \
23 movaps %xmm4, 0x20(%rsp); \
24 movaps %xmm5, 0x10(%rsp)
25
26#define RESTORE_XMM \
27 movaps 0x60(%rsp), %xmm0; \
28 movaps 0x50(%rsp), %xmm1; \
29 movaps 0x40(%rsp), %xmm2; \
30 movaps 0x30(%rsp), %xmm3; \
31 movaps 0x20(%rsp), %xmm4; \
32 movaps 0x10(%rsp), %xmm5; \
33 mov 0x8(%rsp), %rsi; \
34 mov %rsi, %cr0; \
35 mov (%rsp), %rsp
36
37ENTRY(efi_call0)
38 SAVE_XMM
39 subq $32, %rsp
40 call *%rdi
41 addq $32, %rsp
42 RESTORE_XMM
43 ret
44ENDPROC(efi_call0)
45
46ENTRY(efi_call1)
47 SAVE_XMM
48 subq $32, %rsp
49 mov %rsi, %rcx
50 call *%rdi
51 addq $32, %rsp
52 RESTORE_XMM
53 ret
54ENDPROC(efi_call1)
55
56ENTRY(efi_call2)
57 SAVE_XMM
58 subq $32, %rsp
59 mov %rsi, %rcx
60 call *%rdi
61 addq $32, %rsp
62 RESTORE_XMM
63 ret
64ENDPROC(efi_call2)
65
66ENTRY(efi_call3)
67 SAVE_XMM
68 subq $32, %rsp
69 mov %rcx, %r8
70 mov %rsi, %rcx
71 call *%rdi
72 addq $32, %rsp
73 RESTORE_XMM
74 ret
75ENDPROC(efi_call3)
76
77ENTRY(efi_call4)
78 SAVE_XMM
79 subq $32, %rsp
80 mov %r8, %r9
81 mov %rcx, %r8
82 mov %rsi, %rcx
83 call *%rdi
84 addq $32, %rsp
85 RESTORE_XMM
86 ret
87ENDPROC(efi_call4)
88
89ENTRY(efi_call5)
90 SAVE_XMM
91 subq $48, %rsp
92 mov %r9, 32(%rsp)
93 mov %r8, %r9
94 mov %rcx, %r8
95 mov %rsi, %rcx
96 call *%rdi
97 addq $48, %rsp
98 RESTORE_XMM
99 ret
100ENDPROC(efi_call5)
101
102ENTRY(efi_call6)
103 SAVE_XMM
104 mov (%rsp), %rax
105 mov 8(%rax), %rax
106 subq $48, %rsp
107 mov %r9, 32(%rsp)
108 mov %rax, 40(%rsp)
109 mov %r8, %r9
110 mov %rcx, %r8
111 mov %rsi, %rcx
112 call *%rdi
113 addq $48, %rsp
114 RESTORE_XMM
115 ret
116ENDPROC(efi_call6)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 9fb188d7bc76..59e175e89599 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -382,20 +382,20 @@ sysenter_past_esp:
382 * enough kernel state to call TRACE_IRQS_OFF can be called - but 382 * enough kernel state to call TRACE_IRQS_OFF can be called - but
383 * we immediately enable interrupts at that point anyway. 383 * we immediately enable interrupts at that point anyway.
384 */ 384 */
385 pushl_cfi $(__USER_DS) 385 pushl_cfi $__USER_DS
386 /*CFI_REL_OFFSET ss, 0*/ 386 /*CFI_REL_OFFSET ss, 0*/
387 pushl_cfi %ebp 387 pushl_cfi %ebp
388 CFI_REL_OFFSET esp, 0 388 CFI_REL_OFFSET esp, 0
389 pushfl_cfi 389 pushfl_cfi
390 orl $X86_EFLAGS_IF, (%esp) 390 orl $X86_EFLAGS_IF, (%esp)
391 pushl_cfi $(__USER_CS) 391 pushl_cfi $__USER_CS
392 /*CFI_REL_OFFSET cs, 0*/ 392 /*CFI_REL_OFFSET cs, 0*/
393 /* 393 /*
394 * Push current_thread_info()->sysenter_return to the stack. 394 * Push current_thread_info()->sysenter_return to the stack.
395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words 395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
396 * pushed above; +8 corresponds to copy_thread's esp0 setting. 396 * pushed above; +8 corresponds to copy_thread's esp0 setting.
397 */ 397 */
398 pushl_cfi (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) 398 pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp)
399 CFI_REL_OFFSET eip, 0 399 CFI_REL_OFFSET eip, 0
400 400
401 pushl_cfi %eax 401 pushl_cfi %eax
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a7ae7fd1010f..fe2690d71c0c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -963,22 +963,10 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
963 x86_platform_ipi smp_x86_platform_ipi 963 x86_platform_ipi smp_x86_platform_ipi
964 964
965#ifdef CONFIG_SMP 965#ifdef CONFIG_SMP
966apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ 966.irpc idx, "01234567"
967 invalidate_interrupt0 smp_invalidate_interrupt 967apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
968apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \ 968 invalidate_interrupt\idx smp_invalidate_interrupt
969 invalidate_interrupt1 smp_invalidate_interrupt 969.endr
970apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
971 invalidate_interrupt2 smp_invalidate_interrupt
972apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
973 invalidate_interrupt3 smp_invalidate_interrupt
974apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
975 invalidate_interrupt4 smp_invalidate_interrupt
976apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
977 invalidate_interrupt5 smp_invalidate_interrupt
978apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
979 invalidate_interrupt6 smp_invalidate_interrupt
980apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
981 invalidate_interrupt7 smp_invalidate_interrupt
982#endif 970#endif
983 971
984apicinterrupt THRESHOLD_APIC_VECTOR \ 972apicinterrupt THRESHOLD_APIC_VECTOR \
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 9a6ca2392170..763310165fa0 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,6 +18,7 @@
18#include <asm/apic.h> 18#include <asm/apic.h>
19#include <asm/io_apic.h> 19#include <asm/io_apic.h>
20#include <asm/bios_ebda.h> 20#include <asm/bios_ebda.h>
21#include <asm/tlbflush.h>
21 22
22static void __init i386_default_early_setup(void) 23static void __init i386_default_early_setup(void)
23{ 24{
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index fa8c1b8e09fb..bcece91dd311 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -183,13 +183,12 @@ default_entry:
183#ifdef CONFIG_X86_PAE 183#ifdef CONFIG_X86_PAE
184 184
185 /* 185 /*
186 * In PAE mode swapper_pg_dir is statically defined to contain enough 186 * In PAE mode initial_page_table is statically defined to contain
187 * entries to cover the VMSPLIT option (that is the top 1, 2 or 3 187 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
188 * entries). The identity mapping is handled by pointing two PGD 188 * entries). The identity mapping is handled by pointing two PGD entries
189 * entries to the first kernel PMD. 189 * to the first kernel PMD.
190 * 190 *
191 * Note the upper half of each PMD or PTE are always zero at 191 * Note the upper half of each PMD or PTE are always zero at this stage.
192 * this stage.
193 */ 192 */
194 193
195#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ 194#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
@@ -197,7 +196,7 @@ default_entry:
197 xorl %ebx,%ebx /* %ebx is kept at zero */ 196 xorl %ebx,%ebx /* %ebx is kept at zero */
198 197
199 movl $pa(__brk_base), %edi 198 movl $pa(__brk_base), %edi
200 movl $pa(swapper_pg_pmd), %edx 199 movl $pa(initial_pg_pmd), %edx
201 movl $PTE_IDENT_ATTR, %eax 200 movl $PTE_IDENT_ATTR, %eax
20210: 20110:
203 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ 202 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */
@@ -226,14 +225,14 @@ default_entry:
226 movl %eax, pa(max_pfn_mapped) 225 movl %eax, pa(max_pfn_mapped)
227 226
228 /* Do early initialization of the fixmap area */ 227 /* Do early initialization of the fixmap area */
229 movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax 228 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
230 movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) 229 movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
231#else /* Not PAE */ 230#else /* Not PAE */
232 231
233page_pde_offset = (__PAGE_OFFSET >> 20); 232page_pde_offset = (__PAGE_OFFSET >> 20);
234 233
235 movl $pa(__brk_base), %edi 234 movl $pa(__brk_base), %edi
236 movl $pa(swapper_pg_dir), %edx 235 movl $pa(initial_page_table), %edx
237 movl $PTE_IDENT_ATTR, %eax 236 movl $PTE_IDENT_ATTR, %eax
23810: 23710:
239 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ 238 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
@@ -257,8 +256,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
257 movl %eax, pa(max_pfn_mapped) 256 movl %eax, pa(max_pfn_mapped)
258 257
259 /* Do early initialization of the fixmap area */ 258 /* Do early initialization of the fixmap area */
260 movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax 259 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
261 movl %eax,pa(swapper_pg_dir+0xffc) 260 movl %eax,pa(initial_page_table+0xffc)
262#endif 261#endif
263 jmp 3f 262 jmp 3f
264/* 263/*
@@ -334,7 +333,7 @@ ENTRY(startup_32_smp)
334/* 333/*
335 * Enable paging 334 * Enable paging
336 */ 335 */
337 movl pa(initial_page_table), %eax 336 movl $pa(initial_page_table), %eax
338 movl %eax,%cr3 /* set the page table pointer.. */ 337 movl %eax,%cr3 /* set the page table pointer.. */
339 movl %cr0,%eax 338 movl %cr0,%eax
340 orl $X86_CR0_PG,%eax 339 orl $X86_CR0_PG,%eax
@@ -614,8 +613,6 @@ ignore_int:
614.align 4 613.align 4
615ENTRY(initial_code) 614ENTRY(initial_code)
616 .long i386_start_kernel 615 .long i386_start_kernel
617ENTRY(initial_page_table)
618 .long pa(swapper_pg_dir)
619 616
620/* 617/*
621 * BSS section 618 * BSS section
@@ -623,20 +620,18 @@ ENTRY(initial_page_table)
623__PAGE_ALIGNED_BSS 620__PAGE_ALIGNED_BSS
624 .align PAGE_SIZE_asm 621 .align PAGE_SIZE_asm
625#ifdef CONFIG_X86_PAE 622#ifdef CONFIG_X86_PAE
626swapper_pg_pmd: 623initial_pg_pmd:
627 .fill 1024*KPMDS,4,0 624 .fill 1024*KPMDS,4,0
628#else 625#else
629ENTRY(swapper_pg_dir) 626ENTRY(initial_page_table)
630 .fill 1024,4,0 627 .fill 1024,4,0
631#endif 628#endif
632swapper_pg_fixmap: 629initial_pg_fixmap:
633 .fill 1024,4,0 630 .fill 1024,4,0
634#ifdef CONFIG_X86_TRAMPOLINE
635ENTRY(trampoline_pg_dir)
636 .fill 1024,4,0
637#endif
638ENTRY(empty_zero_page) 631ENTRY(empty_zero_page)
639 .fill 4096,1,0 632 .fill 4096,1,0
633ENTRY(swapper_pg_dir)
634 .fill 1024,4,0
640 635
641/* 636/*
642 * This starts the data section. 637 * This starts the data section.
@@ -645,20 +640,20 @@ ENTRY(empty_zero_page)
645__PAGE_ALIGNED_DATA 640__PAGE_ALIGNED_DATA
646 /* Page-aligned for the benefit of paravirt? */ 641 /* Page-aligned for the benefit of paravirt? */
647 .align PAGE_SIZE_asm 642 .align PAGE_SIZE_asm
648ENTRY(swapper_pg_dir) 643ENTRY(initial_page_table)
649 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ 644 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
650# if KPMDS == 3 645# if KPMDS == 3
651 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 646 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0
652 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 647 .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
653 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0 648 .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x2000),0
654# elif KPMDS == 2 649# elif KPMDS == 2
655 .long 0,0 650 .long 0,0
656 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 651 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0
657 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 652 .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
658# elif KPMDS == 1 653# elif KPMDS == 1
659 .long 0,0 654 .long 0,0
660 .long 0,0 655 .long 0,0
661 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 656 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0
662# else 657# else
663# error "Kernel PMDs should be 1, 2 or 3" 658# error "Kernel PMDs should be 1, 2 or 3"
664# endif 659# endif
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index efaf906daf93..ae03cab4352e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -380,44 +380,35 @@ static int hpet_next_event(unsigned long delta,
380 struct clock_event_device *evt, int timer) 380 struct clock_event_device *evt, int timer)
381{ 381{
382 u32 cnt; 382 u32 cnt;
383 s32 res;
383 384
384 cnt = hpet_readl(HPET_COUNTER); 385 cnt = hpet_readl(HPET_COUNTER);
385 cnt += (u32) delta; 386 cnt += (u32) delta;
386 hpet_writel(cnt, HPET_Tn_CMP(timer)); 387 hpet_writel(cnt, HPET_Tn_CMP(timer));
387 388
388 /* 389 /*
389 * We need to read back the CMP register on certain HPET 390 * HPETs are a complete disaster. The compare register is
390 * implementations (ATI chipsets) which seem to delay the 391 * based on a equal comparison and neither provides a less
391 * transfer of the compare register into the internal compare 392 * than or equal functionality (which would require to take
392 * logic. With small deltas this might actually be too late as 393 * the wraparound into account) nor a simple count down event
393 * the counter could already be higher than the compare value 394 * mode. Further the write to the comparator register is
394 * at that point and we would wait for the next hpet interrupt 395 * delayed internally up to two HPET clock cycles in certain
395 * forever. We found out that reading the CMP register back 396 * chipsets (ATI, ICH9,10). We worked around that by reading
396 * forces the transfer so we can rely on the comparison with 397 * back the compare register, but that required another
397 * the counter register below. If the read back from the 398 * workaround for ICH9,10 chips where the first readout after
398 * compare register does not match the value we programmed 399 * write can return the old stale value. We already have a
399 * then we might have a real hardware problem. We can not do 400 * minimum delta of 5us enforced, but a NMI or SMI hitting
400 * much about it here, but at least alert the user/admin with 401 * between the counter readout and the comparator write can
401 * a prominent warning. 402 * move us behind that point easily. Now instead of reading
402 * 403 * the compare register back several times, we make the ETIME
403 * An erratum on some chipsets (ICH9,..), results in 404 * decision based on the following: Return ETIME if the
404 * comparator read immediately following a write returning old 405 * counter value after the write is less than 8 HPET cycles
405 * value. Workaround for this is to read this value second 406 * away from the event or if the counter is already ahead of
406 * time, when first read returns old value. 407 * the event.
407 *
408 * In fact the write to the comparator register is delayed up
409 * to two HPET cycles so the workaround we tried to restrict
410 * the readback to those known to be borked ATI chipsets
411 * failed miserably. So we give up on optimizations forever
412 * and penalize all HPET incarnations unconditionally.
413 */ 408 */
414 if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { 409 res = (s32)(cnt - hpet_readl(HPET_COUNTER));
415 if (hpet_readl(HPET_Tn_CMP(timer)) != cnt)
416 printk_once(KERN_WARNING
417 "hpet: compare register read back failed.\n");
418 }
419 410
420 return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; 411 return res < 8 ? -ETIME : 0;
421} 412}
422 413
423static void hpet_legacy_set_mode(enum clock_event_mode mode, 414static void hpet_legacy_set_mode(enum clock_event_mode mode,
@@ -722,7 +713,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
722 713
723 switch (action & 0xf) { 714 switch (action & 0xf) {
724 case CPU_ONLINE: 715 case CPU_ONLINE:
725 INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work); 716 INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
726 init_completion(&work.complete); 717 init_completion(&work.complete);
727 /* FIXME: add schedule_work_on() */ 718 /* FIXME: add schedule_work_on() */
728 schedule_delayed_work_on(cpu, &work.work, 0); 719 schedule_delayed_work_on(cpu, &work.work, 0);
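The new hpet_next_event() logic above makes the -ETIME decision from a single counter readback: the difference between the programmed comparator and the current counter is computed in 32-bit space and interpreted as signed, which stays correct across wraparound. A standalone sketch of that check:

#include <linux/errno.h>
#include <linux/types.h>

static int event_in_time(u32 programmed_cmp, u32 current_cnt)
{
	/* signed difference survives 32-bit counter wraparound */
	s32 res = (s32)(programmed_cmp - current_cnt);

	/* fewer than 8 HPET cycles of margin (or already behind): too late */
	return res < 8 ? -ETIME : 0;
}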
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 10709f29d166..96656f207751 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -17,6 +17,7 @@
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/uaccess.h> 18#include <linux/uaccess.h>
19#include <linux/percpu.h> 19#include <linux/percpu.h>
20#include <linux/mm.h>
20 21
21#include <asm/apic.h> 22#include <asm/apic.h>
22 23
@@ -49,21 +50,17 @@ static inline int check_stack_overflow(void) { return 0; }
49static inline void print_stack_overflow(void) { } 50static inline void print_stack_overflow(void) { }
50#endif 51#endif
51 52
52#ifdef CONFIG_4KSTACKS
53/* 53/*
54 * per-CPU IRQ handling contexts (thread information and stack) 54 * per-CPU IRQ handling contexts (thread information and stack)
55 */ 55 */
56union irq_ctx { 56union irq_ctx {
57 struct thread_info tinfo; 57 struct thread_info tinfo;
58 u32 stack[THREAD_SIZE/sizeof(u32)]; 58 u32 stack[THREAD_SIZE/sizeof(u32)];
59} __attribute__((aligned(PAGE_SIZE))); 59} __attribute__((aligned(THREAD_SIZE)));
60 60
61static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); 61static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
62static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); 62static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
63 63
64static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack);
65static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack);
66
67static void call_on_stack(void *func, void *stack) 64static void call_on_stack(void *func, void *stack)
68{ 65{
69 asm volatile("xchgl %%ebx,%%esp \n" 66 asm volatile("xchgl %%ebx,%%esp \n"
@@ -129,7 +126,9 @@ void __cpuinit irq_ctx_init(int cpu)
129 if (per_cpu(hardirq_ctx, cpu)) 126 if (per_cpu(hardirq_ctx, cpu))
130 return; 127 return;
131 128
132 irqctx = &per_cpu(hardirq_stack, cpu); 129 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
130 THREAD_FLAGS,
131 THREAD_ORDER));
133 irqctx->tinfo.task = NULL; 132 irqctx->tinfo.task = NULL;
134 irqctx->tinfo.exec_domain = NULL; 133 irqctx->tinfo.exec_domain = NULL;
135 irqctx->tinfo.cpu = cpu; 134 irqctx->tinfo.cpu = cpu;
@@ -138,7 +137,9 @@ void __cpuinit irq_ctx_init(int cpu)
138 137
139 per_cpu(hardirq_ctx, cpu) = irqctx; 138 per_cpu(hardirq_ctx, cpu) = irqctx;
140 139
141 irqctx = &per_cpu(softirq_stack, cpu); 140 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
141 THREAD_FLAGS,
142 THREAD_ORDER));
142 irqctx->tinfo.task = NULL; 143 irqctx->tinfo.task = NULL;
143 irqctx->tinfo.exec_domain = NULL; 144 irqctx->tinfo.exec_domain = NULL;
144 irqctx->tinfo.cpu = cpu; 145 irqctx->tinfo.cpu = cpu;
@@ -151,11 +152,6 @@ void __cpuinit irq_ctx_init(int cpu)
151 cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); 152 cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
152} 153}
153 154
154void irq_ctx_exit(int cpu)
155{
156 per_cpu(hardirq_ctx, cpu) = NULL;
157}
158
159asmlinkage void do_softirq(void) 155asmlinkage void do_softirq(void)
160{ 156{
161 unsigned long flags; 157 unsigned long flags;
@@ -187,11 +183,6 @@ asmlinkage void do_softirq(void)
187 local_irq_restore(flags); 183 local_irq_restore(flags);
188} 184}
189 185
190#else
191static inline int
192execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
193#endif
194
195bool handle_irq(unsigned irq, struct pt_regs *regs) 186bool handle_irq(unsigned irq, struct pt_regs *regs)
196{ 187{
197 struct irq_desc *desc; 188 struct irq_desc *desc;
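The alignment change on union irq_ctx (PAGE_SIZE to THREAD_SIZE) matters because x86-32 recovers the current thread_info by masking the stack pointer with ~(THREAD_SIZE - 1); once the IRQ stacks are allocated with alloc_pages_node() at THREAD_ORDER, a merely page-aligned stack would break that mask when THREAD_SIZE is larger than a page. A stand-alone illustration of the mask, with made-up addresses and THREAD_SIZE assumed to be 8 KiB:

	#include <stdint.h>
	#include <stdio.h>

	#define THREAD_SIZE 8192UL	/* two pages, as with THREAD_ORDER == 1 */

	/* The thread_info sits at the base of the stack area; masking any
	 * address inside the area must land back on that base. */
	static uintptr_t thread_info_base(uintptr_t sp)
	{
		return sp & ~(THREAD_SIZE - 1);
	}

	int main(void)
	{
		uintptr_t aligned_stack = 0xc1000000;	/* THREAD_SIZE aligned */
		uintptr_t page_only     = 0xc1001000;	/* only PAGE_SIZE aligned */

		/* A pointer somewhere in the middle of each stack: */
		printf("ok:     %#lx\n", (unsigned long)thread_info_base(aligned_stack + 0x1234));
		printf("broken: %#lx\n", (unsigned long)thread_info_base(page_only + 0x1234));
		return 0;
	}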
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 8afd9f321f10..90fcf62854bb 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -78,6 +78,7 @@ static int setup_data_open(struct inode *inode, struct file *file)
78static const struct file_operations fops_setup_data = { 78static const struct file_operations fops_setup_data = {
79 .read = setup_data_read, 79 .read = setup_data_read,
80 .open = setup_data_open, 80 .open = setup_data_open,
81 .llseek = default_llseek,
81}; 82};
82 83
83static int __init 84static int __init
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 852b81967a37..ec592caac4b4 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -387,7 +387,7 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
387 * disable hardware debugging while it is processing gdb packets or 387 * disable hardware debugging while it is processing gdb packets or
388 * handling exception. 388 * handling exception.
389 */ 389 */
390void kgdb_disable_hw_debug(struct pt_regs *regs) 390static void kgdb_disable_hw_debug(struct pt_regs *regs)
391{ 391{
392 int i; 392 int i;
393 int cpu = raw_smp_processor_id(); 393 int cpu = raw_smp_processor_id();
@@ -477,8 +477,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
477 raw_smp_processor_id()); 477 raw_smp_processor_id());
478 } 478 }
479 479
480 kgdb_correct_hw_break();
481
482 return 0; 480 return 0;
483 } 481 }
484 482
@@ -621,7 +619,12 @@ int kgdb_arch_init(void)
621static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi, 619static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi,
622 struct perf_sample_data *data, struct pt_regs *regs) 620 struct perf_sample_data *data, struct pt_regs *regs)
623{ 621{
624 kgdb_ll_trap(DIE_DEBUG, "debug", regs, 0, 0, SIGTRAP); 622 struct task_struct *tsk = current;
623 int i;
624
625 for (i = 0; i < 4; i++)
626 if (breakinfo[i].enabled)
627 tsk->thread.debugreg6 |= (DR_TRAP0 << i);
625} 628}
626 629
627void kgdb_arch_late(void) 630void kgdb_arch_late(void)
@@ -644,7 +647,7 @@ void kgdb_arch_late(void)
644 if (breakinfo[i].pev) 647 if (breakinfo[i].pev)
645 continue; 648 continue;
646 breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); 649 breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL);
647 if (IS_ERR(breakinfo[i].pev)) { 650 if (IS_ERR((void * __force)breakinfo[i].pev)) {
648 printk(KERN_ERR "kgdb: Could not allocate hw" 651 printk(KERN_ERR "kgdb: Could not allocate hw"
649 "breakpoints\nDisabling the kernel debugger\n"); 652 "breakpoints\nDisabling the kernel debugger\n");
650 breakinfo[i].pev = NULL; 653 breakinfo[i].pev = NULL;
@@ -721,6 +724,7 @@ struct kgdb_arch arch_kgdb_ops = {
721 .flags = KGDB_HW_BREAKPOINT, 724 .flags = KGDB_HW_BREAKPOINT,
722 .set_hw_breakpoint = kgdb_set_hw_break, 725 .set_hw_breakpoint = kgdb_set_hw_break,
723 .remove_hw_breakpoint = kgdb_remove_hw_break, 726 .remove_hw_breakpoint = kgdb_remove_hw_break,
727 .disable_hw_break = kgdb_disable_hw_debug,
724 .remove_all_hw_break = kgdb_remove_all_hw_break, 728 .remove_all_hw_break = kgdb_remove_all_hw_break,
725 .correct_hw_break = kgdb_correct_hw_break, 729 .correct_hw_break = kgdb_correct_hw_break,
726}; 730};
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index eb9b76c716c2..ca43ce31a19c 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -128,13 +128,15 @@ static struct clocksource kvm_clock = {
128static int kvm_register_clock(char *txt) 128static int kvm_register_clock(char *txt)
129{ 129{
130 int cpu = smp_processor_id(); 130 int cpu = smp_processor_id();
131 int low, high; 131 int low, high, ret;
132
132 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; 133 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
133 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); 134 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
135 ret = native_write_msr_safe(msr_kvm_system_time, low, high);
134 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", 136 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
135 cpu, high, low, txt); 137 cpu, high, low, txt);
136 138
137 return native_write_msr_safe(msr_kvm_system_time, low, high); 139 return ret;
138} 140}
139 141
140#ifdef CONFIG_X86_LOCAL_APIC 142#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index e1af7c055c7d..ce0cb4721c9a 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -212,7 +212,7 @@ static int install_equiv_cpu_table(const u8 *buf)
212 return 0; 212 return 0;
213 } 213 }
214 214
215 equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); 215 equiv_cpu_table = vmalloc(size);
216 if (!equiv_cpu_table) { 216 if (!equiv_cpu_table) {
217 pr_err("failed to allocate equivalent CPU table\n"); 217 pr_err("failed to allocate equivalent CPU table\n");
218 return 0; 218 return 0;
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index fa6551d36c10..1cca374a2bac 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -12,7 +12,7 @@
12 * Software Developer's Manual 12 * Software Developer's Manual
13 * Order Number 253668 or free download from: 13 * Order Number 253668 or free download from:
14 * 14 *
15 * http://developer.intel.com/design/pentium4/manuals/253668.htm 15 * http://developer.intel.com/Assets/PDF/manual/253668.pdf
16 * 16 *
17 * For more information, go to http://www.urbanmyth.org/microcode 17 * For more information, go to http://www.urbanmyth.org/microcode
18 * 18 *
@@ -232,6 +232,7 @@ static const struct file_operations microcode_fops = {
232 .owner = THIS_MODULE, 232 .owner = THIS_MODULE,
233 .write = microcode_write, 233 .write = microcode_write,
234 .open = microcode_open, 234 .open = microcode_open,
235 .llseek = no_llseek,
235}; 236};
236 237
237static struct miscdevice microcode_dev = { 238static struct miscdevice microcode_dev = {
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 356170262a93..dcb65cc0a053 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -12,7 +12,7 @@
12 * Software Developer's Manual 12 * Software Developer's Manual
13 * Order Number 253668 or free download from: 13 * Order Number 253668 or free download from:
14 * 14 *
15 * http://developer.intel.com/design/pentium4/manuals/253668.htm 15 * http://developer.intel.com/Assets/PDF/manual/253668.pdf
16 * 16 *
17 * For more information, go to http://www.urbanmyth.org/microcode 17 * For more information, go to http://www.urbanmyth.org/microcode
18 * 18 *
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 71825806cd44..6da143c2a6b8 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -217,13 +217,13 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
217 wrmsrl(address, val); 217 wrmsrl(address, val);
218} 218}
219 219
220static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d) 220static int __init set_check_enable_amd_mmconf(const struct dmi_system_id *d)
221{ 221{
222 pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF; 222 pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF;
223 return 0; 223 return 0;
224} 224}
225 225
226static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = { 226static const struct dmi_system_id __initconst mmconf_dmi_table[] = {
227 { 227 {
228 .callback = set_check_enable_amd_mmconf, 228 .callback = set_check_enable_amd_mmconf,
229 .ident = "Sun Microsystems Machine", 229 .ident = "Sun Microsystems Machine",
@@ -234,7 +234,8 @@ static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = {
234 {} 234 {}
235}; 235};
236 236
237void __cpuinit check_enable_amd_mmconf_dmi(void) 237/* Called from a __cpuinit function, but only on the BSP. */
238void __ref check_enable_amd_mmconf_dmi(void)
238{ 239{
239 dmi_check_system(mmconf_dmi_table); 240 dmi_check_system(mmconf_dmi_table);
240} 241}
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c
deleted file mode 100644
index 79ae68154e87..000000000000
--- a/arch/x86/kernel/mrst.c
+++ /dev/null
@@ -1,311 +0,0 @@
1/*
2 * mrst.c: Intel Moorestown platform specific setup code
3 *
4 * (C) Copyright 2008 Intel Corporation
5 * Author: Jacob Pan (jacob.jun.pan@intel.com)
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 */
12#include <linux/init.h>
13#include <linux/kernel.h>
14#include <linux/sfi.h>
15#include <linux/irq.h>
16#include <linux/module.h>
17
18#include <asm/setup.h>
19#include <asm/mpspec_def.h>
20#include <asm/hw_irq.h>
21#include <asm/apic.h>
22#include <asm/io_apic.h>
23#include <asm/mrst.h>
24#include <asm/io.h>
25#include <asm/i8259.h>
26#include <asm/apb_timer.h>
27
28/*
29 * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
30 * cmdline option x86_mrst_timer can be used to override the configuration
31 * to prefer one or the other.
32 * at runtime, there are basically three timer configurations:
33 * 1. per cpu apbt clock only
34 * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only
35 * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast.
36 *
37 * by default (without cmdline option), platform code first detects cpu type
38 * to see if we are on lincroft or penwell, then set up both lapic or apbt
39 * clocks accordingly.
40 * i.e. by default, medfield uses configuration #2, moorestown uses #1.
41 * config #3 is supported but not recommended on medfield.
42 *
43 * rating and feature summary:
44 * lapic (with C3STOP) --------- 100
45 * apbt (always-on) ------------ 110
46 * lapic (always-on,ARAT) ------ 150
47 */
48
49__cpuinitdata enum mrst_timer_options mrst_timer_options;
50
51static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
52static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
53enum mrst_cpu_type __mrst_cpu_chip;
54EXPORT_SYMBOL_GPL(__mrst_cpu_chip);
55
56int sfi_mtimer_num;
57
58struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
59EXPORT_SYMBOL_GPL(sfi_mrtc_array);
60int sfi_mrtc_num;
61
62static inline void assign_to_mp_irq(struct mpc_intsrc *m,
63 struct mpc_intsrc *mp_irq)
64{
65 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
66}
67
68static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq,
69 struct mpc_intsrc *m)
70{
71 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
72}
73
74static void save_mp_irq(struct mpc_intsrc *m)
75{
76 int i;
77
78 for (i = 0; i < mp_irq_entries; i++) {
79 if (!mp_irq_cmp(&mp_irqs[i], m))
80 return;
81 }
82
83 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
84 if (++mp_irq_entries == MAX_IRQ_SOURCES)
85 panic("Max # of irq sources exceeded!!\n");
86}
87
88/* parse all the mtimer info to a static mtimer array */
89static int __init sfi_parse_mtmr(struct sfi_table_header *table)
90{
91 struct sfi_table_simple *sb;
92 struct sfi_timer_table_entry *pentry;
93 struct mpc_intsrc mp_irq;
94 int totallen;
95
96 sb = (struct sfi_table_simple *)table;
97 if (!sfi_mtimer_num) {
98 sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb,
99 struct sfi_timer_table_entry);
100 pentry = (struct sfi_timer_table_entry *) sb->pentry;
101 totallen = sfi_mtimer_num * sizeof(*pentry);
102 memcpy(sfi_mtimer_array, pentry, totallen);
103 }
104
105 printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num);
106 pentry = sfi_mtimer_array;
107 for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
108 printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz,"
109 " irq = %d\n", totallen, (u32)pentry->phys_addr,
110 pentry->freq_hz, pentry->irq);
111 if (!pentry->irq)
112 continue;
113 mp_irq.type = MP_IOAPIC;
114 mp_irq.irqtype = mp_INT;
115/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
116 mp_irq.irqflag = 5;
117 mp_irq.srcbus = 0;
118 mp_irq.srcbusirq = pentry->irq; /* IRQ */
119 mp_irq.dstapic = MP_APIC_ALL;
120 mp_irq.dstirq = pentry->irq;
121 save_mp_irq(&mp_irq);
122 }
123
124 return 0;
125}
126
127struct sfi_timer_table_entry *sfi_get_mtmr(int hint)
128{
129 int i;
130 if (hint < sfi_mtimer_num) {
131 if (!sfi_mtimer_usage[hint]) {
132 pr_debug("hint taken for timer %d irq %d\n",\
133 hint, sfi_mtimer_array[hint].irq);
134 sfi_mtimer_usage[hint] = 1;
135 return &sfi_mtimer_array[hint];
136 }
137 }
138 /* take the first timer available */
139 for (i = 0; i < sfi_mtimer_num;) {
140 if (!sfi_mtimer_usage[i]) {
141 sfi_mtimer_usage[i] = 1;
142 return &sfi_mtimer_array[i];
143 }
144 i++;
145 }
146 return NULL;
147}
148
149void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr)
150{
151 int i;
152 for (i = 0; i < sfi_mtimer_num;) {
153 if (mtmr->irq == sfi_mtimer_array[i].irq) {
154 sfi_mtimer_usage[i] = 0;
155 return;
156 }
157 i++;
158 }
159}
160
161/* parse all the mrtc info to a global mrtc array */
162int __init sfi_parse_mrtc(struct sfi_table_header *table)
163{
164 struct sfi_table_simple *sb;
165 struct sfi_rtc_table_entry *pentry;
166 struct mpc_intsrc mp_irq;
167
168 int totallen;
169
170 sb = (struct sfi_table_simple *)table;
171 if (!sfi_mrtc_num) {
172 sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb,
173 struct sfi_rtc_table_entry);
174 pentry = (struct sfi_rtc_table_entry *)sb->pentry;
175 totallen = sfi_mrtc_num * sizeof(*pentry);
176 memcpy(sfi_mrtc_array, pentry, totallen);
177 }
178
179 printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num);
180 pentry = sfi_mrtc_array;
181 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
182 printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n",
183 totallen, (u32)pentry->phys_addr, pentry->irq);
184 mp_irq.type = MP_IOAPIC;
185 mp_irq.irqtype = mp_INT;
186 mp_irq.irqflag = 0;
187 mp_irq.srcbus = 0;
188 mp_irq.srcbusirq = pentry->irq; /* IRQ */
189 mp_irq.dstapic = MP_APIC_ALL;
190 mp_irq.dstirq = pentry->irq;
191 save_mp_irq(&mp_irq);
192 }
193 return 0;
194}
195
196static unsigned long __init mrst_calibrate_tsc(void)
197{
198 unsigned long flags, fast_calibrate;
199
200 local_irq_save(flags);
201 fast_calibrate = apbt_quick_calibrate();
202 local_irq_restore(flags);
203
204 if (fast_calibrate)
205 return fast_calibrate;
206
207 return 0;
208}
209
210void __init mrst_time_init(void)
211{
212 switch (mrst_timer_options) {
213 case MRST_TIMER_APBT_ONLY:
214 break;
215 case MRST_TIMER_LAPIC_APBT:
216 x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
217 x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
218 break;
219 default:
220 if (!boot_cpu_has(X86_FEATURE_ARAT))
221 break;
222 x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
223 x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
224 return;
225 }
226 /* we need at least one APB timer */
227 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
228 pre_init_apic_IRQ0();
229 apbt_time_init();
230}
231
232void __init mrst_rtc_init(void)
233{
234 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
235}
236
237void __cpuinit mrst_arch_setup(void)
238{
239 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
240 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
241 else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26)
242 __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
243 else {
244 pr_err("Unknown Moorestown CPU (%d:%d), default to Lincroft\n",
245 boot_cpu_data.x86, boot_cpu_data.x86_model);
246 __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
247 }
248 pr_debug("Moorestown CPU %s identified\n",
249 (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ?
250 "Lincroft" : "Penwell");
251}
252
253/* MID systems don't have i8042 controller */
254static int mrst_i8042_detect(void)
255{
256 return 0;
257}
258
259/*
260 * Moorestown specific x86_init function overrides and early setup
261 * calls.
262 */
263void __init x86_mrst_early_setup(void)
264{
265 x86_init.resources.probe_roms = x86_init_noop;
266 x86_init.resources.reserve_resources = x86_init_noop;
267
268 x86_init.timers.timer_init = mrst_time_init;
269 x86_init.timers.setup_percpu_clockev = x86_init_noop;
270
271 x86_init.irqs.pre_vector_init = x86_init_noop;
272
273 x86_init.oem.arch_setup = mrst_arch_setup;
274
275 x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
276
277 x86_platform.calibrate_tsc = mrst_calibrate_tsc;
278 x86_platform.i8042_detect = mrst_i8042_detect;
279 x86_init.pci.init = pci_mrst_init;
280 x86_init.pci.fixup_irqs = x86_init_noop;
281
282 legacy_pic = &null_legacy_pic;
283
284 /* Avoid searching for BIOS MP tables */
285 x86_init.mpparse.find_smp_config = x86_init_noop;
286 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
287
288}
289
290/*
291 * if user does not want to use per CPU apb timer, just give it a lower rating
292 * than local apic timer and skip the late per cpu timer init.
293 */
294static inline int __init setup_x86_mrst_timer(char *arg)
295{
296 if (!arg)
297 return -EINVAL;
298
299 if (strcmp("apbt_only", arg) == 0)
300 mrst_timer_options = MRST_TIMER_APBT_ONLY;
301 else if (strcmp("lapic_and_apbt", arg) == 0)
302 mrst_timer_options = MRST_TIMER_LAPIC_APBT;
303 else {
304 pr_warning("X86 MRST timer option %s not recognised"
305 " use x86_mrst_timer=apbt_only or lapic_and_apbt\n",
306 arg);
307 return -EINVAL;
308 }
309 return 0;
310}
311__setup("x86_mrst_timer=", setup_x86_mrst_timer);
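The sfi_get_mtmr()/sfi_free_mtmr() helpers removed above hand out entries from the parsed SFI MTMR table, first honouring a hint index and otherwise returning the first unused timer. A hypothetical in-kernel consumer is sketched below; the hint value, the pr_info text and the error handling are illustrative, and the snippet only builds in-tree against <linux/sfi.h>.

	#include <linux/kernel.h>
	#include <linux/errno.h>
	#include <linux/sfi.h>

	/* Claim the first free SFI timer (hint 0), report it, and hand it back. */
	static int __init example_claim_sfi_timer(void)
	{
		struct sfi_timer_table_entry *tmr = sfi_get_mtmr(0);

		if (!tmr)
			return -ENODEV;

		pr_info("example: timer at %#llx, %u Hz, irq %d\n",
			(unsigned long long)tmr->phys_addr, tmr->freq_hz, tmr->irq);

		sfi_free_mtmr(tmr);
		return 0;
	}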
diff --git a/arch/x86/kernel/olpc-xo1.c b/arch/x86/kernel/olpc-xo1.c
deleted file mode 100644
index f5442c03abc3..000000000000
--- a/arch/x86/kernel/olpc-xo1.c
+++ /dev/null
@@ -1,140 +0,0 @@
1/*
2 * Support for features of the OLPC XO-1 laptop
3 *
4 * Copyright (C) 2010 One Laptop per Child
5 * Copyright (C) 2006 Red Hat, Inc.
6 * Copyright (C) 2006 Advanced Micro Devices, Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/module.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/platform_device.h>
18#include <linux/pm.h>
19
20#include <asm/io.h>
21#include <asm/olpc.h>
22
23#define DRV_NAME "olpc-xo1"
24
25#define PMS_BAR 4
26#define ACPI_BAR 5
27
28/* PMC registers (PMS block) */
29#define PM_SCLK 0x10
30#define PM_IN_SLPCTL 0x20
31#define PM_WKXD 0x34
32#define PM_WKD 0x30
33#define PM_SSC 0x54
34
35/* PM registers (ACPI block) */
36#define PM1_CNT 0x08
37#define PM_GPE0_STS 0x18
38
39static unsigned long acpi_base;
40static unsigned long pms_base;
41
42static void xo1_power_off(void)
43{
44 printk(KERN_INFO "OLPC XO-1 power off sequence...\n");
45
46 /* Enable all of these controls with 0 delay */
47 outl(0x40000000, pms_base + PM_SCLK);
48 outl(0x40000000, pms_base + PM_IN_SLPCTL);
49 outl(0x40000000, pms_base + PM_WKXD);
50 outl(0x40000000, pms_base + PM_WKD);
51
52 /* Clear status bits (possibly unnecessary) */
53 outl(0x0002ffff, pms_base + PM_SSC);
54 outl(0xffffffff, acpi_base + PM_GPE0_STS);
55
56 /* Write SLP_EN bit to start the machinery */
57 outl(0x00002000, acpi_base + PM1_CNT);
58}
59
60/* Read the base addresses from the PCI BAR info */
61static int __devinit setup_bases(struct pci_dev *pdev)
62{
63 int r;
64
65 r = pci_enable_device_io(pdev);
66 if (r) {
67 dev_err(&pdev->dev, "can't enable device IO\n");
68 return r;
69 }
70
71 r = pci_request_region(pdev, ACPI_BAR, DRV_NAME);
72 if (r) {
73 dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", ACPI_BAR);
74 return r;
75 }
76
77 r = pci_request_region(pdev, PMS_BAR, DRV_NAME);
78 if (r) {
79 dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", PMS_BAR);
80 pci_release_region(pdev, ACPI_BAR);
81 return r;
82 }
83
84 acpi_base = pci_resource_start(pdev, ACPI_BAR);
85 pms_base = pci_resource_start(pdev, PMS_BAR);
86
87 return 0;
88}
89
90static int __devinit olpc_xo1_probe(struct platform_device *pdev)
91{
92 struct pci_dev *pcidev;
93 int r;
94
95 pcidev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA,
96 NULL);
97	if (!pcidev)
98 return -ENODEV;
99
100 r = setup_bases(pcidev);
101 if (r)
102 return r;
103
104 pm_power_off = xo1_power_off;
105
106 printk(KERN_INFO "OLPC XO-1 support registered\n");
107 return 0;
108}
109
110static int __devexit olpc_xo1_remove(struct platform_device *pdev)
111{
112 pm_power_off = NULL;
113 return 0;
114}
115
116static struct platform_driver olpc_xo1_driver = {
117 .driver = {
118 .name = DRV_NAME,
119 .owner = THIS_MODULE,
120 },
121 .probe = olpc_xo1_probe,
122 .remove = __devexit_p(olpc_xo1_remove),
123};
124
125static int __init olpc_xo1_init(void)
126{
127 return platform_driver_register(&olpc_xo1_driver);
128}
129
130static void __exit olpc_xo1_exit(void)
131{
132 platform_driver_unregister(&olpc_xo1_driver);
133}
134
135MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>");
136MODULE_LICENSE("GPL");
137MODULE_ALIAS("platform:olpc-xo1");
138
139module_init(olpc_xo1_init);
140module_exit(olpc_xo1_exit);
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
deleted file mode 100644
index edaf3fe8dc5e..000000000000
--- a/arch/x86/kernel/olpc.c
+++ /dev/null
@@ -1,281 +0,0 @@
1/*
2 * Support for the OLPC DCON and OLPC EC access
3 *
4 * Copyright © 2006 Advanced Micro Devices, Inc.
5 * Copyright © 2007-2008 Andres Salomon <dilinger@debian.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 */
12
13#include <linux/kernel.h>
14#include <linux/init.h>
15#include <linux/module.h>
16#include <linux/delay.h>
17#include <linux/spinlock.h>
18#include <linux/io.h>
19#include <linux/string.h>
20#include <linux/platform_device.h>
21
22#include <asm/geode.h>
23#include <asm/setup.h>
24#include <asm/olpc.h>
25#include <asm/olpc_ofw.h>
26
27struct olpc_platform_t olpc_platform_info;
28EXPORT_SYMBOL_GPL(olpc_platform_info);
29
30static DEFINE_SPINLOCK(ec_lock);
31
32/* what the timeout *should* be (in ms) */
33#define EC_BASE_TIMEOUT 20
34
35/* the timeout that bugs in the EC might force us to actually use */
36static int ec_timeout = EC_BASE_TIMEOUT;
37
38static int __init olpc_ec_timeout_set(char *str)
39{
40 if (get_option(&str, &ec_timeout) != 1) {
41 ec_timeout = EC_BASE_TIMEOUT;
42 printk(KERN_ERR "olpc-ec: invalid argument to "
43 "'olpc_ec_timeout=', ignoring!\n");
44 }
45 printk(KERN_DEBUG "olpc-ec: using %d ms delay for EC commands.\n",
46 ec_timeout);
47 return 1;
48}
49__setup("olpc_ec_timeout=", olpc_ec_timeout_set);
50
51/*
52 * These {i,o}bf_status functions return whether the buffers are full or not.
53 */
54
55static inline unsigned int ibf_status(unsigned int port)
56{
57 return !!(inb(port) & 0x02);
58}
59
60static inline unsigned int obf_status(unsigned int port)
61{
62 return inb(port) & 0x01;
63}
64
65#define wait_on_ibf(p, d) __wait_on_ibf(__LINE__, (p), (d))
66static int __wait_on_ibf(unsigned int line, unsigned int port, int desired)
67{
68 unsigned int timeo;
69 int state = ibf_status(port);
70
71 for (timeo = ec_timeout; state != desired && timeo; timeo--) {
72 mdelay(1);
73 state = ibf_status(port);
74 }
75
76 if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) &&
77 timeo < (ec_timeout - EC_BASE_TIMEOUT)) {
78 printk(KERN_WARNING "olpc-ec: %d: waited %u ms for IBF!\n",
79 line, ec_timeout - timeo);
80 }
81
82 return !(state == desired);
83}
84
85#define wait_on_obf(p, d) __wait_on_obf(__LINE__, (p), (d))
86static int __wait_on_obf(unsigned int line, unsigned int port, int desired)
87{
88 unsigned int timeo;
89 int state = obf_status(port);
90
91 for (timeo = ec_timeout; state != desired && timeo; timeo--) {
92 mdelay(1);
93 state = obf_status(port);
94 }
95
96 if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) &&
97 timeo < (ec_timeout - EC_BASE_TIMEOUT)) {
98 printk(KERN_WARNING "olpc-ec: %d: waited %u ms for OBF!\n",
99 line, ec_timeout - timeo);
100 }
101
102 return !(state == desired);
103}
104
105/*
106 * This allows the kernel to run Embedded Controller commands. The EC is
107 * documented at <http://wiki.laptop.org/go/Embedded_controller>, and the
108 * available EC commands are here:
109 * <http://wiki.laptop.org/go/Ec_specification>. Unfortunately, while
110 * OpenFirmware's source is available, the EC's is not.
111 */
112int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
113 unsigned char *outbuf, size_t outlen)
114{
115 unsigned long flags;
116 int ret = -EIO;
117 int i;
118 int restarts = 0;
119
120 spin_lock_irqsave(&ec_lock, flags);
121
122 /* Clear OBF */
123 for (i = 0; i < 10 && (obf_status(0x6c) == 1); i++)
124 inb(0x68);
125 if (i == 10) {
126 printk(KERN_ERR "olpc-ec: timeout while attempting to "
127 "clear OBF flag!\n");
128 goto err;
129 }
130
131 if (wait_on_ibf(0x6c, 0)) {
132 printk(KERN_ERR "olpc-ec: timeout waiting for EC to "
133 "quiesce!\n");
134 goto err;
135 }
136
137restart:
138 /*
139 * Note that if we time out during any IBF checks, that's a failure;
140 * we have to return. There's no way for the kernel to clear that.
141 *
142 * If we time out during an OBF check, we can restart the command;
143 * reissuing it will clear the OBF flag, and we should be alright.
144 * The OBF flag will sometimes misbehave due to what we believe
145 * is a hardware quirk..
146 */
147 pr_devel("olpc-ec: running cmd 0x%x\n", cmd);
148 outb(cmd, 0x6c);
149
150 if (wait_on_ibf(0x6c, 0)) {
151 printk(KERN_ERR "olpc-ec: timeout waiting for EC to read "
152 "command!\n");
153 goto err;
154 }
155
156 if (inbuf && inlen) {
157 /* write data to EC */
158 for (i = 0; i < inlen; i++) {
159 if (wait_on_ibf(0x6c, 0)) {
160 printk(KERN_ERR "olpc-ec: timeout waiting for"
161 " EC accept data!\n");
162 goto err;
163 }
164 pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]);
165 outb(inbuf[i], 0x68);
166 }
167 }
168 if (outbuf && outlen) {
169 /* read data from EC */
170 for (i = 0; i < outlen; i++) {
171 if (wait_on_obf(0x6c, 1)) {
172 printk(KERN_ERR "olpc-ec: timeout waiting for"
173 " EC to provide data!\n");
174 if (restarts++ < 10)
175 goto restart;
176 goto err;
177 }
178 outbuf[i] = inb(0x68);
179 pr_devel("olpc-ec: received 0x%x\n", outbuf[i]);
180 }
181 }
182
183 ret = 0;
184err:
185 spin_unlock_irqrestore(&ec_lock, flags);
186 return ret;
187}
188EXPORT_SYMBOL_GPL(olpc_ec_cmd);
189
190static bool __init check_ofw_architecture(void)
191{
192 size_t propsize;
193 char olpc_arch[5];
194 const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 };
195 void *res[] = { &propsize };
196
197 if (olpc_ofw("getprop", args, res)) {
198 printk(KERN_ERR "ofw: getprop call failed!\n");
199 return false;
200 }
201 return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0;
202}
203
204static u32 __init get_board_revision(void)
205{
206 size_t propsize;
207 __be32 rev;
208 const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 };
209 void *res[] = { &propsize };
210
211 if (olpc_ofw("getprop", args, res) || propsize != 4) {
212 printk(KERN_ERR "ofw: getprop call failed!\n");
213 return cpu_to_be32(0);
214 }
215 return be32_to_cpu(rev);
216}
217
218static bool __init platform_detect(void)
219{
220 if (!check_ofw_architecture())
221 return false;
222 olpc_platform_info.flags |= OLPC_F_PRESENT;
223 olpc_platform_info.boardrev = get_board_revision();
224 return true;
225}
226
227static int __init add_xo1_platform_devices(void)
228{
229 struct platform_device *pdev;
230
231 pdev = platform_device_register_simple("xo1-rfkill", -1, NULL, 0);
232 if (IS_ERR(pdev))
233 return PTR_ERR(pdev);
234
235 pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0);
236 if (IS_ERR(pdev))
237 return PTR_ERR(pdev);
238
239 return 0;
240}
241
242static int __init olpc_init(void)
243{
244 int r = 0;
245
246 if (!olpc_ofw_present() || !platform_detect())
247 return 0;
248
249 spin_lock_init(&ec_lock);
250
251 /* assume B1 and above models always have a DCON */
252 if (olpc_board_at_least(olpc_board(0xb1)))
253 olpc_platform_info.flags |= OLPC_F_DCON;
254
255 /* get the EC revision */
256 olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
257 (unsigned char *) &olpc_platform_info.ecver, 1);
258
259#ifdef CONFIG_PCI_OLPC
260 /* If the VSA exists let it emulate PCI, if not emulate in kernel.
261 * XO-1 only. */
262 if (olpc_platform_info.boardrev < olpc_board_pre(0xd0) &&
263 !cs5535_has_vsa2())
264 x86_init.pci.arch_init = pci_olpc_init;
265#endif
266
267 printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
268 ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
269 olpc_platform_info.boardrev >> 4,
270 olpc_platform_info.ecver);
271
272 if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) { /* XO-1 */
273 r = add_xo1_platform_devices();
274 if (r)
275 return r;
276 }
277
278 return 0;
279}
280
281postcore_initcall(olpc_init);
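olpc_ec_cmd() above serializes a command/response exchange with the embedded controller over ports 0x6c/0x68, clearing OBF first and restarting on OBF glitches. A minimal caller mirrors what olpc_init() does for the firmware revision; the wrapper name and error handling below are illustrative, and the sketch only builds in-tree, with EC_FIRMWARE_REV coming from <asm/olpc.h>.

	#include <linux/kernel.h>
	#include <asm/olpc.h>

	/* Read the one-byte EC firmware revision, as olpc_init() does. */
	static int example_read_ec_rev(unsigned char *rev)
	{
		int ret = olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, rev, 1);

		if (ret)
			pr_err("example: EC_FIRMWARE_REV failed: %d\n", ret);
		return ret;
	}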
diff --git a/arch/x86/kernel/olpc_ofw.c b/arch/x86/kernel/olpc_ofw.c
deleted file mode 100644
index 787320464379..000000000000
--- a/arch/x86/kernel/olpc_ofw.c
+++ /dev/null
@@ -1,112 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/module.h>
3#include <linux/init.h>
4#include <asm/page.h>
5#include <asm/setup.h>
6#include <asm/io.h>
7#include <asm/pgtable.h>
8#include <asm/olpc_ofw.h>
9
10/* address of OFW callback interface; will be NULL if OFW isn't found */
11static int (*olpc_ofw_cif)(int *);
12
13/* page dir entry containing OFW's pgdir table; filled in by head_32.S */
14u32 olpc_ofw_pgd __initdata;
15
16static DEFINE_SPINLOCK(ofw_lock);
17
18#define MAXARGS 10
19
20void __init setup_olpc_ofw_pgd(void)
21{
22 pgd_t *base, *ofw_pde;
23
24 if (!olpc_ofw_cif)
25 return;
26
27 /* fetch OFW's PDE */
28 base = early_ioremap(olpc_ofw_pgd, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD);
29 if (!base) {
30 printk(KERN_ERR "failed to remap OFW's pgd - disabling OFW!\n");
31 olpc_ofw_cif = NULL;
32 return;
33 }
34 ofw_pde = &base[OLPC_OFW_PDE_NR];
35
36 /* install OFW's PDE permanently into the kernel's pgtable */
37 set_pgd(&swapper_pg_dir[OLPC_OFW_PDE_NR], *ofw_pde);
38 /* implicit optimization barrier here due to uninline function return */
39
40 early_iounmap(base, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD);
41}
42
43int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res,
44 void **res)
45{
46 int ofw_args[MAXARGS + 3];
47 unsigned long flags;
48 int ret, i, *p;
49
50 BUG_ON(nr_args + nr_res > MAXARGS);
51
52 if (!olpc_ofw_cif)
53 return -EIO;
54
55 ofw_args[0] = (int)name;
56 ofw_args[1] = nr_args;
57 ofw_args[2] = nr_res;
58
59 p = &ofw_args[3];
60 for (i = 0; i < nr_args; i++, p++)
61 *p = (int)args[i];
62
63 /* call into ofw */
64 spin_lock_irqsave(&ofw_lock, flags);
65 ret = olpc_ofw_cif(ofw_args);
66 spin_unlock_irqrestore(&ofw_lock, flags);
67
68 if (!ret) {
69 for (i = 0; i < nr_res; i++, p++)
70 *((int *)res[i]) = *p;
71 }
72
73 return ret;
74}
75EXPORT_SYMBOL_GPL(__olpc_ofw);
76
77bool olpc_ofw_present(void)
78{
79 return olpc_ofw_cif != NULL;
80}
81EXPORT_SYMBOL_GPL(olpc_ofw_present);
82
83/* OFW cif _should_ be above this address */
84#define OFW_MIN 0xff000000
85
86/* OFW starts on a 1MB boundary */
87#define OFW_BOUND (1<<20)
88
89void __init olpc_ofw_detect(void)
90{
91 struct olpc_ofw_header *hdr = &boot_params.olpc_ofw_header;
92 unsigned long start;
93
94 /* ensure OFW booted us by checking for "OFW " string */
95 if (hdr->ofw_magic != OLPC_OFW_SIG)
96 return;
97
98 olpc_ofw_cif = (int (*)(int *))hdr->cif_handler;
99
100 if ((unsigned long)olpc_ofw_cif < OFW_MIN) {
101 printk(KERN_ERR "OFW detected, but cif has invalid address 0x%lx - disabling.\n",
102 (unsigned long)olpc_ofw_cif);
103 olpc_ofw_cif = NULL;
104 return;
105 }
106
107 /* determine where OFW starts in memory */
108 start = round_down((unsigned long)olpc_ofw_cif, OFW_BOUND);
109 printk(KERN_INFO "OFW detected in memory, cif @ 0x%lx (reserving top %ldMB)\n",
110 (unsigned long)olpc_ofw_cif, (-start) >> 20);
111 reserve_top_address(-start);
112}
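__olpc_ofw() marshals the service name, argument count and result count into a flat int array and calls into the firmware under a spinlock. Callers normally go through the olpc_ofw() wrapper macro from <asm/olpc_ofw.h> (not part of this hunk), as check_ofw_architecture() in olpc.c does. A sketch of fetching a property that way; the getprop calling convention (phandle, name, buffer, buflen -> proplen) follows that caller, and the function name here is hypothetical.

	#include <linux/kernel.h>
	#include <asm/olpc_ofw.h>

	/* Fetch the "architecture" property of the root node, as olpc.c does. */
	static int example_ofw_getprop(char *buf, size_t buflen, size_t *proplen)
	{
		const void *args[] = { NULL, "architecture", buf, (void *)buflen };
		void *res[] = { proplen };

		return olpc_ofw("getprop", args, res);
	}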
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index ba0f0ca9f280..c01ffa5b9b87 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -143,7 +143,7 @@ static void flush_gart(void)
143 143
144 spin_lock_irqsave(&iommu_bitmap_lock, flags); 144 spin_lock_irqsave(&iommu_bitmap_lock, flags);
145 if (need_flush) { 145 if (need_flush) {
146 k8_flush_garts(); 146 amd_flush_garts();
147 need_flush = false; 147 need_flush = false;
148 } 148 }
149 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 149 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -561,17 +561,17 @@ static void enable_gart_translations(void)
561{ 561{
562 int i; 562 int i;
563 563
564 if (!k8_northbridges.gart_supported) 564 if (!amd_nb_has_feature(AMD_NB_GART))
565 return; 565 return;
566 566
567 for (i = 0; i < k8_northbridges.num; i++) { 567 for (i = 0; i < amd_nb_num(); i++) {
568 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 568 struct pci_dev *dev = node_to_amd_nb(i)->misc;
569 569
570 enable_gart_translation(dev, __pa(agp_gatt_table)); 570 enable_gart_translation(dev, __pa(agp_gatt_table));
571 } 571 }
572 572
573 /* Flush the GART-TLB to remove stale entries */ 573 /* Flush the GART-TLB to remove stale entries */
574 k8_flush_garts(); 574 amd_flush_garts();
575} 575}
576 576
577/* 577/*
@@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev)
596 if (!fix_up_north_bridges) 596 if (!fix_up_north_bridges)
597 return; 597 return;
598 598
599 if (!k8_northbridges.gart_supported) 599 if (!amd_nb_has_feature(AMD_NB_GART))
600 return; 600 return;
601 601
602 pr_info("PCI-DMA: Restoring GART aperture settings\n"); 602 pr_info("PCI-DMA: Restoring GART aperture settings\n");
603 603
604 for (i = 0; i < k8_northbridges.num; i++) { 604 for (i = 0; i < amd_nb_num(); i++) {
605 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 605 struct pci_dev *dev = node_to_amd_nb(i)->misc;
606 606
607 /* 607 /*
608 * Don't enable translations just yet. That is the next 608 * Don't enable translations just yet. That is the next
@@ -644,7 +644,7 @@ static struct sys_device device_gart = {
644 * Private Northbridge GATT initialization in case we cannot use the 644 * Private Northbridge GATT initialization in case we cannot use the
645 * AGP driver for some reason. 645 * AGP driver for some reason.
646 */ 646 */
647static __init int init_k8_gatt(struct agp_kern_info *info) 647static __init int init_amd_gatt(struct agp_kern_info *info)
648{ 648{
649 unsigned aper_size, gatt_size, new_aper_size; 649 unsigned aper_size, gatt_size, new_aper_size;
650 unsigned aper_base, new_aper_base; 650 unsigned aper_base, new_aper_base;
@@ -656,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
656 656
657 aper_size = aper_base = info->aper_size = 0; 657 aper_size = aper_base = info->aper_size = 0;
658 dev = NULL; 658 dev = NULL;
659 for (i = 0; i < k8_northbridges.num; i++) { 659 for (i = 0; i < amd_nb_num(); i++) {
660 dev = k8_northbridges.nb_misc[i]; 660 dev = node_to_amd_nb(i)->misc;
661 new_aper_base = read_aperture(dev, &new_aper_size); 661 new_aper_base = read_aperture(dev, &new_aper_size);
662 if (!new_aper_base) 662 if (!new_aper_base)
663 goto nommu; 663 goto nommu;
@@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void)
725 if (!no_agp) 725 if (!no_agp)
726 return; 726 return;
727 727
728 if (!k8_northbridges.gart_supported) 728 if (!amd_nb_has_feature(AMD_NB_GART))
729 return; 729 return;
730 730
731 for (i = 0; i < k8_northbridges.num; i++) { 731 for (i = 0; i < amd_nb_num(); i++) {
732 u32 ctl; 732 u32 ctl;
733 733
734 dev = k8_northbridges.nb_misc[i]; 734 dev = node_to_amd_nb(i)->misc;
735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); 735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
736 736
737 ctl &= ~GARTEN; 737 ctl &= ~GARTEN;
@@ -749,14 +749,14 @@ int __init gart_iommu_init(void)
749 unsigned long scratch; 749 unsigned long scratch;
750 long i; 750 long i;
751 751
752 if (!k8_northbridges.gart_supported) 752 if (!amd_nb_has_feature(AMD_NB_GART))
753 return 0; 753 return 0;
754 754
755#ifndef CONFIG_AGP_AMD64 755#ifndef CONFIG_AGP_AMD64
756 no_agp = 1; 756 no_agp = 1;
757#else 757#else
758 /* Makefile puts PCI initialization via subsys_initcall first. */ 758 /* Makefile puts PCI initialization via subsys_initcall first. */
759 /* Add other K8 AGP bridge drivers here */ 759 /* Add other AMD AGP bridge drivers here */
760 no_agp = no_agp || 760 no_agp = no_agp ||
761 (agp_amd64_init() < 0) || 761 (agp_amd64_init() < 0) ||
762 (agp_copy_info(agp_bridge, &info) < 0); 762 (agp_copy_info(agp_bridge, &info) < 0);
@@ -765,7 +765,7 @@ int __init gart_iommu_init(void)
765 if (no_iommu || 765 if (no_iommu ||
766 (!force_iommu && max_pfn <= MAX_DMA32_PFN) || 766 (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
767 !gart_iommu_aperture || 767 !gart_iommu_aperture ||
768 (no_agp && init_k8_gatt(&info) < 0)) { 768 (no_agp && init_amd_gatt(&info) < 0)) {
769 if (max_pfn > MAX_DMA32_PFN) { 769 if (max_pfn > MAX_DMA32_PFN) {
770 pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); 770 pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
771 pr_warning("falling back to iommu=soft.\n"); 771 pr_warning("falling back to iommu=soft.\n");
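The mechanical part of this hunk is the rename from the old k8_northbridges globals to the amd_nb accessors; every "walk all northbridges" loop now follows the same pattern. A condensed sketch of that pattern is below; the config-space read is only an example, and the header names are as used elsewhere in this series.

	#include <linux/kernel.h>
	#include <linux/pci.h>
	#include <asm/amd_nb.h>
	#include <asm/gart.h>

	/* Iterate the AMD northbridge misc devices with the new accessors,
	 * reading the GART aperture control register from each as an example. */
	static void example_walk_amd_northbridges(void)
	{
		int i;

		if (!amd_nb_has_feature(AMD_NB_GART))
			return;

		for (i = 0; i < amd_nb_num(); i++) {
			struct pci_dev *dev = node_to_amd_nb(i)->misc;
			u32 ctl;

			pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
			pr_info("NB %d: GART aperture ctl %#x\n", i, ctl);
		}
	}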
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 70c4872cd8aa..45892dc4b72a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -801,7 +801,8 @@ void ptrace_disable(struct task_struct *child)
801static const struct user_regset_view user_x86_32_view; /* Initialized below. */ 801static const struct user_regset_view user_x86_32_view; /* Initialized below. */
802#endif 802#endif
803 803
804long arch_ptrace(struct task_struct *child, long request, long addr, long data) 804long arch_ptrace(struct task_struct *child, long request,
805 unsigned long addr, unsigned long data)
805{ 806{
806 int ret; 807 int ret;
807 unsigned long __user *datap = (unsigned long __user *)data; 808 unsigned long __user *datap = (unsigned long __user *)data;
@@ -812,8 +813,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
812 unsigned long tmp; 813 unsigned long tmp;
813 814
814 ret = -EIO; 815 ret = -EIO;
815 if ((addr & (sizeof(data) - 1)) || addr < 0 || 816 if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
816 addr >= sizeof(struct user))
817 break; 817 break;
818 818
819 tmp = 0; /* Default return condition */ 819 tmp = 0; /* Default return condition */
@@ -830,8 +830,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
830 830
831 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ 831 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
832 ret = -EIO; 832 ret = -EIO;
833 if ((addr & (sizeof(data) - 1)) || addr < 0 || 833 if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
834 addr >= sizeof(struct user))
835 break; 834 break;
836 835
837 if (addr < sizeof(struct user_regs_struct)) 836 if (addr < sizeof(struct user_regs_struct))
@@ -888,17 +887,17 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
888 887
889#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 888#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
890 case PTRACE_GET_THREAD_AREA: 889 case PTRACE_GET_THREAD_AREA:
891 if (addr < 0) 890 if ((int) addr < 0)
892 return -EIO; 891 return -EIO;
893 ret = do_get_thread_area(child, addr, 892 ret = do_get_thread_area(child, addr,
894 (struct user_desc __user *) data); 893 (struct user_desc __user *)data);
895 break; 894 break;
896 895
897 case PTRACE_SET_THREAD_AREA: 896 case PTRACE_SET_THREAD_AREA:
898 if (addr < 0) 897 if ((int) addr < 0)
899 return -EIO; 898 return -EIO;
900 ret = do_set_thread_area(child, addr, 899 ret = do_set_thread_area(child, addr,
901 (struct user_desc __user *) data, 0); 900 (struct user_desc __user *)data, 0);
902 break; 901 break;
903#endif 902#endif
904 903
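With addr now an unsigned long, the old "addr < 0" tests are dead code (always false), which is why they are dropped for PEEKUSR/POKEUSR and replaced by an (int) cast for the THREAD_AREA requests, where a negative value passed by a 32-bit tracer still has to be rejected. A tiny stand-alone demonstration of the difference:

	#include <stdio.h>

	int main(void)
	{
		unsigned long addr = (unsigned long)-4;	/* tracer passed -4 */

		/* Always false once addr is unsigned: */
		printf("addr < 0      -> %d\n", addr < 0);
		/* The cast recovers the intended sign check: */
		printf("(int)addr < 0 -> %d\n", (int)addr < 0);
		return 0;
	}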
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 239427ca02af..008b91eefa18 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -41,48 +41,11 @@ void pvclock_set_flags(u8 flags)
41 valid_flags = flags; 41 valid_flags = flags;
42} 42}
43 43
44/*
45 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
46 * yielding a 64-bit result.
47 */
48static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
49{
50 u64 product;
51#ifdef __i386__
52 u32 tmp1, tmp2;
53#endif
54
55 if (shift < 0)
56 delta >>= -shift;
57 else
58 delta <<= shift;
59
60#ifdef __i386__
61 __asm__ (
62 "mul %5 ; "
63 "mov %4,%%eax ; "
64 "mov %%edx,%4 ; "
65 "mul %5 ; "
66 "xor %5,%5 ; "
67 "add %4,%%eax ; "
68 "adc %5,%%edx ; "
69 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
70 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
71#elif defined(__x86_64__)
72 __asm__ (
73 "mul %%rdx ; shrd $32,%%rdx,%%rax"
74 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
75#else
76#error implement me!
77#endif
78
79 return product;
80}
81
82static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) 44static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
83{ 45{
84 u64 delta = native_read_tsc() - shadow->tsc_timestamp; 46 u64 delta = native_read_tsc() - shadow->tsc_timestamp;
85 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); 47 return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul,
48 shadow->tsc_shift);
86} 49}
87 50
88/* 51/*
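The scale_delta() removed here is the usual "multiply a 64-bit delta by a 32.32 fixed-point factor" helper; it survives as pvclock_scale_delta() in <asm/pvclock.h>. A portable model of the same arithmetic, using 128-bit math instead of the inline assembly, is sketched below; it assumes a compiler and target with unsigned __int128, and the mul_frac value in main() is a made-up example.

	#include <stdint.h>
	#include <stdio.h>

	/* delta is first shifted by tsc_shift, then multiplied by the 32-bit
	 * fraction mul_frac and divided by 2^32 - i.e. treated as a 32.32
	 * fixed-point multiply.  Same math as pvclock_scale_delta(). */
	static uint64_t scale_delta_model(uint64_t delta, uint32_t mul_frac, int shift)
	{
		if (shift < 0)
			delta >>= -shift;
		else
			delta <<= shift;

		return (uint64_t)(((unsigned __int128)delta * mul_frac) >> 32);
	}

	int main(void)
	{
		/* Example: convert 3,000,000 TSC cycles at 3 GHz to nanoseconds.
		 * mul_frac = 2^32 / 3 encodes "1/3 ns per cycle". */
		uint64_t ns = scale_delta_model(3000000, 0x55555555u, 0);

		printf("%llu ns\n", (unsigned long long)ns);	/* ~1,000,000 */
		return 0;
	}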
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 939b9e98245f..8bbe8c56916d 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -344,6 +344,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235,
344 vt8237_force_enable_hpet); 344 vt8237_force_enable_hpet);
345DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, 345DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
346 vt8237_force_enable_hpet); 346 vt8237_force_enable_hpet);
347DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_CX700,
348 vt8237_force_enable_hpet);
347 349
348static void ati_force_hpet_resume(void) 350static void ati_force_hpet_resume(void)
349{ 351{
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 7a4cf14223ba..c495aa8d4815 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -371,16 +371,10 @@ void machine_real_restart(const unsigned char *code, int length)
371 CMOS_WRITE(0x00, 0x8f); 371 CMOS_WRITE(0x00, 0x8f);
372 spin_unlock(&rtc_lock); 372 spin_unlock(&rtc_lock);
373 373
374 /* Remap the kernel at virtual address zero, as well as offset zero
375 from the kernel segment. This assumes the kernel segment starts at
376 virtual address PAGE_OFFSET. */
377 memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
378 sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
379
380 /* 374 /*
381 * Use `swapper_pg_dir' as our page directory. 375 * Switch back to the initial page table.
382 */ 376 */
383 load_cr3(swapper_pg_dir); 377 load_cr3(initial_page_table);
384 378
385 /* Write 0x1234 to absolute memory location 0x472. The BIOS reads 379 /* Write 0x1234 to absolute memory location 0x472. The BIOS reads
386 this on booting to tell it to "Bypass memory test (also warm 380 this on booting to tell it to "Bypass memory test (also warm
@@ -641,7 +635,7 @@ void native_machine_shutdown(void)
641 /* O.K Now that I'm on the appropriate processor, 635 /* O.K Now that I'm on the appropriate processor,
642 * stop all of the others. 636 * stop all of the others.
643 */ 637 */
644 smp_send_stop(); 638 stop_other_cpus();
645#endif 639#endif
646 640
647 lapic_shutdown(); 641 lapic_shutdown();
diff --git a/arch/x86/kernel/scx200_32.c b/arch/x86/kernel/scx200_32.c
deleted file mode 100644
index 7e004acbe526..000000000000
--- a/arch/x86/kernel/scx200_32.c
+++ /dev/null
@@ -1,131 +0,0 @@
1/*
2 * Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com>
3 *
4 * National Semiconductor SCx200 support.
5 */
6
7#include <linux/module.h>
8#include <linux/errno.h>
9#include <linux/kernel.h>
10#include <linux/init.h>
11#include <linux/mutex.h>
12#include <linux/pci.h>
13
14#include <linux/scx200.h>
15#include <linux/scx200_gpio.h>
16
17/* Verify that the configuration block really is there */
18#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))
19
20#define NAME "scx200"
21
22MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>");
23MODULE_DESCRIPTION("NatSemi SCx200 Driver");
24MODULE_LICENSE("GPL");
25
26unsigned scx200_gpio_base = 0;
27unsigned long scx200_gpio_shadow[2];
28
29unsigned scx200_cb_base = 0;
30
31static struct pci_device_id scx200_tbl[] = {
32 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
33 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
34 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) },
35 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) },
36 { },
37};
38MODULE_DEVICE_TABLE(pci,scx200_tbl);
39
40static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *);
41
42static struct pci_driver scx200_pci_driver = {
43 .name = "scx200",
44 .id_table = scx200_tbl,
45 .probe = scx200_probe,
46};
47
48static DEFINE_MUTEX(scx200_gpio_config_lock);
49
50static void __devinit scx200_init_shadow(void)
51{
52 int bank;
53
54 /* read the current values driven on the GPIO signals */
55 for (bank = 0; bank < 2; ++bank)
56 scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank);
57}
58
59static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
60{
61 unsigned base;
62
63 if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE ||
64 pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) {
65 base = pci_resource_start(pdev, 0);
66 printk(KERN_INFO NAME ": GPIO base 0x%x\n", base);
67
68 if (!request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO")) {
69 printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n");
70 return -EBUSY;
71 }
72
73 scx200_gpio_base = base;
74 scx200_init_shadow();
75
76 } else {
77 /* find the base of the Configuration Block */
78 if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) {
79 scx200_cb_base = SCx200_CB_BASE_FIXED;
80 } else {
81 pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base);
82 if (scx200_cb_probe(base)) {
83 scx200_cb_base = base;
84 } else {
85 printk(KERN_WARNING NAME ": Configuration Block not found\n");
86 return -ENODEV;
87 }
88 }
89 printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base);
90 }
91
92 return 0;
93}
94
95u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits)
96{
97 u32 config, new_config;
98
99 mutex_lock(&scx200_gpio_config_lock);
100
101 outl(index, scx200_gpio_base + 0x20);
102 config = inl(scx200_gpio_base + 0x24);
103
104 new_config = (config & mask) | bits;
105 outl(new_config, scx200_gpio_base + 0x24);
106
107 mutex_unlock(&scx200_gpio_config_lock);
108
109 return config;
110}
111
112static int __init scx200_init(void)
113{
114 printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n");
115
116 return pci_register_driver(&scx200_pci_driver);
117}
118
119static void __exit scx200_cleanup(void)
120{
121 pci_unregister_driver(&scx200_pci_driver);
122 release_region(scx200_gpio_base, SCx200_GPIO_SIZE);
123}
124
125module_init(scx200_init);
126module_exit(scx200_cleanup);
127
128EXPORT_SYMBOL(scx200_gpio_base);
129EXPORT_SYMBOL(scx200_gpio_shadow);
130EXPORT_SYMBOL(scx200_gpio_configure);
131EXPORT_SYMBOL(scx200_cb_base);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b8982e0fc0c2..0afb8c7e3803 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -694,12 +694,23 @@ static u64 __init get_max_mapped(void)
694void __init setup_arch(char **cmdline_p) 694void __init setup_arch(char **cmdline_p)
695{ 695{
696 int acpi = 0; 696 int acpi = 0;
697 int k8 = 0; 697 int amd = 0;
698 unsigned long flags; 698 unsigned long flags;
699 699
700#ifdef CONFIG_X86_32 700#ifdef CONFIG_X86_32
701 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 701 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
702 visws_early_detect(); 702 visws_early_detect();
703
704 /*
705 * copy kernel address range established so far and switch
706 * to the proper swapper page table
707 */
708 clone_pgd_range(swapper_pg_dir + KERNEL_PGD_BOUNDARY,
709 initial_page_table + KERNEL_PGD_BOUNDARY,
710 KERNEL_PGD_PTRS);
711
712 load_cr3(swapper_pg_dir);
713 __flush_tlb_all();
703#else 714#else
704 printk(KERN_INFO "Command line: %s\n", boot_command_line); 715 printk(KERN_INFO "Command line: %s\n", boot_command_line);
705#endif 716#endif
@@ -758,6 +769,8 @@ void __init setup_arch(char **cmdline_p)
758 769
759 x86_init.oem.arch_setup(); 770 x86_init.oem.arch_setup();
760 771
772 resource_alloc_from_bottom = 0;
773 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
761 setup_memory_map(); 774 setup_memory_map();
762 parse_setup_data(); 775 parse_setup_data();
763 /* update the e820_saved too */ 776 /* update the e820_saved too */
@@ -968,12 +981,12 @@ void __init setup_arch(char **cmdline_p)
968 acpi = acpi_numa_init(); 981 acpi = acpi_numa_init();
969#endif 982#endif
970 983
971#ifdef CONFIG_K8_NUMA 984#ifdef CONFIG_AMD_NUMA
972 if (!acpi) 985 if (!acpi)
973 k8 = !k8_numa_init(0, max_pfn); 986 amd = !amd_numa_init(0, max_pfn);
974#endif 987#endif
975 988
976 initmem_init(0, max_pfn, acpi, k8); 989 initmem_init(0, max_pfn, acpi, amd);
977 memblock_find_dma_reserve(); 990 memblock_find_dma_reserve();
978 dma32_reserve_bootmem(); 991 dma32_reserve_bootmem();
979 992
@@ -985,7 +998,12 @@ void __init setup_arch(char **cmdline_p)
985 paging_init(); 998 paging_init();
986 x86_init.paging.pagetable_setup_done(swapper_pg_dir); 999 x86_init.paging.pagetable_setup_done(swapper_pg_dir);
987 1000
988 setup_trampoline_page_table(); 1001#ifdef CONFIG_X86_32
1002 /* sync back kernel address range */
1003 clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
1004 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
1005 KERNEL_PGD_PTRS);
1006#endif
989 1007
990 tboot_probe(); 1008 tboot_probe();
991 1009
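The 32-bit boot path now starts out on initial_page_table (built in head_32.S) and only switches to swapper_pg_dir once the kernel-address-range entries have been copied across; clone_pgd_range() is essentially a memcpy of top-level page-directory entries, and the same copy is repeated in the other direction after paging_init(). A stand-alone model of that handoff, using non-PAE constants (1024 PGD entries, kernel half starting at entry 768) as an assumption; the real helper lives in <asm/pgtable.h>.

	#include <string.h>
	#include <stdint.h>
	#include <stdio.h>

	typedef struct { uint32_t pgd; } pgd_t;	/* stand-in for the kernel type */

	#define PTRS_PER_PGD		1024	/* 32-bit non-PAE layout */
	#define KERNEL_PGD_BOUNDARY	768	/* first entry above PAGE_OFFSET */
	#define KERNEL_PGD_PTRS		(PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)

	/* Model of clone_pgd_range(): copy 'count' top-level entries. */
	static void clone_pgd_range_model(pgd_t *dst, const pgd_t *src, int count)
	{
		memcpy(dst, src, count * sizeof(pgd_t));
	}

	int main(void)
	{
		static pgd_t initial_pt[PTRS_PER_PGD], swapper[PTRS_PER_PGD];

		initial_pt[KERNEL_PGD_BOUNDARY].pgd = 0x1234;	/* pretend mapping */

		/* What setup_arch() does before load_cr3(swapper_pg_dir): */
		clone_pgd_range_model(swapper + KERNEL_PGD_BOUNDARY,
				      initial_pt + KERNEL_PGD_BOUNDARY,
				      KERNEL_PGD_PTRS);

		printf("%#x\n", swapper[KERNEL_PGD_BOUNDARY].pgd);	/* 0x1234 */
		return 0;
	}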
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c
deleted file mode 100644
index dd4c281ffe57..000000000000
--- a/arch/x86/kernel/sfi.c
+++ /dev/null
@@ -1,120 +0,0 @@
1/*
2 * sfi.c - x86 architecture SFI support.
3 *
4 * Copyright (c) 2009, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 */
20
21#define KMSG_COMPONENT "SFI"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24#include <linux/acpi.h>
25#include <linux/init.h>
26#include <linux/sfi.h>
27#include <linux/io.h>
28
29#include <asm/io_apic.h>
30#include <asm/mpspec.h>
31#include <asm/setup.h>
32#include <asm/apic.h>
33
34#ifdef CONFIG_X86_LOCAL_APIC
35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
36
37static void __init mp_sfi_register_lapic_address(unsigned long address)
38{
39 mp_lapic_addr = address;
40
41 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
42 if (boot_cpu_physical_apicid == -1U)
43 boot_cpu_physical_apicid = read_apic_id();
44
45 pr_info("Boot CPU = %d\n", boot_cpu_physical_apicid);
46}
47
48/* All CPUs enumerated by SFI must be present and enabled */
49static void __cpuinit mp_sfi_register_lapic(u8 id)
50{
51 if (MAX_APICS - id <= 0) {
52 pr_warning("Processor #%d invalid (max %d)\n",
53 id, MAX_APICS);
54 return;
55 }
56
57 pr_info("registering lapic[%d]\n", id);
58
59 generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR)));
60}
61
62static int __init sfi_parse_cpus(struct sfi_table_header *table)
63{
64 struct sfi_table_simple *sb;
65 struct sfi_cpu_table_entry *pentry;
66 int i;
67 int cpu_num;
68
69 sb = (struct sfi_table_simple *)table;
70 cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry);
71 pentry = (struct sfi_cpu_table_entry *)sb->pentry;
72
73 for (i = 0; i < cpu_num; i++) {
74 mp_sfi_register_lapic(pentry->apic_id);
75 pentry++;
76 }
77
78 smp_found_config = 1;
79 return 0;
80}
81#endif /* CONFIG_X86_LOCAL_APIC */
82
83#ifdef CONFIG_X86_IO_APIC
84
85static int __init sfi_parse_ioapic(struct sfi_table_header *table)
86{
87 struct sfi_table_simple *sb;
88 struct sfi_apic_table_entry *pentry;
89 int i, num;
90
91 sb = (struct sfi_table_simple *)table;
92 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry);
93 pentry = (struct sfi_apic_table_entry *)sb->pentry;
94
95 for (i = 0; i < num; i++) {
96 mp_register_ioapic(i, pentry->phys_addr, gsi_top);
97 pentry++;
98 }
99
100 WARN(pic_mode, KERN_WARNING
101 "SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n");
102 pic_mode = 0;
103 return 0;
104}
105#endif /* CONFIG_X86_IO_APIC */
106
107/*
108 * sfi_platform_init(): register lapics & io-apics
109 */
110int __init sfi_platform_init(void)
111{
112#ifdef CONFIG_X86_LOCAL_APIC
113 mp_sfi_register_lapic_address(sfi_lapic_addr);
114 sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
115#endif
116#ifdef CONFIG_X86_IO_APIC
117 sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic);
118#endif
119 return 0;
120}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d801210945d6..513deac7228d 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(void)
159 irq_exit(); 159 irq_exit();
160} 160}
161 161
162static void native_smp_send_stop(void) 162static void native_stop_other_cpus(int wait)
163{ 163{
164 unsigned long flags; 164 unsigned long flags;
165 unsigned long wait; 165 unsigned long timeout;
166 166
167 if (reboot_force) 167 if (reboot_force)
168 return; 168 return;
@@ -179,9 +179,12 @@ static void native_smp_send_stop(void)
179 if (num_online_cpus() > 1) { 179 if (num_online_cpus() > 1) {
180 apic->send_IPI_allbutself(REBOOT_VECTOR); 180 apic->send_IPI_allbutself(REBOOT_VECTOR);
181 181
182 /* Don't wait longer than a second */ 182 /*
183 wait = USEC_PER_SEC; 183 * Don't wait longer than a second if the caller
184 while (num_online_cpus() > 1 && wait--) 184 * didn't ask us to wait.
185 */
186 timeout = USEC_PER_SEC;
187 while (num_online_cpus() > 1 && (wait || timeout--))
185 udelay(1); 188 udelay(1);
186 } 189 }
187 190
@@ -227,7 +230,7 @@ struct smp_ops smp_ops = {
227 .smp_prepare_cpus = native_smp_prepare_cpus, 230 .smp_prepare_cpus = native_smp_prepare_cpus,
228 .smp_cpus_done = native_smp_cpus_done, 231 .smp_cpus_done = native_smp_cpus_done,
229 232
230 .smp_send_stop = native_smp_send_stop, 233 .stop_other_cpus = native_stop_other_cpus,
231 .smp_send_reschedule = native_smp_send_reschedule, 234 .smp_send_reschedule = native_smp_send_reschedule,
232 235
233 .cpu_up = native_cpu_up, 236 .cpu_up = native_cpu_up,
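The smp.c hunk above does more than rename native_smp_send_stop() to native_stop_other_cpus(): it threads a 'wait' flag through the shutdown path, so a caller that needs the other CPUs to be truly offline can spin until num_online_cpus() drops to 1, while everyone else still gives up after roughly a second. A minimal sketch of how the new smp_ops hook could be wrapped for the two cases (the wrapper names and call sites below are illustrative assumptions, not taken from this diff):

	/* Sketch only: assumed wrappers around the renamed smp_ops hook. */
	static inline void smp_send_stop(void)
	{
		smp_ops.stop_other_cpus(0);	/* best effort, give up after ~1s */
	}

	static inline void stop_other_cpus(void)
	{
		smp_ops.stop_other_cpus(1);	/* wait until the others are really gone */
	}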
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index dfb50890b5b7..083e99d1b7df 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -299,22 +299,16 @@ notrace static void __cpuinit start_secondary(void *unused)
299 * fragile that we want to limit the things done here to the 299 * fragile that we want to limit the things done here to the
300 * most necessary things. 300 * most necessary things.
301 */ 301 */
302 cpu_init();
303 preempt_disable();
304 smp_callin();
302 305
303#ifdef CONFIG_X86_32 306#ifdef CONFIG_X86_32
304 /* 307 /* switch away from the initial page table */
305 * Switch away from the trampoline page-table
306 *
307 * Do this before cpu_init() because it needs to access per-cpu
308 * data which may not be mapped in the trampoline page-table.
309 */
310 load_cr3(swapper_pg_dir); 308 load_cr3(swapper_pg_dir);
311 __flush_tlb_all(); 309 __flush_tlb_all();
312#endif 310#endif
313 311
314 cpu_init();
315 preempt_disable();
316 smp_callin();
317
318 /* otherwise gcc will move up smp_processor_id before the cpu_init */ 312 /* otherwise gcc will move up smp_processor_id before the cpu_init */
319 barrier(); 313 barrier();
320 /* 314 /*
@@ -753,7 +747,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
753 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), 747 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
754 }; 748 };
755 749
756 INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle); 750 INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
757 751
758 alternatives_smp_switch(1); 752 alternatives_smp_switch(1);
759 753
@@ -785,7 +779,6 @@ do_rest:
785#ifdef CONFIG_X86_32 779#ifdef CONFIG_X86_32
786 /* Stack for startup_32 can be just as for start_secondary onwards */ 780 /* Stack for startup_32 can be just as for start_secondary onwards */
787 irq_ctx_init(cpu); 781 irq_ctx_init(cpu);
788 initial_page_table = __pa(&trampoline_pg_dir);
789#else 782#else
790 clear_tsk_thread_flag(c_idle.idle, TIF_FORK); 783 clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
791 initial_gs = per_cpu_offset(cpu); 784 initial_gs = per_cpu_offset(cpu);
@@ -934,7 +927,6 @@ int __cpuinit native_cpu_up(unsigned int cpu)
934 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; 927 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
935 928
936 err = do_boot_cpu(apicid, cpu); 929 err = do_boot_cpu(apicid, cpu);
937
938 if (err) { 930 if (err) {
939 pr_debug("do_boot_cpu failed %d\n", err); 931 pr_debug("do_boot_cpu failed %d\n", err);
940 return -EIO; 932 return -EIO;
@@ -1381,7 +1373,6 @@ void play_dead_common(void)
1381{ 1373{
1382 idle_task_exit(); 1374 idle_task_exit();
1383 reset_lazy_tlbstate(); 1375 reset_lazy_tlbstate();
1384 irq_ctx_exit(raw_smp_processor_id());
1385 c1e_remove_cpu(raw_smp_processor_id()); 1376 c1e_remove_cpu(raw_smp_processor_id());
1386 1377
1387 mb(); 1378 mb();
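Together with the setup.c change at the top of this series (cloning the kernel address range into initial_page_table) and the removal of setup_trampoline_page_table() further down, this smpboot.c hunk lets a secondary CPU run cpu_init(), preempt_disable() and smp_callin() while still on initial_page_table, and only then switch to swapper_pg_dir; the separate trampoline_pg_dir and its low mappings are no longer needed. For reference, clone_pgd_range() is just a copy of top-level page-table entries, roughly (a sketch of the asm/pgtable.h helper, not part of this diff):

	static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
	{
		/* copy 'count' pgd entries from src to dst */
		memcpy(dst, src, count * sizeof(pgd_t));
	}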
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
deleted file mode 100644
index 312ef0292815..000000000000
--- a/arch/x86/kernel/tlb_uv.c
+++ /dev/null
@@ -1,1655 +0,0 @@
1/*
2 * SGI UltraViolet TLB flush routines.
3 *
4 * (c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI.
5 *
6 * This code is released under the GNU General Public License version 2 or
7 * later.
8 */
9#include <linux/seq_file.h>
10#include <linux/proc_fs.h>
11#include <linux/debugfs.h>
12#include <linux/kernel.h>
13#include <linux/slab.h>
14
15#include <asm/mmu_context.h>
16#include <asm/uv/uv.h>
17#include <asm/uv/uv_mmrs.h>
18#include <asm/uv/uv_hub.h>
19#include <asm/uv/uv_bau.h>
20#include <asm/apic.h>
21#include <asm/idle.h>
22#include <asm/tsc.h>
23#include <asm/irq_vectors.h>
24#include <asm/timer.h>
25
26/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
27static int timeout_base_ns[] = {
28 20,
29 160,
30 1280,
31 10240,
32 81920,
33 655360,
34 5242880,
35 167772160
36};
37static int timeout_us;
38static int nobau;
39static int baudisabled;
40static spinlock_t disable_lock;
41static cycles_t congested_cycles;
42
43/* tunables: */
44static int max_bau_concurrent = MAX_BAU_CONCURRENT;
45static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT;
46static int plugged_delay = PLUGGED_DELAY;
47static int plugsb4reset = PLUGSB4RESET;
48static int timeoutsb4reset = TIMEOUTSB4RESET;
49static int ipi_reset_limit = IPI_RESET_LIMIT;
50static int complete_threshold = COMPLETE_THRESHOLD;
51static int congested_response_us = CONGESTED_RESPONSE_US;
52static int congested_reps = CONGESTED_REPS;
53static int congested_period = CONGESTED_PERIOD;
54static struct dentry *tunables_dir;
55static struct dentry *tunables_file;
56
57static int __init setup_nobau(char *arg)
58{
59 nobau = 1;
60 return 0;
61}
62early_param("nobau", setup_nobau);
63
64/* base pnode in this partition */
65static int uv_partition_base_pnode __read_mostly;
66/* position of pnode (which is nasid>>1): */
67static int uv_nshift __read_mostly;
68static unsigned long uv_mmask __read_mostly;
69
70static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
71static DEFINE_PER_CPU(struct bau_control, bau_control);
72static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
73
74/*
75 * Determine the first node on a uvhub. 'Nodes' are used for kernel
76 * memory allocation.
77 */
78static int __init uvhub_to_first_node(int uvhub)
79{
80 int node, b;
81
82 for_each_online_node(node) {
83 b = uv_node_to_blade_id(node);
84 if (uvhub == b)
85 return node;
86 }
87 return -1;
88}
89
90/*
91 * Determine the apicid of the first cpu on a uvhub.
92 */
93static int __init uvhub_to_first_apicid(int uvhub)
94{
95 int cpu;
96
97 for_each_present_cpu(cpu)
98 if (uvhub == uv_cpu_to_blade_id(cpu))
99 return per_cpu(x86_cpu_to_apicid, cpu);
100 return -1;
101}
102
103/*
104 * Free a software acknowledge hardware resource by clearing its Pending
105 * bit. This will return a reply to the sender.
106 * If the message has timed out, a reply has already been sent by the
107 * hardware but the resource has not been released. In that case our
108 * clear of the Timeout bit (as well) will free the resource. No reply will
109 * be sent (the hardware will only do one reply per message).
110 */
111static inline void uv_reply_to_message(struct msg_desc *mdp,
112 struct bau_control *bcp)
113{
114 unsigned long dw;
115 struct bau_payload_queue_entry *msg;
116
117 msg = mdp->msg;
118 if (!msg->canceled) {
119 dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) |
120 msg->sw_ack_vector;
121 uv_write_local_mmr(
122 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw);
123 }
124 msg->replied_to = 1;
125 msg->sw_ack_vector = 0;
126}
127
128/*
129 * Process the receipt of a RETRY message
130 */
131static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,
132 struct bau_control *bcp)
133{
134 int i;
135 int cancel_count = 0;
136 int slot2;
137 unsigned long msg_res;
138 unsigned long mmr = 0;
139 struct bau_payload_queue_entry *msg;
140 struct bau_payload_queue_entry *msg2;
141 struct ptc_stats *stat;
142
143 msg = mdp->msg;
144 stat = bcp->statp;
145 stat->d_retries++;
146 /*
147 * cancel any message from msg+1 to the retry itself
148 */
149 for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
150 if (msg2 > mdp->va_queue_last)
151 msg2 = mdp->va_queue_first;
152 if (msg2 == msg)
153 break;
154
155 /* same conditions for cancellation as uv_do_reset */
156 if ((msg2->replied_to == 0) && (msg2->canceled == 0) &&
157 (msg2->sw_ack_vector) && ((msg2->sw_ack_vector &
158 msg->sw_ack_vector) == 0) &&
159 (msg2->sending_cpu == msg->sending_cpu) &&
160 (msg2->msg_type != MSG_NOOP)) {
161 slot2 = msg2 - mdp->va_queue_first;
162 mmr = uv_read_local_mmr
163 (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
164 msg_res = msg2->sw_ack_vector;
165 /*
166 * This is a message retry; clear the resources held
167 * by the previous message only if they timed out.
168 * If it has not timed out we have an unexpected
169 * situation to report.
170 */
171 if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
172 /*
173 * is the resource timed out?
174 * make everyone ignore the cancelled message.
175 */
176 msg2->canceled = 1;
177 stat->d_canceled++;
178 cancel_count++;
179 uv_write_local_mmr(
180 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
181 (msg_res << UV_SW_ACK_NPENDING) |
182 msg_res);
183 }
184 }
185 }
186 if (!cancel_count)
187 stat->d_nocanceled++;
188}
189
190/*
191 * Do all the things a cpu should do for a TLB shootdown message.
192 * Other cpu's may come here at the same time for this message.
193 */
194static void uv_bau_process_message(struct msg_desc *mdp,
195 struct bau_control *bcp)
196{
197 int msg_ack_count;
198 short socket_ack_count = 0;
199 struct ptc_stats *stat;
200 struct bau_payload_queue_entry *msg;
201 struct bau_control *smaster = bcp->socket_master;
202
203 /*
204 * This must be a normal message, or retry of a normal message
205 */
206 msg = mdp->msg;
207 stat = bcp->statp;
208 if (msg->address == TLB_FLUSH_ALL) {
209 local_flush_tlb();
210 stat->d_alltlb++;
211 } else {
212 __flush_tlb_one(msg->address);
213 stat->d_onetlb++;
214 }
215 stat->d_requestee++;
216
217 /*
218 * One cpu on each uvhub has the additional job on a RETRY
219 * of releasing the resource held by the message that is
220 * being retried. That message is identified by sending
221 * cpu number.
222 */
223 if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master)
224 uv_bau_process_retry_msg(mdp, bcp);
225
226 /*
227 * This is a sw_ack message, so we have to reply to it.
228 * Count each responding cpu on the socket. This avoids
229 * pinging the count's cache line back and forth between
230 * the sockets.
231 */
232 socket_ack_count = atomic_add_short_return(1, (struct atomic_short *)
233 &smaster->socket_acknowledge_count[mdp->msg_slot]);
234 if (socket_ack_count == bcp->cpus_in_socket) {
235 /*
236 * Both sockets dump their completed count total into
237 * the message's count.
238 */
239 smaster->socket_acknowledge_count[mdp->msg_slot] = 0;
240 msg_ack_count = atomic_add_short_return(socket_ack_count,
241 (struct atomic_short *)&msg->acknowledge_count);
242
243 if (msg_ack_count == bcp->cpus_in_uvhub) {
244 /*
245 * All cpus in uvhub saw it; reply
246 */
247 uv_reply_to_message(mdp, bcp);
248 }
249 }
250
251 return;
252}
253
254/*
255 * Determine the first cpu on a uvhub.
256 */
257static int uvhub_to_first_cpu(int uvhub)
258{
259 int cpu;
260 for_each_present_cpu(cpu)
261 if (uvhub == uv_cpu_to_blade_id(cpu))
262 return cpu;
263 return -1;
264}
265
266/*
267 * Last resort when we get a large number of destination timeouts is
268 * to clear resources held by a given cpu.
269 * Do this with IPI so that all messages in the BAU message queue
270 * can be identified by their nonzero sw_ack_vector field.
271 *
272 * This is entered for a single cpu on the uvhub.
273 * The sender wants this uvhub to free a specific message's
274 * sw_ack resources.
275 */
276static void
277uv_do_reset(void *ptr)
278{
279 int i;
280 int slot;
281 int count = 0;
282 unsigned long mmr;
283 unsigned long msg_res;
284 struct bau_control *bcp;
285 struct reset_args *rap;
286 struct bau_payload_queue_entry *msg;
287 struct ptc_stats *stat;
288
289 bcp = &per_cpu(bau_control, smp_processor_id());
290 rap = (struct reset_args *)ptr;
291 stat = bcp->statp;
292 stat->d_resets++;
293
294 /*
295 * We're looking for the given sender, and
296 * will free its sw_ack resource.
297 * If all cpu's finally responded after the timeout, its
298 * message 'replied_to' was set.
299 */
300 for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
301 /* uv_do_reset: same conditions for cancellation as
302 uv_bau_process_retry_msg() */
303 if ((msg->replied_to == 0) &&
304 (msg->canceled == 0) &&
305 (msg->sending_cpu == rap->sender) &&
306 (msg->sw_ack_vector) &&
307 (msg->msg_type != MSG_NOOP)) {
308 /*
309 * make everyone else ignore this message
310 */
311 msg->canceled = 1;
312 slot = msg - bcp->va_queue_first;
313 count++;
314 /*
315 * only reset the resource if it is still pending
316 */
317 mmr = uv_read_local_mmr
318 (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
319 msg_res = msg->sw_ack_vector;
320 if (mmr & msg_res) {
321 stat->d_rcanceled++;
322 uv_write_local_mmr(
323 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
324 (msg_res << UV_SW_ACK_NPENDING) |
325 msg_res);
326 }
327 }
328 }
329 return;
330}
331
332/*
333 * Use IPI to get all target uvhubs to release resources held by
334 * a given sending cpu number.
335 */
336static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution,
337 int sender)
338{
339 int uvhub;
340 int cpu;
341 cpumask_t mask;
342 struct reset_args reset_args;
343
344 reset_args.sender = sender;
345
346 cpus_clear(mask);
347 /* find a single cpu for each uvhub in this distribution mask */
348 for (uvhub = 0;
349 uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE;
350 uvhub++) {
351 if (!bau_uvhub_isset(uvhub, distribution))
352 continue;
353 /* find a cpu for this uvhub */
354 cpu = uvhub_to_first_cpu(uvhub);
355 cpu_set(cpu, mask);
356 }
357 /* IPI all cpus; Preemption is already disabled */
358 smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1);
359 return;
360}
361
362static inline unsigned long
363cycles_2_us(unsigned long long cyc)
364{
365 unsigned long long ns;
366 unsigned long us;
367 ns = (cyc * per_cpu(cyc2ns, smp_processor_id()))
368 >> CYC2NS_SCALE_FACTOR;
369 us = ns / 1000;
370 return us;
371}
372
373/*
374 * wait for all cpus on this hub to finish their sends and go quiet
375 * leaves uvhub_quiesce set so that no new broadcasts are started by
376 * bau_flush_send_and_wait()
377 */
378static inline void
379quiesce_local_uvhub(struct bau_control *hmaster)
380{
381 atomic_add_short_return(1, (struct atomic_short *)
382 &hmaster->uvhub_quiesce);
383}
384
385/*
386 * mark this quiet-requestor as done
387 */
388static inline void
389end_uvhub_quiesce(struct bau_control *hmaster)
390{
391 atomic_add_short_return(-1, (struct atomic_short *)
392 &hmaster->uvhub_quiesce);
393}
394
395/*
396 * Wait for completion of a broadcast software ack message
397 * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP
398 */
399static int uv_wait_completion(struct bau_desc *bau_desc,
400 unsigned long mmr_offset, int right_shift, int this_cpu,
401 struct bau_control *bcp, struct bau_control *smaster, long try)
402{
403 unsigned long descriptor_status;
404 cycles_t ttime;
405 struct ptc_stats *stat = bcp->statp;
406 struct bau_control *hmaster;
407
408 hmaster = bcp->uvhub_master;
409
410 /* spin on the status MMR, waiting for it to go idle */
411 while ((descriptor_status = (((unsigned long)
412 uv_read_local_mmr(mmr_offset) >>
413 right_shift) & UV_ACT_STATUS_MASK)) !=
414 DESC_STATUS_IDLE) {
415 /*
416 * Our software ack messages may be blocked because there are
417 * no swack resources available. As long as none of them
418 * has timed out hardware will NACK our message and its
419 * state will stay IDLE.
420 */
421 if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
422 stat->s_stimeout++;
423 return FLUSH_GIVEUP;
424 } else if (descriptor_status ==
425 DESC_STATUS_DESTINATION_TIMEOUT) {
426 stat->s_dtimeout++;
427 ttime = get_cycles();
428
429 /*
430 * Our retries may be blocked by all destination
431 * swack resources being consumed, and a timeout
432 * pending. In that case hardware returns the
433 * ERROR that looks like a destination timeout.
434 */
435 if (cycles_2_us(ttime - bcp->send_message) <
436 timeout_us) {
437 bcp->conseccompletes = 0;
438 return FLUSH_RETRY_PLUGGED;
439 }
440
441 bcp->conseccompletes = 0;
442 return FLUSH_RETRY_TIMEOUT;
443 } else {
444 /*
445 * descriptor_status is still BUSY
446 */
447 cpu_relax();
448 }
449 }
450 bcp->conseccompletes++;
451 return FLUSH_COMPLETE;
452}
453
454static inline cycles_t
455sec_2_cycles(unsigned long sec)
456{
457 unsigned long ns;
458 cycles_t cyc;
459
460 ns = sec * 1000000000;
461 cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
462 return cyc;
463}
464
465/*
466 * conditionally add 1 to *v, unless *v is >= u
467 * return 0 if we cannot add 1 to *v because it is >= u
468 * return 1 if we can add 1 to *v because it is < u
469 * the add is atomic
470 *
471 * This is close to atomic_add_unless(), but this allows the 'u' value
472 * to be lowered below the current 'v'. atomic_add_unless can only stop
473 * on equal.
474 */
475static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
476{
477 spin_lock(lock);
478 if (atomic_read(v) >= u) {
479 spin_unlock(lock);
480 return 0;
481 }
482 atomic_inc(v);
483 spin_unlock(lock);
484 return 1;
485}
486
487/*
488 * Our retries are blocked by all destination swack resources being
489 * in use, and a timeout is pending. In that case hardware immediately
490 * returns the ERROR that looks like a destination timeout.
491 */
492static void
493destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp,
494 struct bau_control *hmaster, struct ptc_stats *stat)
495{
496 udelay(bcp->plugged_delay);
497 bcp->plugged_tries++;
498 if (bcp->plugged_tries >= bcp->plugsb4reset) {
499 bcp->plugged_tries = 0;
500 quiesce_local_uvhub(hmaster);
501 spin_lock(&hmaster->queue_lock);
502 uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu);
503 spin_unlock(&hmaster->queue_lock);
504 end_uvhub_quiesce(hmaster);
505 bcp->ipi_attempts++;
506 stat->s_resets_plug++;
507 }
508}
509
510static void
511destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp,
512 struct bau_control *hmaster, struct ptc_stats *stat)
513{
514 hmaster->max_bau_concurrent = 1;
515 bcp->timeout_tries++;
516 if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
517 bcp->timeout_tries = 0;
518 quiesce_local_uvhub(hmaster);
519 spin_lock(&hmaster->queue_lock);
520 uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu);
521 spin_unlock(&hmaster->queue_lock);
522 end_uvhub_quiesce(hmaster);
523 bcp->ipi_attempts++;
524 stat->s_resets_timeout++;
525 }
526}
527
528/*
529 * Completions are taking a very long time due to a congested numalink
530 * network.
531 */
532static void
533disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat)
534{
535 int tcpu;
536 struct bau_control *tbcp;
537
538 /* let only one cpu do this disabling */
539 spin_lock(&disable_lock);
540 if (!baudisabled && bcp->period_requests &&
541 ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
542 /* it becomes this cpu's job to turn on the use of the
543 BAU again */
544 baudisabled = 1;
545 bcp->set_bau_off = 1;
546 bcp->set_bau_on_time = get_cycles() +
547 sec_2_cycles(bcp->congested_period);
548 stat->s_bau_disabled++;
549 for_each_present_cpu(tcpu) {
550 tbcp = &per_cpu(bau_control, tcpu);
551 tbcp->baudisabled = 1;
552 }
553 }
554 spin_unlock(&disable_lock);
555}
556
557/**
558 * uv_flush_send_and_wait
559 *
560 * Send a broadcast and wait for it to complete.
561 *
562 * The flush_mask contains the cpus the broadcast is to be sent to including
563 * cpus that are on the local uvhub.
564 *
565 * Returns 0 if all flushing represented in the mask was done.
566 * Returns 1 if it gives up entirely and the original cpu mask is to be
567 * returned to the kernel.
568 */
569int uv_flush_send_and_wait(struct bau_desc *bau_desc,
570 struct cpumask *flush_mask, struct bau_control *bcp)
571{
572 int right_shift;
573 int completion_status = 0;
574 int seq_number = 0;
575 long try = 0;
576 int cpu = bcp->uvhub_cpu;
577 int this_cpu = bcp->cpu;
578 unsigned long mmr_offset;
579 unsigned long index;
580 cycles_t time1;
581 cycles_t time2;
582 cycles_t elapsed;
583 struct ptc_stats *stat = bcp->statp;
584 struct bau_control *smaster = bcp->socket_master;
585 struct bau_control *hmaster = bcp->uvhub_master;
586
587 if (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
588 &hmaster->active_descriptor_count,
589 hmaster->max_bau_concurrent)) {
590 stat->s_throttles++;
591 do {
592 cpu_relax();
593 } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
594 &hmaster->active_descriptor_count,
595 hmaster->max_bau_concurrent));
596 }
597 while (hmaster->uvhub_quiesce)
598 cpu_relax();
599
600 if (cpu < UV_CPUS_PER_ACT_STATUS) {
601 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
602 right_shift = cpu * UV_ACT_STATUS_SIZE;
603 } else {
604 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
605 right_shift =
606 ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
607 }
608 time1 = get_cycles();
609 do {
610 if (try == 0) {
611 bau_desc->header.msg_type = MSG_REGULAR;
612 seq_number = bcp->message_number++;
613 } else {
614 bau_desc->header.msg_type = MSG_RETRY;
615 stat->s_retry_messages++;
616 }
617 bau_desc->header.sequence = seq_number;
618 index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
619 bcp->uvhub_cpu;
620 bcp->send_message = get_cycles();
621 uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
622 try++;
623 completion_status = uv_wait_completion(bau_desc, mmr_offset,
624 right_shift, this_cpu, bcp, smaster, try);
625
626 if (completion_status == FLUSH_RETRY_PLUGGED) {
627 destination_plugged(bau_desc, bcp, hmaster, stat);
628 } else if (completion_status == FLUSH_RETRY_TIMEOUT) {
629 destination_timeout(bau_desc, bcp, hmaster, stat);
630 }
631 if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
632 bcp->ipi_attempts = 0;
633 completion_status = FLUSH_GIVEUP;
634 break;
635 }
636 cpu_relax();
637 } while ((completion_status == FLUSH_RETRY_PLUGGED) ||
638 (completion_status == FLUSH_RETRY_TIMEOUT));
639 time2 = get_cycles();
640 bcp->plugged_tries = 0;
641 bcp->timeout_tries = 0;
642 if ((completion_status == FLUSH_COMPLETE) &&
643 (bcp->conseccompletes > bcp->complete_threshold) &&
644 (hmaster->max_bau_concurrent <
645 hmaster->max_bau_concurrent_constant))
646 hmaster->max_bau_concurrent++;
647 while (hmaster->uvhub_quiesce)
648 cpu_relax();
649 atomic_dec(&hmaster->active_descriptor_count);
650 if (time2 > time1) {
651 elapsed = time2 - time1;
652 stat->s_time += elapsed;
653 if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
654 bcp->period_requests++;
655 bcp->period_time += elapsed;
656 if ((elapsed > congested_cycles) &&
657 (bcp->period_requests > bcp->congested_reps)) {
658 disable_for_congestion(bcp, stat);
659 }
660 }
661 } else
662 stat->s_requestor--;
663 if (completion_status == FLUSH_COMPLETE && try > 1)
664 stat->s_retriesok++;
665 else if (completion_status == FLUSH_GIVEUP) {
666 stat->s_giveup++;
667 return 1;
668 }
669 return 0;
670}
671
672/**
673 * uv_flush_tlb_others - globally purge translation cache of a virtual
674 * address or all TLB's
675 * @cpumask: mask of all cpu's in which the address is to be removed
676 * @mm: mm_struct containing virtual address range
677 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
678 * @cpu: the current cpu
679 *
680 * This is the entry point for initiating any UV global TLB shootdown.
681 *
682 * Purges the translation caches of all specified processors of the given
683 * virtual address, or purges all TLB's on specified processors.
684 *
685 * The caller has derived the cpumask from the mm_struct. This function
686 * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
687 *
688 * The cpumask is converted into a uvhubmask of the uvhubs containing
689 * those cpus.
690 *
691 * Note that this function should be called with preemption disabled.
692 *
693 * Returns NULL if all remote flushing was done.
694 * Returns pointer to cpumask if some remote flushing remains to be
695 * done. The returned pointer is valid till preemption is re-enabled.
696 */
697const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
698 struct mm_struct *mm,
699 unsigned long va, unsigned int cpu)
700{
701 int tcpu;
702 int uvhub;
703 int locals = 0;
704 int remotes = 0;
705 int hubs = 0;
706 struct bau_desc *bau_desc;
707 struct cpumask *flush_mask;
708 struct ptc_stats *stat;
709 struct bau_control *bcp;
710 struct bau_control *tbcp;
711
712 /* kernel was booted 'nobau' */
713 if (nobau)
714 return cpumask;
715
716 bcp = &per_cpu(bau_control, cpu);
717 stat = bcp->statp;
718
719 /* bau was disabled due to slow response */
720 if (bcp->baudisabled) {
721 /* the cpu that disabled it must re-enable it */
722 if (bcp->set_bau_off) {
723 if (get_cycles() >= bcp->set_bau_on_time) {
724 stat->s_bau_reenabled++;
725 baudisabled = 0;
726 for_each_present_cpu(tcpu) {
727 tbcp = &per_cpu(bau_control, tcpu);
728 tbcp->baudisabled = 0;
729 tbcp->period_requests = 0;
730 tbcp->period_time = 0;
731 }
732 }
733 }
734 return cpumask;
735 }
736
737 /*
738 * Each sending cpu has a per-cpu mask which it fills from the caller's
739 * cpu mask. All cpus are converted to uvhubs and copied to the
740 * activation descriptor.
741 */
742 flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
743 /* don't actually do a shootdown of the local cpu */
744 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
745 if (cpu_isset(cpu, *cpumask))
746 stat->s_ntargself++;
747
748 bau_desc = bcp->descriptor_base;
749 bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
750 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
751
752 /* cpu statistics */
753 for_each_cpu(tcpu, flush_mask) {
754 uvhub = uv_cpu_to_blade_id(tcpu);
755 bau_uvhub_set(uvhub, &bau_desc->distribution);
756 if (uvhub == bcp->uvhub)
757 locals++;
758 else
759 remotes++;
760 }
761 if ((locals + remotes) == 0)
762 return NULL;
763 stat->s_requestor++;
764 stat->s_ntargcpu += remotes + locals;
765 stat->s_ntargremotes += remotes;
766 stat->s_ntarglocals += locals;
767 remotes = bau_uvhub_weight(&bau_desc->distribution);
768
769 /* uvhub statistics */
770 hubs = bau_uvhub_weight(&bau_desc->distribution);
771 if (locals) {
772 stat->s_ntarglocaluvhub++;
773 stat->s_ntargremoteuvhub += (hubs - 1);
774 } else
775 stat->s_ntargremoteuvhub += hubs;
776 stat->s_ntarguvhub += hubs;
777 if (hubs >= 16)
778 stat->s_ntarguvhub16++;
779 else if (hubs >= 8)
780 stat->s_ntarguvhub8++;
781 else if (hubs >= 4)
782 stat->s_ntarguvhub4++;
783 else if (hubs >= 2)
784 stat->s_ntarguvhub2++;
785 else
786 stat->s_ntarguvhub1++;
787
788 bau_desc->payload.address = va;
789 bau_desc->payload.sending_cpu = cpu;
790
791 /*
792 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
793 * or 1 if it gave up and the original cpumask should be returned.
794 */
795 if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp))
796 return NULL;
797 else
798 return cpumask;
799}
800
801/*
802 * The BAU message interrupt comes here. (registered by set_intr_gate)
803 * See entry_64.S
804 *
805 * We received a broadcast assist message.
806 *
807 * Interrupts are disabled; this interrupt could represent
808 * the receipt of several messages.
809 *
810 * All cores/threads on this hub get this interrupt.
811 * The last one to see it does the software ack.
812 * (the resource will not be freed until noninterruptable cpus see this
813 * interrupt; hardware may timeout the s/w ack and reply ERROR)
814 */
815void uv_bau_message_interrupt(struct pt_regs *regs)
816{
817 int count = 0;
818 cycles_t time_start;
819 struct bau_payload_queue_entry *msg;
820 struct bau_control *bcp;
821 struct ptc_stats *stat;
822 struct msg_desc msgdesc;
823
824 time_start = get_cycles();
825 bcp = &per_cpu(bau_control, smp_processor_id());
826 stat = bcp->statp;
827 msgdesc.va_queue_first = bcp->va_queue_first;
828 msgdesc.va_queue_last = bcp->va_queue_last;
829 msg = bcp->bau_msg_head;
830 while (msg->sw_ack_vector) {
831 count++;
832 msgdesc.msg_slot = msg - msgdesc.va_queue_first;
833 msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1;
834 msgdesc.msg = msg;
835 uv_bau_process_message(&msgdesc, bcp);
836 msg++;
837 if (msg > msgdesc.va_queue_last)
838 msg = msgdesc.va_queue_first;
839 bcp->bau_msg_head = msg;
840 }
841 stat->d_time += (get_cycles() - time_start);
842 if (!count)
843 stat->d_nomsg++;
844 else if (count > 1)
845 stat->d_multmsg++;
846 ack_APIC_irq();
847}
848
849/*
850 * uv_enable_timeouts
851 *
852 * Each target uvhub (i.e. a uvhub that has no cpu's) needs to have
853 * shootdown message timeouts enabled. The timeout does not cause
854 * an interrupt, but causes an error message to be returned to
855 * the sender.
856 */
857static void uv_enable_timeouts(void)
858{
859 int uvhub;
860 int nuvhubs;
861 int pnode;
862 unsigned long mmr_image;
863
864 nuvhubs = uv_num_possible_blades();
865
866 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
867 if (!uv_blade_nr_possible_cpus(uvhub))
868 continue;
869
870 pnode = uv_blade_to_pnode(uvhub);
871 mmr_image =
872 uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
873 /*
874 * Set the timeout period and then lock it in, in three
875 * steps; captures and locks in the period.
876 *
877 * To program the period, the SOFT_ACK_MODE must be off.
878 */
879 mmr_image &= ~((unsigned long)1 <<
880 UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT);
881 uv_write_global_mmr64
882 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
883 /*
884 * Set the 4-bit period.
885 */
886 mmr_image &= ~((unsigned long)0xf <<
887 UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT);
888 mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
889 UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT);
890 uv_write_global_mmr64
891 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
892 /*
893 * Subsequent reversals of the timebase bit (3) cause an
894 * immediate timeout of one or all INTD resources as
895 * indicated in bits 2:0 (7 causes all of them to timeout).
896 */
897 mmr_image |= ((unsigned long)1 <<
898 UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT);
899 uv_write_global_mmr64
900 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
901 }
902}
903
904static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
905{
906 if (*offset < num_possible_cpus())
907 return offset;
908 return NULL;
909}
910
911static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
912{
913 (*offset)++;
914 if (*offset < num_possible_cpus())
915 return offset;
916 return NULL;
917}
918
919static void uv_ptc_seq_stop(struct seq_file *file, void *data)
920{
921}
922
923static inline unsigned long long
924microsec_2_cycles(unsigned long microsec)
925{
926 unsigned long ns;
927 unsigned long long cyc;
928
929 ns = microsec * 1000;
930 cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
931 return cyc;
932}
933
934/*
935 * Display the statistics thru /proc.
936 * 'data' points to the cpu number
937 */
938static int uv_ptc_seq_show(struct seq_file *file, void *data)
939{
940 struct ptc_stats *stat;
941 int cpu;
942
943 cpu = *(loff_t *)data;
944
945 if (!cpu) {
946 seq_printf(file,
947 "# cpu sent stime self locals remotes ncpus localhub ");
948 seq_printf(file,
949 "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
950 seq_printf(file,
951 "numuvhubs4 numuvhubs2 numuvhubs1 dto ");
952 seq_printf(file,
953 "retries rok resetp resett giveup sto bz throt ");
954 seq_printf(file,
955 "sw_ack recv rtime all ");
956 seq_printf(file,
957 "one mult none retry canc nocan reset rcan ");
958 seq_printf(file,
959 "disable enable\n");
960 }
961 if (cpu < num_possible_cpus() && cpu_online(cpu)) {
962 stat = &per_cpu(ptcstats, cpu);
963 /* source side statistics */
964 seq_printf(file,
965 "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
966 cpu, stat->s_requestor, cycles_2_us(stat->s_time),
967 stat->s_ntargself, stat->s_ntarglocals,
968 stat->s_ntargremotes, stat->s_ntargcpu,
969 stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
970 stat->s_ntarguvhub, stat->s_ntarguvhub16);
971 seq_printf(file, "%ld %ld %ld %ld %ld ",
972 stat->s_ntarguvhub8, stat->s_ntarguvhub4,
973 stat->s_ntarguvhub2, stat->s_ntarguvhub1,
974 stat->s_dtimeout);
975 seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
976 stat->s_retry_messages, stat->s_retriesok,
977 stat->s_resets_plug, stat->s_resets_timeout,
978 stat->s_giveup, stat->s_stimeout,
979 stat->s_busy, stat->s_throttles);
980
981 /* destination side statistics */
982 seq_printf(file,
983 "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
984 uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
985 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
986 stat->d_requestee, cycles_2_us(stat->d_time),
987 stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
988 stat->d_nomsg, stat->d_retries, stat->d_canceled,
989 stat->d_nocanceled, stat->d_resets,
990 stat->d_rcanceled);
991 seq_printf(file, "%ld %ld\n",
992 stat->s_bau_disabled, stat->s_bau_reenabled);
993 }
994
995 return 0;
996}
997
998/*
999 * Display the tunables thru debugfs
1000 */
1001static ssize_t tunables_read(struct file *file, char __user *userbuf,
1002 size_t count, loff_t *ppos)
1003{
1004 char buf[300];
1005 int ret;
1006
1007 ret = snprintf(buf, 300, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n",
1008 "max_bau_concurrent plugged_delay plugsb4reset",
1009 "timeoutsb4reset ipi_reset_limit complete_threshold",
1010 "congested_response_us congested_reps congested_period",
1011 max_bau_concurrent, plugged_delay, plugsb4reset,
1012 timeoutsb4reset, ipi_reset_limit, complete_threshold,
1013 congested_response_us, congested_reps, congested_period);
1014
1015 return simple_read_from_buffer(userbuf, count, ppos, buf, ret);
1016}
1017
1018/*
1019 * -1: reset the statistics
1020 * 0: display meaning of the statistics
1021 */
1022static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
1023 size_t count, loff_t *data)
1024{
1025 int cpu;
1026 long input_arg;
1027 char optstr[64];
1028 struct ptc_stats *stat;
1029
1030 if (count == 0 || count > sizeof(optstr))
1031 return -EINVAL;
1032 if (copy_from_user(optstr, user, count))
1033 return -EFAULT;
1034 optstr[count - 1] = '\0';
1035 if (strict_strtol(optstr, 10, &input_arg) < 0) {
1036 printk(KERN_DEBUG "%s is invalid\n", optstr);
1037 return -EINVAL;
1038 }
1039
1040 if (input_arg == 0) {
1041 printk(KERN_DEBUG "# cpu: cpu number\n");
1042 printk(KERN_DEBUG "Sender statistics:\n");
1043 printk(KERN_DEBUG
1044 "sent: number of shootdown messages sent\n");
1045 printk(KERN_DEBUG
1046 "stime: time spent sending messages\n");
1047 printk(KERN_DEBUG
1048 "numuvhubs: number of hubs targeted with shootdown\n");
1049 printk(KERN_DEBUG
1050 "numuvhubs16: number times 16 or more hubs targeted\n");
1051 printk(KERN_DEBUG
1052 "numuvhubs8: number times 8 or more hubs targeted\n");
1053 printk(KERN_DEBUG
1054 "numuvhubs4: number times 4 or more hubs targeted\n");
1055 printk(KERN_DEBUG
1056 "numuvhubs2: number times 2 or more hubs targeted\n");
1057 printk(KERN_DEBUG
1058 "numuvhubs1: number times 1 hub targeted\n");
1059 printk(KERN_DEBUG
1060 "numcpus: number of cpus targeted with shootdown\n");
1061 printk(KERN_DEBUG
1062 "dto: number of destination timeouts\n");
1063 printk(KERN_DEBUG
1064 "retries: destination timeout retries sent\n");
1065 printk(KERN_DEBUG
1066 "rok: destination timeouts successfully retried\n");
1067 printk(KERN_DEBUG
1068 "resetp: ipi-style resource resets for plugs\n");
1069 printk(KERN_DEBUG
1070 "resett: ipi-style resource resets for timeouts\n");
1071 printk(KERN_DEBUG
1072 "giveup: fall-backs to ipi-style shootdowns\n");
1073 printk(KERN_DEBUG
1074 "sto: number of source timeouts\n");
1075 printk(KERN_DEBUG
1076 "bz: number of stay-busy's\n");
1077 printk(KERN_DEBUG
1078 "throt: number times spun in throttle\n");
1079 printk(KERN_DEBUG "Destination side statistics:\n");
1080 printk(KERN_DEBUG
1081 "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
1082 printk(KERN_DEBUG
1083 "recv: shootdown messages received\n");
1084 printk(KERN_DEBUG
1085 "rtime: time spent processing messages\n");
1086 printk(KERN_DEBUG
1087 "all: shootdown all-tlb messages\n");
1088 printk(KERN_DEBUG
1089 "one: shootdown one-tlb messages\n");
1090 printk(KERN_DEBUG
1091 "mult: interrupts that found multiple messages\n");
1092 printk(KERN_DEBUG
1093 "none: interrupts that found no messages\n");
1094 printk(KERN_DEBUG
1095 "retry: number of retry messages processed\n");
1096 printk(KERN_DEBUG
1097 "canc: number messages canceled by retries\n");
1098 printk(KERN_DEBUG
1099 "nocan: number retries that found nothing to cancel\n");
1100 printk(KERN_DEBUG
1101 "reset: number of ipi-style reset requests processed\n");
1102 printk(KERN_DEBUG
1103 "rcan: number messages canceled by reset requests\n");
1104 printk(KERN_DEBUG
1105 "disable: number times use of the BAU was disabled\n");
1106 printk(KERN_DEBUG
1107 "enable: number times use of the BAU was re-enabled\n");
1108 } else if (input_arg == -1) {
1109 for_each_present_cpu(cpu) {
1110 stat = &per_cpu(ptcstats, cpu);
1111 memset(stat, 0, sizeof(struct ptc_stats));
1112 }
1113 }
1114
1115 return count;
1116}
1117
1118static int local_atoi(const char *name)
1119{
1120 int val = 0;
1121
1122 for (;; name++) {
1123 switch (*name) {
1124 case '0' ... '9':
1125 val = 10*val+(*name-'0');
1126 break;
1127 default:
1128 return val;
1129 }
1130 }
1131}
1132
1133/*
1134 * set the tunables
1135 * 0 values reset them to defaults
1136 */
1137static ssize_t tunables_write(struct file *file, const char __user *user,
1138 size_t count, loff_t *data)
1139{
1140 int cpu;
1141 int cnt = 0;
1142 int val;
1143 char *p;
1144 char *q;
1145 char instr[64];
1146 struct bau_control *bcp;
1147
1148 if (count == 0 || count > sizeof(instr)-1)
1149 return -EINVAL;
1150 if (copy_from_user(instr, user, count))
1151 return -EFAULT;
1152
1153 instr[count] = '\0';
1154 /* count the fields */
1155 p = instr + strspn(instr, WHITESPACE);
1156 q = p;
1157 for (; *p; p = q + strspn(q, WHITESPACE)) {
1158 q = p + strcspn(p, WHITESPACE);
1159 cnt++;
1160 if (q == p)
1161 break;
1162 }
1163 if (cnt != 9) {
1164 printk(KERN_INFO "bau tunable error: should be 9 numbers\n");
1165 return -EINVAL;
1166 }
1167
1168 p = instr + strspn(instr, WHITESPACE);
1169 q = p;
1170 for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) {
1171 q = p + strcspn(p, WHITESPACE);
1172 val = local_atoi(p);
1173 switch (cnt) {
1174 case 0:
1175 if (val == 0) {
1176 max_bau_concurrent = MAX_BAU_CONCURRENT;
1177 max_bau_concurrent_constant =
1178 MAX_BAU_CONCURRENT;
1179 continue;
1180 }
1181 bcp = &per_cpu(bau_control, smp_processor_id());
1182 if (val < 1 || val > bcp->cpus_in_uvhub) {
1183 printk(KERN_DEBUG
1184 "Error: BAU max concurrent %d is invalid\n",
1185 val);
1186 return -EINVAL;
1187 }
1188 max_bau_concurrent = val;
1189 max_bau_concurrent_constant = val;
1190 continue;
1191 case 1:
1192 if (val == 0)
1193 plugged_delay = PLUGGED_DELAY;
1194 else
1195 plugged_delay = val;
1196 continue;
1197 case 2:
1198 if (val == 0)
1199 plugsb4reset = PLUGSB4RESET;
1200 else
1201 plugsb4reset = val;
1202 continue;
1203 case 3:
1204 if (val == 0)
1205 timeoutsb4reset = TIMEOUTSB4RESET;
1206 else
1207 timeoutsb4reset = val;
1208 continue;
1209 case 4:
1210 if (val == 0)
1211 ipi_reset_limit = IPI_RESET_LIMIT;
1212 else
1213 ipi_reset_limit = val;
1214 continue;
1215 case 5:
1216 if (val == 0)
1217 complete_threshold = COMPLETE_THRESHOLD;
1218 else
1219 complete_threshold = val;
1220 continue;
1221 case 6:
1222 if (val == 0)
1223 congested_response_us = CONGESTED_RESPONSE_US;
1224 else
1225 congested_response_us = val;
1226 continue;
1227 case 7:
1228 if (val == 0)
1229 congested_reps = CONGESTED_REPS;
1230 else
1231 congested_reps = val;
1232 continue;
1233 case 8:
1234 if (val == 0)
1235 congested_period = CONGESTED_PERIOD;
1236 else
1237 congested_period = val;
1238 continue;
1239 }
1240 if (q == p)
1241 break;
1242 }
1243 for_each_present_cpu(cpu) {
1244 bcp = &per_cpu(bau_control, cpu);
1245 bcp->max_bau_concurrent = max_bau_concurrent;
1246 bcp->max_bau_concurrent_constant = max_bau_concurrent;
1247 bcp->plugged_delay = plugged_delay;
1248 bcp->plugsb4reset = plugsb4reset;
1249 bcp->timeoutsb4reset = timeoutsb4reset;
1250 bcp->ipi_reset_limit = ipi_reset_limit;
1251 bcp->complete_threshold = complete_threshold;
1252 bcp->congested_response_us = congested_response_us;
1253 bcp->congested_reps = congested_reps;
1254 bcp->congested_period = congested_period;
1255 }
1256 return count;
1257}
1258
1259static const struct seq_operations uv_ptc_seq_ops = {
1260 .start = uv_ptc_seq_start,
1261 .next = uv_ptc_seq_next,
1262 .stop = uv_ptc_seq_stop,
1263 .show = uv_ptc_seq_show
1264};
1265
1266static int uv_ptc_proc_open(struct inode *inode, struct file *file)
1267{
1268 return seq_open(file, &uv_ptc_seq_ops);
1269}
1270
1271static int tunables_open(struct inode *inode, struct file *file)
1272{
1273 return 0;
1274}
1275
1276static const struct file_operations proc_uv_ptc_operations = {
1277 .open = uv_ptc_proc_open,
1278 .read = seq_read,
1279 .write = uv_ptc_proc_write,
1280 .llseek = seq_lseek,
1281 .release = seq_release,
1282};
1283
1284static const struct file_operations tunables_fops = {
1285 .open = tunables_open,
1286 .read = tunables_read,
1287 .write = tunables_write,
1288};
1289
1290static int __init uv_ptc_init(void)
1291{
1292 struct proc_dir_entry *proc_uv_ptc;
1293
1294 if (!is_uv_system())
1295 return 0;
1296
1297 proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
1298 &proc_uv_ptc_operations);
1299 if (!proc_uv_ptc) {
1300 printk(KERN_ERR "unable to create %s proc entry\n",
1301 UV_PTC_BASENAME);
1302 return -EINVAL;
1303 }
1304
1305 tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL);
1306 if (!tunables_dir) {
1307 printk(KERN_ERR "unable to create debugfs directory %s\n",
1308 UV_BAU_TUNABLES_DIR);
1309 return -EINVAL;
1310 }
1311 tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
1312 tunables_dir, NULL, &tunables_fops);
1313 if (!tunables_file) {
1314 printk(KERN_ERR "unable to create debugfs file %s\n",
1315 UV_BAU_TUNABLES_FILE);
1316 return -EINVAL;
1317 }
1318 return 0;
1319}
1320
1321/*
1322 * initialize the sending side's sending buffers
1323 */
1324static void
1325uv_activation_descriptor_init(int node, int pnode)
1326{
1327 int i;
1328 int cpu;
1329 unsigned long pa;
1330 unsigned long m;
1331 unsigned long n;
1332 struct bau_desc *bau_desc;
1333 struct bau_desc *bd2;
1334 struct bau_control *bcp;
1335
1336 /*
1337 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
1338 * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub
1339 */
1340 bau_desc = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)*
1341 UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
1342 BUG_ON(!bau_desc);
1343
1344 pa = uv_gpa(bau_desc); /* need the real nasid*/
1345 n = pa >> uv_nshift;
1346 m = pa & uv_mmask;
1347
1348 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
1349 (n << UV_DESC_BASE_PNODE_SHIFT | m));
1350
1351 /*
1352 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
1353 * cpu even though we only use the first one; one descriptor can
1354 * describe a broadcast to 256 uv hubs.
1355 */
1356 for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR);
1357 i++, bd2++) {
1358 memset(bd2, 0, sizeof(struct bau_desc));
1359 bd2->header.sw_ack_flag = 1;
1360 /*
1361 * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub
1362 * in the partition. The bit map will indicate uvhub numbers,
1363 * which are 0-N in a partition. Pnodes are unique system-wide.
1364 */
1365 bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
1366 bd2->header.dest_subnodeid = 0x10; /* the LB */
1367 bd2->header.command = UV_NET_ENDPOINT_INTD;
1368 bd2->header.int_both = 1;
1369 /*
1370 * all others need to be set to zero:
1371 * fairness chaining multilevel count replied_to
1372 */
1373 }
1374 for_each_present_cpu(cpu) {
1375 if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
1376 continue;
1377 bcp = &per_cpu(bau_control, cpu);
1378 bcp->descriptor_base = bau_desc;
1379 }
1380}
1381
1382/*
1383 * initialize the destination side's receiving buffers
1384 * entered for each uvhub in the partition
1385 * - node is first node (kernel memory notion) on the uvhub
1386 * - pnode is the uvhub's physical identifier
1387 */
1388static void
1389uv_payload_queue_init(int node, int pnode)
1390{
1391 int pn;
1392 int cpu;
1393 char *cp;
1394 unsigned long pa;
1395 struct bau_payload_queue_entry *pqp;
1396 struct bau_payload_queue_entry *pqp_malloc;
1397 struct bau_control *bcp;
1398
1399 pqp = (struct bau_payload_queue_entry *) kmalloc_node(
1400 (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry),
1401 GFP_KERNEL, node);
1402 BUG_ON(!pqp);
1403 pqp_malloc = pqp;
1404
1405 cp = (char *)pqp + 31;
1406 pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
1407
1408 for_each_present_cpu(cpu) {
1409 if (pnode != uv_cpu_to_pnode(cpu))
1410 continue;
1411 /* for every cpu on this pnode: */
1412 bcp = &per_cpu(bau_control, cpu);
1413 bcp->va_queue_first = pqp;
1414 bcp->bau_msg_head = pqp;
1415 bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1);
1416 }
1417 /*
1418 * need the pnode of where the memory was really allocated
1419 */
1420 pa = uv_gpa(pqp);
1421 pn = pa >> uv_nshift;
1422 uv_write_global_mmr64(pnode,
1423 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
1424 ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
1425 uv_physnodeaddr(pqp));
1426 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
1427 uv_physnodeaddr(pqp));
1428 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
1429 (unsigned long)
1430 uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)));
1431 /* in effect, all msg_type's are set to MSG_NOOP */
1432 memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE);
1433}
1434
1435/*
1436 * Initialization of each UV hub's structures
1437 */
1438static void __init uv_init_uvhub(int uvhub, int vector)
1439{
1440 int node;
1441 int pnode;
1442 unsigned long apicid;
1443
1444 node = uvhub_to_first_node(uvhub);
1445 pnode = uv_blade_to_pnode(uvhub);
1446 uv_activation_descriptor_init(node, pnode);
1447 uv_payload_queue_init(node, pnode);
1448 /*
1449 * the below initialization can't be in firmware because the
1450 * messaging IRQ will be determined by the OS
1451 */
1452 apicid = uvhub_to_first_apicid(uvhub);
1453 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
1454 ((apicid << 32) | vector));
1455}
1456
1457/*
1458 * We will set BAU_MISC_CONTROL with a timeout period.
1459 * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT.
1460 * So the destination timeout period has to be calculated from them.
1461 */
1462static int
1463calculate_destination_timeout(void)
1464{
1465 unsigned long mmr_image;
1466 int mult1;
1467 int mult2;
1468 int index;
1469 int base;
1470 int ret;
1471 unsigned long ts_ns;
1472
1473 mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK;
1474 mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
1475 index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
1476 mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
1477 mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
1478 base = timeout_base_ns[index];
1479 ts_ns = base * mult1 * mult2;
1480 ret = ts_ns / 1000;
1481 return ret;
1482}
1483
1484/*
1485 * initialize the bau_control structure for each cpu
1486 */
1487static void __init uv_init_per_cpu(int nuvhubs)
1488{
1489 int i;
1490 int cpu;
1491 int pnode;
1492 int uvhub;
1493 int have_hmaster;
1494 short socket = 0;
1495 unsigned short socket_mask;
1496 unsigned char *uvhub_mask;
1497 struct bau_control *bcp;
1498 struct uvhub_desc *bdp;
1499 struct socket_desc *sdp;
1500 struct bau_control *hmaster = NULL;
1501 struct bau_control *smaster = NULL;
1502 struct socket_desc {
1503 short num_cpus;
1504 short cpu_number[16];
1505 };
1506 struct uvhub_desc {
1507 unsigned short socket_mask;
1508 short num_cpus;
1509 short uvhub;
1510 short pnode;
1511 struct socket_desc socket[2];
1512 };
1513 struct uvhub_desc *uvhub_descs;
1514
1515 timeout_us = calculate_destination_timeout();
1516
1517 uvhub_descs = (struct uvhub_desc *)
1518 kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
1519 memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
1520 uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
1521 for_each_present_cpu(cpu) {
1522 bcp = &per_cpu(bau_control, cpu);
1523 memset(bcp, 0, sizeof(struct bau_control));
1524 pnode = uv_cpu_hub_info(cpu)->pnode;
1525 uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
1526 *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
1527 bdp = &uvhub_descs[uvhub];
1528 bdp->num_cpus++;
1529 bdp->uvhub = uvhub;
1530 bdp->pnode = pnode;
1531 /* kludge: 'assuming' one node per socket, and assuming that
1532 disabling a socket just leaves a gap in node numbers */
1533 socket = (cpu_to_node(cpu) & 1);
1534 bdp->socket_mask |= (1 << socket);
1535 sdp = &bdp->socket[socket];
1536 sdp->cpu_number[sdp->num_cpus] = cpu;
1537 sdp->num_cpus++;
1538 }
1539 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
1540 if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
1541 continue;
1542 have_hmaster = 0;
1543 bdp = &uvhub_descs[uvhub];
1544 socket_mask = bdp->socket_mask;
1545 socket = 0;
1546 while (socket_mask) {
1547 if (!(socket_mask & 1))
1548 goto nextsocket;
1549 sdp = &bdp->socket[socket];
1550 for (i = 0; i < sdp->num_cpus; i++) {
1551 cpu = sdp->cpu_number[i];
1552 bcp = &per_cpu(bau_control, cpu);
1553 bcp->cpu = cpu;
1554 if (i == 0) {
1555 smaster = bcp;
1556 if (!have_hmaster) {
1557 have_hmaster++;
1558 hmaster = bcp;
1559 }
1560 }
1561 bcp->cpus_in_uvhub = bdp->num_cpus;
1562 bcp->cpus_in_socket = sdp->num_cpus;
1563 bcp->socket_master = smaster;
1564 bcp->uvhub = bdp->uvhub;
1565 bcp->uvhub_master = hmaster;
1566 bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
1567 blade_processor_id;
1568 }
1569nextsocket:
1570 socket++;
1571 socket_mask = (socket_mask >> 1);
1572 }
1573 }
1574 kfree(uvhub_descs);
1575 kfree(uvhub_mask);
1576 for_each_present_cpu(cpu) {
1577 bcp = &per_cpu(bau_control, cpu);
1578 bcp->baudisabled = 0;
1579 bcp->statp = &per_cpu(ptcstats, cpu);
1580 /* time interval to catch a hardware stay-busy bug */
1581 bcp->timeout_interval = microsec_2_cycles(2*timeout_us);
1582 bcp->max_bau_concurrent = max_bau_concurrent;
1583 bcp->max_bau_concurrent_constant = max_bau_concurrent;
1584 bcp->plugged_delay = plugged_delay;
1585 bcp->plugsb4reset = plugsb4reset;
1586 bcp->timeoutsb4reset = timeoutsb4reset;
1587 bcp->ipi_reset_limit = ipi_reset_limit;
1588 bcp->complete_threshold = complete_threshold;
1589 bcp->congested_response_us = congested_response_us;
1590 bcp->congested_reps = congested_reps;
1591 bcp->congested_period = congested_period;
1592 }
1593}
1594
1595/*
1596 * Initialization of BAU-related structures
1597 */
1598static int __init uv_bau_init(void)
1599{
1600 int uvhub;
1601 int pnode;
1602 int nuvhubs;
1603 int cur_cpu;
1604 int vector;
1605 unsigned long mmr;
1606
1607 if (!is_uv_system())
1608 return 0;
1609
1610 if (nobau)
1611 return 0;
1612
1613 for_each_possible_cpu(cur_cpu)
1614 zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
1615 GFP_KERNEL, cpu_to_node(cur_cpu));
1616
1617 uv_nshift = uv_hub_info->m_val;
1618 uv_mmask = (1UL << uv_hub_info->m_val) - 1;
1619 nuvhubs = uv_num_possible_blades();
1620 spin_lock_init(&disable_lock);
1621 congested_cycles = microsec_2_cycles(congested_response_us);
1622
1623 uv_init_per_cpu(nuvhubs);
1624
1625 uv_partition_base_pnode = 0x7fffffff;
1626 for (uvhub = 0; uvhub < nuvhubs; uvhub++)
1627 if (uv_blade_nr_possible_cpus(uvhub) &&
1628 (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
1629 uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
1630
1631 vector = UV_BAU_MESSAGE;
1632 for_each_possible_blade(uvhub)
1633 if (uv_blade_nr_possible_cpus(uvhub))
1634 uv_init_uvhub(uvhub, vector);
1635
1636 uv_enable_timeouts();
1637 alloc_intr_gate(vector, uv_bau_message_intr1);
1638
1639 for_each_possible_blade(uvhub) {
1640 if (uv_blade_nr_possible_cpus(uvhub)) {
1641 pnode = uv_blade_to_pnode(uvhub);
1642 /* INIT the bau */
1643 uv_write_global_mmr64(pnode,
1644 UVH_LB_BAU_SB_ACTIVATION_CONTROL,
1645 ((unsigned long)1 << 63));
1646 mmr = 1; /* should be 1 to broadcast to both sockets */
1647 uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST,
1648 mmr);
1649 }
1650 }
1651
1652 return 0;
1653}
1654core_initcall(uv_bau_init);
1655fs_initcall(uv_ptc_init);
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 4c3da5674e67..a375616d77f7 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -38,19 +38,3 @@ unsigned long __trampinit setup_trampoline(void)
38 memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); 38 memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
39 return virt_to_phys(trampoline_base); 39 return virt_to_phys(trampoline_base);
40} 40}
41
42void __init setup_trampoline_page_table(void)
43{
44#ifdef CONFIG_X86_32
45 /* Copy kernel address range */
46 clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY,
47 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
48 KERNEL_PGD_PTRS);
49
50 /* Initialize low mappings */
51 clone_pgd_range(trampoline_pg_dir,
52 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
53 min_t(unsigned long, KERNEL_PGD_PTRS,
54 KERNEL_PGD_BOUNDARY));
55#endif
56}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d43968503dd2..cb838ca42c96 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -575,6 +575,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
575 if (regs->flags & X86_VM_MASK) { 575 if (regs->flags & X86_VM_MASK) {
576 handle_vm86_trap((struct kernel_vm86_regs *) regs, 576 handle_vm86_trap((struct kernel_vm86_regs *) regs,
577 error_code, 1); 577 error_code, 1);
578 preempt_conditional_cli(regs);
578 return; 579 return;
579 } 580 }
580 581
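The one-line traps.c change balances the preempt_conditional_sti(regs) that do_debug() performs on entry: the vm86 early return used to skip the matching preempt_conditional_cli(regs), leaving the preempt count elevated. A hedged sketch of the resulting control flow, with the unrelated #DB handling elided and the surrounding details assumed:

	dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
	{
		preempt_conditional_sti(regs);		/* bumps the preempt count, may enable irqs */

		if (regs->flags & X86_VM_MASK) {
			handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
			preempt_conditional_cli(regs);	/* the added line: rebalance before returning */
			return;
		}

		/* ... normal debug-exception handling ... */

		preempt_conditional_cli(regs);		/* existing rebalance on the normal exit */
	}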
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
deleted file mode 100644
index 7b24460917d5..000000000000
--- a/arch/x86/kernel/uv_irq.c
+++ /dev/null
@@ -1,285 +0,0 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * SGI UV IRQ functions
7 *
8 * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
9 */
10
11#include <linux/module.h>
12#include <linux/rbtree.h>
13#include <linux/slab.h>
14#include <linux/irq.h>
15
16#include <asm/apic.h>
17#include <asm/uv/uv_irq.h>
18#include <asm/uv/uv_hub.h>
19
20/* MMR offset and pnode of hub sourcing interrupts for a given irq */
21struct uv_irq_2_mmr_pnode{
22 struct rb_node list;
23 unsigned long offset;
24 int pnode;
25 int irq;
26};
27
28static spinlock_t uv_irq_lock;
29static struct rb_root uv_irq_root;
30
31static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool);
32
33static void uv_noop(struct irq_data *data) { }
34
35static void uv_ack_apic(struct irq_data *data)
36{
37 ack_APIC_irq();
38}
39
40static struct irq_chip uv_irq_chip = {
41 .name = "UV-CORE",
42 .irq_mask = uv_noop,
43 .irq_unmask = uv_noop,
44 .irq_eoi = uv_ack_apic,
45 .irq_set_affinity = uv_set_irq_affinity,
46};
47
48/*
49 * Add offset and pnode information of the hub sourcing interrupts to the
50 * rb tree for a specific irq.
51 */
52static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
53{
54 struct rb_node **link = &uv_irq_root.rb_node;
55 struct rb_node *parent = NULL;
56 struct uv_irq_2_mmr_pnode *n;
57 struct uv_irq_2_mmr_pnode *e;
58 unsigned long irqflags;
59
60 n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
61 uv_blade_to_memory_nid(blade));
62 if (!n)
63 return -ENOMEM;
64
65 n->irq = irq;
66 n->offset = offset;
67 n->pnode = uv_blade_to_pnode(blade);
68 spin_lock_irqsave(&uv_irq_lock, irqflags);
69 /* Find the right place in the rbtree: */
70 while (*link) {
71 parent = *link;
72 e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
73
74 if (unlikely(irq == e->irq)) {
75 /* irq entry exists */
76 e->pnode = uv_blade_to_pnode(blade);
77 e->offset = offset;
78 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
79 kfree(n);
80 return 0;
81 }
82
83 if (irq < e->irq)
84 link = &(*link)->rb_left;
85 else
86 link = &(*link)->rb_right;
87 }
88
89 /* Insert the node into the rbtree. */
90 rb_link_node(&n->list, parent, link);
91 rb_insert_color(&n->list, &uv_irq_root);
92
93 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
94 return 0;
95}
96
97/* Retrieve offset and pnode information from the rb tree for a specific irq */
98int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
99{
100 struct uv_irq_2_mmr_pnode *e;
101 struct rb_node *n;
102 unsigned long irqflags;
103
104 spin_lock_irqsave(&uv_irq_lock, irqflags);
105 n = uv_irq_root.rb_node;
106 while (n) {
107 e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
108
109 if (e->irq == irq) {
110 *offset = e->offset;
111 *pnode = e->pnode;
112 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
113 return 0;
114 }
115
116 if (irq < e->irq)
117 n = n->rb_left;
118 else
119 n = n->rb_right;
120 }
121 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
122 return -1;
123}
124
125/*
126 * Re-target the irq to the specified CPU and enable the specified MMR located
127 * on the specified blade to allow the sending of MSIs to the specified CPU.
128 */
129static int
130arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
131 unsigned long mmr_offset, int limit)
132{
133 const struct cpumask *eligible_cpu = cpumask_of(cpu);
134 struct irq_cfg *cfg = get_irq_chip_data(irq);
135 unsigned long mmr_value;
136 struct uv_IO_APIC_route_entry *entry;
137 int mmr_pnode, err;
138
139 BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
140 sizeof(unsigned long));
141
142 err = assign_irq_vector(irq, cfg, eligible_cpu);
143 if (err != 0)
144 return err;
145
146 if (limit == UV_AFFINITY_CPU)
147 irq_set_status_flags(irq, IRQ_NO_BALANCING);
148 else
149 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
150
151 set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
152 irq_name);
153
154 mmr_value = 0;
155 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
156 entry->vector = cfg->vector;
157 entry->delivery_mode = apic->irq_delivery_mode;
158 entry->dest_mode = apic->irq_dest_mode;
159 entry->polarity = 0;
160 entry->trigger = 0;
161 entry->mask = 0;
162 entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
163
164 mmr_pnode = uv_blade_to_pnode(mmr_blade);
165 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
166
167 if (cfg->move_in_progress)
168 send_cleanup_vector(cfg);
169
170 return irq;
171}
172
173/*
174 * Disable the specified MMR located on the specified blade so that MSIs are
175 * no longer allowed to be sent.
176 */
177static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
178{
179 unsigned long mmr_value;
180 struct uv_IO_APIC_route_entry *entry;
181
182 BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
183 sizeof(unsigned long));
184
185 mmr_value = 0;
186 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
187 entry->mask = 1;
188
189 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
190}
191
192static int
193uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
194 bool force)
195{
196 struct irq_cfg *cfg = data->chip_data;
197 unsigned int dest;
198 unsigned long mmr_value, mmr_offset;
199 struct uv_IO_APIC_route_entry *entry;
200 int mmr_pnode;
201
202 if (__ioapic_set_affinity(data, mask, &dest))
203 return -1;
204
205 mmr_value = 0;
206 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
207
208 entry->vector = cfg->vector;
209 entry->delivery_mode = apic->irq_delivery_mode;
210 entry->dest_mode = apic->irq_dest_mode;
211 entry->polarity = 0;
212 entry->trigger = 0;
213 entry->mask = 0;
214 entry->dest = dest;
215
216 /* Get previously stored MMR and pnode of hub sourcing interrupts */
217 if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode))
218 return -1;
219
220 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
221
222 if (cfg->move_in_progress)
223 send_cleanup_vector(cfg);
224
225 return 0;
226}
227
228/*
229 * Set up a mapping of an available irq and vector, and enable the specified
230 * MMR that defines the MSI that is to be sent to the specified CPU when an
231 * interrupt is raised.
232 */
233int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
234 unsigned long mmr_offset, int limit)
235{
236 int irq, ret;
237
238 irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));
239
240 if (irq <= 0)
241 return -EBUSY;
242
243 ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
244 limit);
245 if (ret == irq)
246 uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
247 else
248 destroy_irq(irq);
249
250 return ret;
251}
252EXPORT_SYMBOL_GPL(uv_setup_irq);
253
254/*
255 * Tear down a mapping of an irq and vector, and disable the specified MMR that
256 * defined the MSI that was to be sent to the specified CPU when an interrupt
257 * was raised.
258 *
259 * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
260 */
261void uv_teardown_irq(unsigned int irq)
262{
263 struct uv_irq_2_mmr_pnode *e;
264 struct rb_node *n;
265 unsigned long irqflags;
266
267 spin_lock_irqsave(&uv_irq_lock, irqflags);
268 n = uv_irq_root.rb_node;
269 while (n) {
270 e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
271 if (e->irq == irq) {
272 arch_disable_uv_irq(e->pnode, e->offset);
273 rb_erase(n, &uv_irq_root);
274 kfree(e);
275 break;
276 }
277 if (irq < e->irq)
278 n = n->rb_left;
279 else
280 n = n->rb_right;
281 }
282 spin_unlock_irqrestore(&uv_irq_lock, irqflags);
283 destroy_irq(irq);
284}
285EXPORT_SYMBOL_GPL(uv_teardown_irq);
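
For context, a minimal sketch of how a driver would have consumed the interface deleted above: allocate and program the MSI with uv_setup_irq(), attach a normal handler with request_irq(), and undo both on teardown. The names my_handler, my_dev, my_attach/my_detach and MY_MMR_OFFSET are placeholders, not part of this patch, and error handling is trimmed.

/* Usage sketch for the uv_irq interface above; placeholder names only. */
#include <linux/interrupt.h>
#include <asm/uv/uv_irq.h>

#define MY_MMR_OFFSET	0UL	/* hardware-specific MMR offset (placeholder) */

static irqreturn_t my_handler(int irq, void *dev_id)
{
	return IRQ_HANDLED;
}

static int my_attach(int cpu, int blade, void *my_dev)
{
	int irq = uv_setup_irq("my-uv-dev", cpu, blade, MY_MMR_OFFSET,
			       UV_AFFINITY_CPU);

	if (irq <= 0)
		return irq ? irq : -EBUSY;
	if (request_irq(irq, my_handler, 0, "my-uv-dev", my_dev)) {
		uv_teardown_irq(irq);
		return -EBUSY;
	}
	return irq;
}

static void my_detach(int irq, void *my_dev)
{
	free_irq(irq, my_dev);
	uv_teardown_irq(irq);	/* masks the MMR entry and frees the vector */
}
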
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
deleted file mode 100644
index 309c70fb7759..000000000000
--- a/arch/x86/kernel/uv_sysfs.c
+++ /dev/null
@@ -1,76 +0,0 @@
1/*
2 * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson
20 */
21
22#include <linux/sysdev.h>
23#include <asm/uv/bios.h>
24#include <asm/uv/uv.h>
25
26struct kobject *sgi_uv_kobj;
27
28static ssize_t partition_id_show(struct kobject *kobj,
29 struct kobj_attribute *attr, char *buf)
30{
31 return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id);
32}
33
34static ssize_t coherence_id_show(struct kobject *kobj,
35 struct kobj_attribute *attr, char *buf)
36{
37 return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
38}
39
40static struct kobj_attribute partition_id_attr =
41 __ATTR(partition_id, S_IRUGO, partition_id_show, NULL);
42
43static struct kobj_attribute coherence_id_attr =
44 __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL);
45
46
47static int __init sgi_uv_sysfs_init(void)
48{
49 unsigned long ret;
50
51 if (!is_uv_system())
52 return -ENODEV;
53
54 if (!sgi_uv_kobj)
55 sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
56 if (!sgi_uv_kobj) {
57 printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n");
58 return -EINVAL;
59 }
60
61 ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
62 if (ret) {
63 printk(KERN_WARNING "sysfs_create_file partition_id failed\n");
64 return ret;
65 }
66
67 ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
68 if (ret) {
69 printk(KERN_WARNING "sysfs_create_file coherence_id failed\n");
70 return ret;
71 }
72
73 return 0;
74}
75
76device_initcall(sgi_uv_sysfs_init);
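
The two attributes created above surface as read-only text files under /sys/firmware/sgi_uv/ (the kobject is added to firmware_kobj). A small userspace sketch of consuming one of them, with the path taken from the code above and error handling kept minimal:

/* Userspace sketch: read the partition_id attribute exported above. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/firmware/sgi_uv/partition_id", "r");
	long id;

	if (!f)
		return 1;
	if (fscanf(f, "%ld", &id) == 1)
		printf("SGI UV partition id: %ld\n", id);
	fclose(f);
	return 0;
}
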
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
deleted file mode 100644
index 56e421bc379b..000000000000
--- a/arch/x86/kernel/uv_time.c
+++ /dev/null
@@ -1,423 +0,0 @@
1/*
2 * SGI RTC clock/timer routines.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Dimitri Sivanich
20 */
21#include <linux/clockchips.h>
22#include <linux/slab.h>
23
24#include <asm/uv/uv_mmrs.h>
25#include <asm/uv/uv_hub.h>
26#include <asm/uv/bios.h>
27#include <asm/uv/uv.h>
28#include <asm/apic.h>
29#include <asm/cpu.h>
30
31#define RTC_NAME "sgi_rtc"
32
33static cycle_t uv_read_rtc(struct clocksource *cs);
34static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
35static void uv_rtc_timer_setup(enum clock_event_mode,
36 struct clock_event_device *);
37
38static struct clocksource clocksource_uv = {
39 .name = RTC_NAME,
40 .rating = 400,
41 .read = uv_read_rtc,
42 .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
43 .shift = 10,
44 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
45};
46
47static struct clock_event_device clock_event_device_uv = {
48 .name = RTC_NAME,
49 .features = CLOCK_EVT_FEAT_ONESHOT,
50 .shift = 20,
51 .rating = 400,
52 .irq = -1,
53 .set_next_event = uv_rtc_next_event,
54 .set_mode = uv_rtc_timer_setup,
55 .event_handler = NULL,
56};
57
58static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
59
60/* There is one of these allocated per node */
61struct uv_rtc_timer_head {
62 spinlock_t lock;
63 /* next cpu waiting for timer, local node relative: */
64 int next_cpu;
65 /* number of cpus on this node: */
66 int ncpus;
67 struct {
68 int lcpu; /* systemwide logical cpu number */
69 u64 expires; /* next timer expiration for this cpu */
70 } cpu[1];
71};
72
73/*
74 * Access to uv_rtc_timer_head via blade id.
75 */
76static struct uv_rtc_timer_head **blade_info __read_mostly;
77
78static int uv_rtc_evt_enable;
79
80/*
81 * Hardware interface routines
82 */
83
84/* Send IPIs to another node */
85static void uv_rtc_send_IPI(int cpu)
86{
87 unsigned long apicid, val;
88 int pnode;
89
90 apicid = cpu_physical_id(cpu);
91 pnode = uv_apicid_to_pnode(apicid);
92 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
93 (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
94 (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
95
96 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
97}
98
99/* Check for an RTC interrupt pending */
100static int uv_intr_pending(int pnode)
101{
102 return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
103 UVH_EVENT_OCCURRED0_RTC1_MASK;
104}
105
106/* Setup interrupt and return non-zero if early expiration occurred. */
107static int uv_setup_intr(int cpu, u64 expires)
108{
109 u64 val;
110 int pnode = uv_cpu_to_pnode(cpu);
111
112 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
113 UVH_RTC1_INT_CONFIG_M_MASK);
114 uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
115
116 uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
117 UVH_EVENT_OCCURRED0_RTC1_MASK);
118
119 val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
120 ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
121
122 /* Set configuration */
123 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
124 /* Initialize comparator value */
125 uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
126
127 if (uv_read_rtc(NULL) <= expires)
128 return 0;
129
130 return !uv_intr_pending(pnode);
131}
132
133/*
134 * Per-cpu timer tracking routines
135 */
136
137static __init void uv_rtc_deallocate_timers(void)
138{
139 int bid;
140
141 for_each_possible_blade(bid) {
142 kfree(blade_info[bid]);
143 }
144 kfree(blade_info);
145}
146
147/* Allocate per-node list of cpu timer expiration times. */
148static __init int uv_rtc_allocate_timers(void)
149{
150 int cpu;
151
152 blade_info = kmalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL);
153 if (!blade_info)
154 return -ENOMEM;
155 memset(blade_info, 0, uv_possible_blades * sizeof(void *));
156
157 for_each_present_cpu(cpu) {
158 int nid = cpu_to_node(cpu);
159 int bid = uv_cpu_to_blade_id(cpu);
160 int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
161 struct uv_rtc_timer_head *head = blade_info[bid];
162
163 if (!head) {
164 head = kmalloc_node(sizeof(struct uv_rtc_timer_head) +
165 (uv_blade_nr_possible_cpus(bid) *
166 2 * sizeof(u64)),
167 GFP_KERNEL, nid);
168 if (!head) {
169 uv_rtc_deallocate_timers();
170 return -ENOMEM;
171 }
172 spin_lock_init(&head->lock);
173 head->ncpus = uv_blade_nr_possible_cpus(bid);
174 head->next_cpu = -1;
175 blade_info[bid] = head;
176 }
177
178 head->cpu[bcpu].lcpu = cpu;
179 head->cpu[bcpu].expires = ULLONG_MAX;
180 }
181
182 return 0;
183}
184
185/* Find and set the next expiring timer. */
186static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode)
187{
188 u64 lowest = ULLONG_MAX;
189 int c, bcpu = -1;
190
191 head->next_cpu = -1;
192 for (c = 0; c < head->ncpus; c++) {
193 u64 exp = head->cpu[c].expires;
194 if (exp < lowest) {
195 bcpu = c;
196 lowest = exp;
197 }
198 }
199 if (bcpu >= 0) {
200 head->next_cpu = bcpu;
201 c = head->cpu[bcpu].lcpu;
202 if (uv_setup_intr(c, lowest))
203 /* If we didn't set it up in time, trigger */
204 uv_rtc_send_IPI(c);
205 } else {
206 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
207 UVH_RTC1_INT_CONFIG_M_MASK);
208 }
209}
210
211/*
212 * Set expiration time for current cpu.
213 *
214 * Returns -ETIME if we missed the expiration time.
215 */
216static int uv_rtc_set_timer(int cpu, u64 expires)
217{
218 int pnode = uv_cpu_to_pnode(cpu);
219 int bid = uv_cpu_to_blade_id(cpu);
220 struct uv_rtc_timer_head *head = blade_info[bid];
221 int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
222 u64 *t = &head->cpu[bcpu].expires;
223 unsigned long flags;
224 int next_cpu;
225
226 spin_lock_irqsave(&head->lock, flags);
227
228 next_cpu = head->next_cpu;
229 *t = expires;
230
231 /* Will this one be next to go off? */
232 if (next_cpu < 0 || bcpu == next_cpu ||
233 expires < head->cpu[next_cpu].expires) {
234 head->next_cpu = bcpu;
235 if (uv_setup_intr(cpu, expires)) {
236 *t = ULLONG_MAX;
237 uv_rtc_find_next_timer(head, pnode);
238 spin_unlock_irqrestore(&head->lock, flags);
239 return -ETIME;
240 }
241 }
242
243 spin_unlock_irqrestore(&head->lock, flags);
244 return 0;
245}
246
247/*
248 * Unset expiration time for current cpu.
249 *
250 * Returns 1 if this timer was pending.
251 */
252static int uv_rtc_unset_timer(int cpu, int force)
253{
254 int pnode = uv_cpu_to_pnode(cpu);
255 int bid = uv_cpu_to_blade_id(cpu);
256 struct uv_rtc_timer_head *head = blade_info[bid];
257 int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
258 u64 *t = &head->cpu[bcpu].expires;
259 unsigned long flags;
260 int rc = 0;
261
262 spin_lock_irqsave(&head->lock, flags);
263
264 if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
265 rc = 1;
266
267 if (rc) {
268 *t = ULLONG_MAX;
269 /* Was the hardware setup for this timer? */
270 if (head->next_cpu == bcpu)
271 uv_rtc_find_next_timer(head, pnode);
272 }
273
274 spin_unlock_irqrestore(&head->lock, flags);
275
276 return rc;
277}
278
279
280/*
281 * Kernel interface routines.
282 */
283
284/*
285 * Read the RTC.
286 *
287 * Starting with HUB rev 2.0, the UV RTC register is replicated across all
288 * cachelines of its own page. This allows faster simultaneous reads
289 * from a given socket.
290 */
291static cycle_t uv_read_rtc(struct clocksource *cs)
292{
293 unsigned long offset;
294
295 if (uv_get_min_hub_revision_id() == 1)
296 offset = 0;
297 else
298 offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
299
300 return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
301}
302
303/*
304 * Program the next event, relative to now
305 */
306static int uv_rtc_next_event(unsigned long delta,
307 struct clock_event_device *ced)
308{
309 int ced_cpu = cpumask_first(ced->cpumask);
310
311 return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL));
312}
313
314/*
315 * Setup the RTC timer in oneshot mode
316 */
317static void uv_rtc_timer_setup(enum clock_event_mode mode,
318 struct clock_event_device *evt)
319{
320 int ced_cpu = cpumask_first(evt->cpumask);
321
322 switch (mode) {
323 case CLOCK_EVT_MODE_PERIODIC:
324 case CLOCK_EVT_MODE_ONESHOT:
325 case CLOCK_EVT_MODE_RESUME:
326 /* Nothing to do here yet */
327 break;
328 case CLOCK_EVT_MODE_UNUSED:
329 case CLOCK_EVT_MODE_SHUTDOWN:
330 uv_rtc_unset_timer(ced_cpu, 1);
331 break;
332 }
333}
334
335static void uv_rtc_interrupt(void)
336{
337 int cpu = smp_processor_id();
338 struct clock_event_device *ced = &per_cpu(cpu_ced, cpu);
339
340 if (!ced || !ced->event_handler)
341 return;
342
343 if (uv_rtc_unset_timer(cpu, 0) != 1)
344 return;
345
346 ced->event_handler(ced);
347}
348
349static int __init uv_enable_evt_rtc(char *str)
350{
351 uv_rtc_evt_enable = 1;
352
353 return 1;
354}
355__setup("uvrtcevt", uv_enable_evt_rtc);
356
357static __init void uv_rtc_register_clockevents(struct work_struct *dummy)
358{
359 struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
360
361 *ced = clock_event_device_uv;
362 ced->cpumask = cpumask_of(smp_processor_id());
363 clockevents_register_device(ced);
364}
365
366static __init int uv_rtc_setup_clock(void)
367{
368 int rc;
369
370 if (!is_uv_system())
371 return -ENODEV;
372
373 clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
374 clocksource_uv.shift);
375
376 /* If single blade, prefer tsc */
377 if (uv_num_possible_blades() == 1)
378 clocksource_uv.rating = 250;
379
380 rc = clocksource_register(&clocksource_uv);
381 if (rc)
382 printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
383 else
384 printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n",
385 sn_rtc_cycles_per_second/(unsigned long)1E6);
386
387 if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback)
388 return rc;
389
390 /* Setup and register clockevents */
391 rc = uv_rtc_allocate_timers();
392 if (rc)
393 goto error;
394
395 x86_platform_ipi_callback = uv_rtc_interrupt;
396
397 clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second,
398 NSEC_PER_SEC, clock_event_device_uv.shift);
399
400 clock_event_device_uv.min_delta_ns = NSEC_PER_SEC /
401 sn_rtc_cycles_per_second;
402
403 clock_event_device_uv.max_delta_ns = clocksource_uv.mask *
404 (NSEC_PER_SEC / sn_rtc_cycles_per_second);
405
406 rc = schedule_on_each_cpu(uv_rtc_register_clockevents);
407 if (rc) {
408 x86_platform_ipi_callback = NULL;
409 uv_rtc_deallocate_timers();
410 goto error;
411 }
412
413 printk(KERN_INFO "UV RTC clockevents registered\n");
414
415 return 0;
416
417error:
418 clocksource_unregister(&clocksource_uv);
419 printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc);
420
421 return rc;
422}
423arch_initcall(uv_rtc_setup_clock);
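
One thing that is easy to get backwards in the setup code above: the clocksource mult converts RTC cycles to nanoseconds, while the clock_event mult converts nanoseconds back into cycles, so the two values are computed in opposite directions. A rough sketch of what clocksource_hz2mult() and div_sc() yield, with their rounding terms omitted (approx_hz2mult/approx_div_sc are illustrative names, not kernel functions):

/* Fixed-point sketch (rounding omitted):
 *   clocksource:  ns     = (cycles * cs_mult) >> cs_shift
 *   clock_event:  cycles = (ns * ce_mult)     >> ce_shift
 */
#include <linux/types.h>
#include <linux/time.h>		/* NSEC_PER_SEC */

static inline u32 approx_hz2mult(u32 hz, u32 shift)	/* ~clocksource_hz2mult() */
{
	return (u32)(((u64)NSEC_PER_SEC << shift) / hz);
}

static inline u32 approx_div_sc(u32 hz, u32 nsec, u32 shift)	/* ~div_sc() */
{
	return (u32)(((u64)hz << shift) / nsec);
}
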
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
deleted file mode 100644
index 3371bd053b89..000000000000
--- a/arch/x86/kernel/visws_quirks.c
+++ /dev/null
@@ -1,614 +0,0 @@
1/*
2 * SGI Visual Workstation support and quirks, unmaintained.
3 *
4 * Split out from setup.c by davej@suse.de
5 *
6 * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
7 *
8 * SGI Visual Workstation interrupt controller
9 *
10 * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
11 * which serves as the main interrupt controller in the system. Non-legacy
12 * hardware in the system uses this controller directly. Legacy devices
13 * are connected to the PIIX4 which in turn has its 8259(s) connected to
14 * a of the Cobalt APIC entry.
15 *
16 * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
17 *
18 * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
19 */
20#include <linux/interrupt.h>
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/smp.h>
24
25#include <asm/visws/cobalt.h>
26#include <asm/visws/piix4.h>
27#include <asm/io_apic.h>
28#include <asm/fixmap.h>
29#include <asm/reboot.h>
30#include <asm/setup.h>
31#include <asm/apic.h>
32#include <asm/e820.h>
33#include <asm/time.h>
34#include <asm/io.h>
35
36#include <linux/kernel_stat.h>
37
38#include <asm/i8259.h>
39#include <asm/irq_vectors.h>
40#include <asm/visws/lithium.h>
41
42#include <linux/sched.h>
43#include <linux/kernel.h>
44#include <linux/pci.h>
45#include <linux/pci_ids.h>
46
47extern int no_broadcast;
48
49char visws_board_type = -1;
50char visws_board_rev = -1;
51
52static void __init visws_time_init(void)
53{
54 printk(KERN_INFO "Starting Cobalt Timer system clock\n");
55
56 /* Set the countdown value */
57 co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
58
59 /* Start the timer */
60 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
61
62 /* Enable (unmask) the timer interrupt */
63 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
64
65 setup_default_timer_irq();
66}
67
68/* Replaces the default init_ISA_irqs in the generic setup */
69static void __init visws_pre_intr_init(void);
70
71/* Quirk for machine specific memory setup. */
72
73#define MB (1024 * 1024)
74
75unsigned long sgivwfb_mem_phys;
76unsigned long sgivwfb_mem_size;
77EXPORT_SYMBOL(sgivwfb_mem_phys);
78EXPORT_SYMBOL(sgivwfb_mem_size);
79
80long long mem_size __initdata = 0;
81
82static char * __init visws_memory_setup(void)
83{
84 long long gfx_mem_size = 8 * MB;
85
86 mem_size = boot_params.alt_mem_k;
87
88 if (!mem_size) {
89 printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n");
90 mem_size = 128 * MB;
91 }
92
93 /*
94 * this hardcodes the graphics memory to 8 MB
95 * it really should be sized dynamically (or at least
96 * set as a boot param)
97 */
98 if (!sgivwfb_mem_size) {
99 printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
100 sgivwfb_mem_size = 8 * MB;
101 }
102
103 /*
104 * Trim to nearest MB
105 */
106 sgivwfb_mem_size &= ~((1 << 20) - 1);
107 sgivwfb_mem_phys = mem_size - gfx_mem_size;
108
109 e820_add_region(0, LOWMEMSIZE(), E820_RAM);
110 e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
111 e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
112
113 return "PROM";
114}
115
116static void visws_machine_emergency_restart(void)
117{
118 /*
119 * Visual Workstations restart after this
120 * register is poked on the PIIX4
121 */
122 outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
123}
124
125static void visws_machine_power_off(void)
126{
127 unsigned short pm_status;
128/* extern unsigned int pci_bus0; */
129
130 while ((pm_status = inw(PMSTS_PORT)) & 0x100)
131 outw(pm_status, PMSTS_PORT);
132
133 outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
134
135 mdelay(10);
136
137#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
138 (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
139
140/* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */
141 outl(PIIX_SPECIAL_STOP, 0xCFC);
142}
143
144static void __init visws_get_smp_config(unsigned int early)
145{
146}
147
148/*
149 * The Visual Workstation is Intel MP compliant in the hardware
150 * sense, but it doesn't have a BIOS(-configuration table).
151 * No problem for Linux.
152 */
153
154static void __init MP_processor_info(struct mpc_cpu *m)
155{
156 int ver, logical_apicid;
157 physid_mask_t apic_cpus;
158
159 if (!(m->cpuflag & CPU_ENABLED))
160 return;
161
162 logical_apicid = m->apicid;
163 printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
164 m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
165 m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
166 (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver);
167
168 if (m->cpuflag & CPU_BOOTPROCESSOR)
169 boot_cpu_physical_apicid = m->apicid;
170
171 ver = m->apicver;
172 if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
173 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
174 m->apicid, MAX_APICS);
175 return;
176 }
177
178 apic->apicid_to_cpu_present(m->apicid, &apic_cpus);
179 physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
180 /*
181 * Validate version
182 */
183 if (ver == 0x0) {
184 printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
185 "fixing up to 0x10. (tell your hw vendor)\n",
186 m->apicid);
187 ver = 0x10;
188 }
189 apic_version[m->apicid] = ver;
190}
191
192static void __init visws_find_smp_config(void)
193{
194 struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
195 unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
196
197 if (ncpus > CO_CPU_MAX) {
198 printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
199 ncpus, mp);
200
201 ncpus = CO_CPU_MAX;
202 }
203
204 if (ncpus > setup_max_cpus)
205 ncpus = setup_max_cpus;
206
207#ifdef CONFIG_X86_LOCAL_APIC
208 smp_found_config = 1;
209#endif
210 while (ncpus--)
211 MP_processor_info(mp++);
212
213 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
214}
215
216static void visws_trap_init(void);
217
218void __init visws_early_detect(void)
219{
220 int raw;
221
222 visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
223 >> PIIX_GPI_BD_SHIFT;
224
225 if (visws_board_type < 0)
226 return;
227
228 /*
229 * Override the default platform setup functions
230 */
231 x86_init.resources.memory_setup = visws_memory_setup;
232 x86_init.mpparse.get_smp_config = visws_get_smp_config;
233 x86_init.mpparse.find_smp_config = visws_find_smp_config;
234 x86_init.irqs.pre_vector_init = visws_pre_intr_init;
235 x86_init.irqs.trap_init = visws_trap_init;
236 x86_init.timers.timer_init = visws_time_init;
237 x86_init.pci.init = pci_visws_init;
238 x86_init.pci.init_irq = x86_init_noop;
239
240 /*
241 * Install reboot quirks:
242 */
243 pm_power_off = visws_machine_power_off;
244 machine_ops.emergency_restart = visws_machine_emergency_restart;
245
246 /*
247 * Do not use broadcast IPIs:
248 */
249 no_broadcast = 0;
250
251#ifdef CONFIG_X86_IO_APIC
252 /*
253 * Turn off IO-APIC detection and initialization:
254 */
255 skip_ioapic_setup = 1;
256#endif
257
258 /*
259 * Get Board rev.
260 * First, we have to initialize the 307 part to allow us access
261 * to the GPIO registers. Let's map them at 0x0fc0 which is right
262 * after the PIIX4 PM section.
263 */
264 outb_p(SIO_DEV_SEL, SIO_INDEX);
265 outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */
266
267 outb_p(SIO_DEV_MSB, SIO_INDEX);
268 outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */
269
270 outb_p(SIO_DEV_LSB, SIO_INDEX);
271 outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */
272
273 outb_p(SIO_DEV_ENB, SIO_INDEX);
274 outb_p(1, SIO_DATA); /* Enable GPIO registers. */
275
276 /*
277 * Now, we have to map the power management section to write
278 * a bit which enables access to the GPIO registers.
279 * What lunatic came up with this shit?
280 */
281 outb_p(SIO_DEV_SEL, SIO_INDEX);
282 outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */
283
284 outb_p(SIO_DEV_MSB, SIO_INDEX);
285 outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */
286
287 outb_p(SIO_DEV_LSB, SIO_INDEX);
288 outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */
289
290 outb_p(SIO_DEV_ENB, SIO_INDEX);
291 outb_p(1, SIO_DATA); /* Enable PM registers. */
292
293 /*
294 * Now, write the PM register which enables the GPIO registers.
295 */
296 outb_p(SIO_PM_FER2, SIO_PM_INDEX);
297 outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
298
299 /*
300 * Now, initialize the GPIO registers.
301 * We want them all to be inputs which is the
302 * power on default, so let's leave them alone.
303 * So, let's just read the board rev!
304 */
305 raw = inb_p(SIO_GP_DATA1);
306 raw &= 0x7f; /* 7 bits of valid board revision ID. */
307
308 if (visws_board_type == VISWS_320) {
309 if (raw < 0x6) {
310 visws_board_rev = 4;
311 } else if (raw < 0xc) {
312 visws_board_rev = 5;
313 } else {
314 visws_board_rev = 6;
315 }
316 } else if (visws_board_type == VISWS_540) {
317 visws_board_rev = 2;
318 } else {
319 visws_board_rev = raw;
320 }
321
322 printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
323 (visws_board_type == VISWS_320 ? "320" :
324 (visws_board_type == VISWS_540 ? "540" :
325 "unknown")), visws_board_rev);
326}
327
328#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
329#define BCD (LI_INTB | LI_INTC | LI_INTD)
330#define ALLDEVS (A01234 | BCD)
331
332static __init void lithium_init(void)
333{
334 set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
335 set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
336
337 if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
338 (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
339 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
340/* panic("This machine is not SGI Visual Workstation 320/540"); */
341 }
342
343 if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
344 (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
345 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
346/* panic("This machine is not SGI Visual Workstation 320/540"); */
347 }
348
349 li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
350 li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
351}
352
353static __init void cobalt_init(void)
354{
355 /*
356 * On normal SMP PC this is used only with SMP, but we have to
357 * use it and set it up here to start the Cobalt clock
358 */
359 set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
360 setup_local_APIC();
361 printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
362 (unsigned int)apic_read(APIC_LVR),
363 (unsigned int)apic_read(APIC_ID));
364
365 set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
366 set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
367 printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
368 co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
369
370 /* Enable Cobalt APIC being careful to NOT change the ID! */
371 co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
372
373 printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
374 co_apic_read(CO_APIC_ID));
375}
376
377static void __init visws_trap_init(void)
378{
379 lithium_init();
380 cobalt_init();
381}
382
383/*
384 * IRQ controller / APIC support:
385 */
386
387static DEFINE_SPINLOCK(cobalt_lock);
388
389/*
390 * Set the given Cobalt APIC Redirection Table entry to point
391 * to the given IDT vector/index.
392 */
393static inline void co_apic_set(int entry, int irq)
394{
395 co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
396 co_apic_write(CO_APIC_HI(entry), 0);
397}
398
399/*
400 * Cobalt (IO)-APIC functions to handle PCI devices.
401 */
402static inline int co_apic_ide0_hack(void)
403{
404 extern char visws_board_type;
405 extern char visws_board_rev;
406
407 if (visws_board_type == VISWS_320 && visws_board_rev == 5)
408 return 5;
409 return CO_APIC_IDE0;
410}
411
412static int is_co_apic(unsigned int irq)
413{
414 if (IS_CO_APIC(irq))
415 return CO_APIC(irq);
416
417 switch (irq) {
418 case 0: return CO_APIC_CPU;
419 case CO_IRQ_IDE0: return co_apic_ide0_hack();
420 case CO_IRQ_IDE1: return CO_APIC_IDE1;
421 default: return -1;
422 }
423}
424
425
426/*
427 * This is the SGI Cobalt (IO-)APIC:
428 */
429static void enable_cobalt_irq(struct irq_data *data)
430{
431 co_apic_set(is_co_apic(data->irq), data->irq);
432}
433
434static void disable_cobalt_irq(struct irq_data *data)
435{
436 int entry = is_co_apic(data->irq);
437
438 co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
439 co_apic_read(CO_APIC_LO(entry));
440}
441
442static void ack_cobalt_irq(struct irq_data *data)
443{
444 unsigned long flags;
445
446 spin_lock_irqsave(&cobalt_lock, flags);
447 disable_cobalt_irq(data);
448 apic_write(APIC_EOI, APIC_EIO_ACK);
449 spin_unlock_irqrestore(&cobalt_lock, flags);
450}
451
452static struct irq_chip cobalt_irq_type = {
453 .name = "Cobalt-APIC",
454 .irq_enable = enable_cobalt_irq,
455 .irq_disable = disable_cobalt_irq,
456 .irq_ack = ack_cobalt_irq,
457};
458
459
460/*
461 * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
462 * -- not the manner expected by the code in i8259.c.
463 *
464 * there is a 'master' physical interrupt source that gets sent to
465 * the CPU. But in the chipset there are various 'virtual' interrupts
466 * waiting to be handled. We represent this to Linux through a 'master'
467 * interrupt controller type, and through a special virtual interrupt-
468 * controller. Device drivers only see the virtual interrupt sources.
469 */
470static unsigned int startup_piix4_master_irq(struct irq_data *data)
471{
472 legacy_pic->init(0);
473 enable_cobalt_irq(data);
474}
475
476static void end_piix4_master_irq(struct irq_data *data)
477{
478 unsigned long flags;
479
480 spin_lock_irqsave(&cobalt_lock, flags);
481 enable_cobalt_irq(data);
482 spin_unlock_irqrestore(&cobalt_lock, flags);
483}
484
485static struct irq_chip piix4_master_irq_type = {
486 .name = "PIIX4-master",
487 .irq_startup = startup_piix4_master_irq,
488 .irq_ack = ack_cobalt_irq,
489};
490
491static void pii4_mask(struct irq_data *data) { }
492
493static struct irq_chip piix4_virtual_irq_type = {
494 .name = "PIIX4-virtual",
495 .mask = pii4_mask,
496};
497
498/*
499 * PIIX4-8259 master/virtual functions to handle interrupt requests
500 * from legacy devices: floppy, parallel, serial, rtc.
501 *
502 * None of these get Cobalt APIC entries, neither do they have IDT
503 * entries. These interrupts are purely virtual and distributed from
504 * the 'master' interrupt source: CO_IRQ_8259.
505 *
506 * When the 8259 interrupts its handler figures out which of these
507 * devices is interrupting and dispatches to its handler.
508 *
509 * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
510 * enable_irq gets the right irq. This 'master' irq is never directly
511 * manipulated by any driver.
512 */
513static irqreturn_t piix4_master_intr(int irq, void *dev_id)
514{
515 unsigned long flags;
516 int realirq;
517
518 raw_spin_lock_irqsave(&i8259A_lock, flags);
519
520 /* Find out what's interrupting in the PIIX4 master 8259 */
521 outb(0x0c, 0x20); /* OCW3 Poll command */
522 realirq = inb(0x20);
523
524 /*
525 * Bit 7 == 0 means invalid/spurious
526 */
527 if (unlikely(!(realirq & 0x80)))
528 goto out_unlock;
529
530 realirq &= 7;
531
532 if (unlikely(realirq == 2)) {
533 outb(0x0c, 0xa0);
534 realirq = inb(0xa0);
535
536 if (unlikely(!(realirq & 0x80)))
537 goto out_unlock;
538
539 realirq = (realirq & 7) + 8;
540 }
541
542 /* mask and ack interrupt */
543 cached_irq_mask |= 1 << realirq;
544 if (unlikely(realirq > 7)) {
545 inb(0xa1);
546 outb(cached_slave_mask, 0xa1);
547 outb(0x60 + (realirq & 7), 0xa0);
548 outb(0x60 + 2, 0x20);
549 } else {
550 inb(0x21);
551 outb(cached_master_mask, 0x21);
552 outb(0x60 + realirq, 0x20);
553 }
554
555 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
556
557 /*
558 * handle this 'virtual interrupt' as a Cobalt one now.
559 */
560 generic_handle_irq(realirq);
561
562 return IRQ_HANDLED;
563
564out_unlock:
565 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
566 return IRQ_NONE;
567}
568
569static struct irqaction master_action = {
570 .handler = piix4_master_intr,
571 .name = "PIIX4-8259",
572};
573
574static struct irqaction cascade_action = {
575 .handler = no_action,
576 .name = "cascade",
577};
578
579static inline void set_piix4_virtual_irq_type(void)
580{
581 piix4_virtual_irq_type.enable = i8259A_chip.unmask;
582 piix4_virtual_irq_type.disable = i8259A_chip.mask;
583 piix4_virtual_irq_type.unmask = i8259A_chip.unmask;
584}
585
586static void __init visws_pre_intr_init(void)
587{
588 int i;
589
590 set_piix4_virtual_irq_type();
591
592 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
593 struct irq_chip *chip = NULL;
594
595 if (i == 0)
596 chip = &cobalt_irq_type;
597 else if (i == CO_IRQ_IDE0)
598 chip = &cobalt_irq_type;
599 else if (i == CO_IRQ_IDE1)
600 			chip = &cobalt_irq_type;
601 else if (i == CO_IRQ_8259)
602 chip = &piix4_master_irq_type;
603 else if (i < CO_IRQ_APIC0)
604 chip = &piix4_virtual_irq_type;
605 else if (IS_CO_APIC(i))
606 chip = &cobalt_irq_type;
607
608 if (chip)
609 set_irq_chip(i, chip);
610 }
611
612 setup_irq(CO_IRQ_8259, &master_action);
613 setup_irq(2, &cascade_action);
614}
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5ffb5622f793..61fb98519622 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -551,8 +551,14 @@ cannot_handle:
551int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) 551int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
552{ 552{
553 if (VMPI.is_vm86pus) { 553 if (VMPI.is_vm86pus) {
554 if ((trapno == 3) || (trapno == 1)) 554 if ((trapno == 3) || (trapno == 1)) {
555 return_to_32bit(regs, VM86_TRAP + (trapno << 8)); 555 KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
556 /* setting this flag forces the code in entry_32.S to
557 call save_v86_state() and change the stack pointer
558 to KVM86->regs32 */
559 set_thread_flag(TIF_IRET);
560 return 0;
561 }
556 do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); 562 do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
557 return 0; 563 return 0;
558 } 564 }
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 38e2b67807e1..e03530aebfd0 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -301,7 +301,7 @@ SECTIONS
301 } 301 }
302 302
303#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) 303#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
304 PERCPU(PAGE_SIZE) 304 PERCPU(THREAD_SIZE)
305#endif 305#endif
306 306
307 . = ALIGN(PAGE_SIZE); 307 . = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index cd6da6bf3eca..ceb2911aa439 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -6,10 +6,12 @@
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/ioport.h> 7#include <linux/ioport.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/pci.h>
9 10
10#include <asm/bios_ebda.h> 11#include <asm/bios_ebda.h>
11#include <asm/paravirt.h> 12#include <asm/paravirt.h>
12#include <asm/pci_x86.h> 13#include <asm/pci_x86.h>
14#include <asm/pci.h>
13#include <asm/mpspec.h> 15#include <asm/mpspec.h>
14#include <asm/setup.h> 16#include <asm/setup.h>
15#include <asm/apic.h> 17#include <asm/apic.h>
@@ -99,3 +101,8 @@ struct x86_platform_ops x86_platform = {
99}; 101};
100 102
101EXPORT_SYMBOL_GPL(x86_platform); 103EXPORT_SYMBOL_GPL(x86_platform);
104struct x86_msi_ops x86_msi = {
105 .setup_msi_irqs = native_setup_msi_irqs,
106 .teardown_msi_irq = native_teardown_msi_irq,
107 .teardown_msi_irqs = default_teardown_msi_irqs,
108};