15 files changed, 1930 insertions, 67 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index c107641cd39b..9873716e9f76 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,6 +8,11 @@ obj-$(CONFIG_X86_PTDUMP)	+= dump_pagetables.o
 obj-$(CONFIG_HIGHMEM)           += highmem_32.o
+obj-$(CONFIG_MMIOTRACE_HOOKS)   += kmmio.o
+obj-$(CONFIG_MMIOTRACE)         += mmiotrace.o
+mmiotrace-y                     := pf_in.o mmio-mod.o
+obj-$(CONFIG_MMIOTRACE_TEST)    += testmmiotrace.o
 ifeq ($(CONFIG_X86_32),y)
 obj-$(CONFIG_NUMA)              += discontig_32.o
 else
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d0f5fce77d95..455f3fe67b42 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,6 +10,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/ptrace.h>
+#include <linux/mmiotrace.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
@@ -49,6 +50,16 @@
 #define PF_RSVD         (1<<3)
 #define PF_INSTR        (1<<4)
+/*
+ * Give the kmmio layer the first chance at a page fault.
+ *
+ * Returns -1 when kmmio is active and kmmio_handler() reports the fault
+ * as handled (i.e. it returned 1). Returns 0 in every other case,
+ * including when CONFIG_MMIOTRACE_HOOKS is not set.
+ */
+static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
+{
+#ifdef CONFIG_MMIOTRACE_HOOKS
+        if (unlikely(is_kmmio_active()) && kmmio_handler(regs, addr) == 1)
+                return -1;
+#endif
+        return 0;
+}
 static inline int notify_page_fault(struct pt_regs *regs)
 {
 #ifdef CONFIG_KPROBES
@@ -598,6 +609,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
        if (notify_page_fault(regs))
                return;
+        if (unlikely(kmmio_fault(regs, address)))
+                return;
        /*
         * We fault-in kernel-space virtual memory on-demand. The
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index b5a0fd5f4c5f..9689a5138e64 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -50,6 +50,7 @@
 unsigned int __VMALLOC_RESERVE = 128 << 20;
+unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -1034,6 +1035,8 @@ void mark_rodata_ro(void)
        unsigned long start = PFN_ALIGN(_text);
        unsigned long size = PFN_ALIGN(_etext) - start;
+#ifndef CONFIG_DYNAMIC_FTRACE
+        /* Dynamic tracing modifies the kernel text section */
        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
        printk(KERN_INFO "Write protecting the kernel text: %luk\n",
                size >> 10);
@@ -1046,6 +1049,8 @@ void mark_rodata_ro(void)
        printk(KERN_INFO "Testing CPA: write protecting again\n");
        set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
 #endif
+#endif /* CONFIG_DYNAMIC_FTRACE */
        start += size;
        size = (unsigned long)__end_rodata - start;
        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 77d129d62c97..306049edd553 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -53,6 +53,7 @@
 * The direct mapping extends to max_pfn_mapped, so that we can directly access
 * apertures, ACPI and other tables without having to play with fixmaps.
 */
+unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 static unsigned long dma_reserve __initdata;
@@ -202,6 +203,46 @@ set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 }
 /*
+ * Create large page table mappings for a range of physical addresses.
+ */
+/*
+ * phys and size must both be PMD_SIZE aligned (enforced by the BUG_ON).
+ * For every PMD_SIZE step one large pmd entry with protection 'prot' is
+ * installed for the kernel virtual address __va(phys). Missing pud/pmd
+ * tables are allocated with spp_getpage(). Finding an already populated
+ * pmd slot is treated as a bug.
+ */
+static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
+                                                pgprot_t prot)
+{
+        pgd_t *pgd;
+        pud_t *pud;
+        pmd_t *pmd;
+        /* reject anything that is not a whole number of large pages */
+        BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK));
+        for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
+                pgd = pgd_offset_k((unsigned long)__va(phys));
+                if (pgd_none(*pgd)) {
+                        /* no pud table yet for this pgd slot: allocate one */
+                        pud = (pud_t *) spp_getpage();
+                        set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
+                                                _PAGE_USER));
+                }
+                pud = pud_offset(pgd, (unsigned long)__va(phys));
+                if (pud_none(*pud)) {
+                        /* no pmd table yet for this pud slot: allocate one */
+                        pmd = (pmd_t *) spp_getpage();
+                        set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
+                                                _PAGE_USER));
+                }
+                /*
+                 * NOTE(review): indexed with phys rather than __va(phys) as
+                 * in the two lookups above - presumably equivalent for the
+                 * pmd index; confirm.
+                 */
+                pmd = pmd_offset(pud, phys);
+                /* the slot must still be empty; never overwrite a mapping */
+                BUG_ON(!pmd_none(*pmd));
+                set_pmd(pmd, __pmd(phys | pgprot_val(prot)));
+        }
+}
+/*
+ * Map [phys, phys + size) with large pages using PAGE_KERNEL_LARGE
+ * protection. Alignment requirements are those of __init_extra_mapping().
+ */
+void __init init_extra_mapping_wb(unsigned long phys, unsigned long size)
+{
+        __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE);
+}
+/*
+ * Map [phys, phys + size) with large pages using PAGE_KERNEL_LARGE_NOCACHE
+ * protection. Alignment requirements are those of __init_extra_mapping().
+ */
+void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
+{
+        __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE);
+}
+/*
 * The head.S code sets up the kernel high mapping:
 *
 *   from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
@@ -262,11 +303,13 @@ static __meminit void unmap_low_page(void *adr)
        early_iounmap(adr, PAGE_SIZE);
 }
-static void __meminit
+static unsigned long __meminit
 phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
 {
        unsigned pages = 0;
+        unsigned long last_map_addr = end;
        int i;
        pte_t *pte = pte_page + pte_index(addr);
        for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
@@ -286,23 +329,28 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
                        printk("   pte=%p addr=%lx pte=%016lx\n",
                               pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
                set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
+                last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
                pages++;
        }
        update_page_count(PG_LEVEL_4K, pages);
+        return last_map_addr;
 }
-static void __meminit
+static unsigned long __meminit
 phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
 {
        pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
-        phys_pte_init(pte, address, end);
+        return phys_pte_init(pte, address, end);
 }
 static unsigned long __meminit
-phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
+phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
+                         unsigned long page_size_mask)
 {
        unsigned long pages = 0;
+        unsigned long last_map_addr = end;
        int i = pmd_index(address);
@@ -321,42 +369,46 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
                if (pmd_val(*pmd)) {
                        if (!pmd_large(*pmd))
-                                phys_pte_update(pmd, address, end);
+                                last_map_addr = phys_pte_update(pmd, address,
+                                                                 end);
                        continue;
                }
-                if (cpu_has_pse) {
+                if (page_size_mask & (1<<PG_LEVEL_2M)) {
                        pages++;
                        set_pte((pte_t *)pmd,
                                pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+                        last_map_addr = (address & PMD_MASK) + PMD_SIZE;
                        continue;
                }
                pte = alloc_low_page(&pte_phys);
-                phys_pte_init(pte, address, end);
+                last_map_addr = phys_pte_init(pte, address, end);
                unmap_low_page(pte);
                pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
        }
        update_page_count(PG_LEVEL_2M, pages);
-        return address;
+        return last_map_addr;
 }
 static unsigned long __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
+phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
+                         unsigned long page_size_mask)
 {
        pmd_t *pmd = pmd_offset(pud, 0);
        unsigned long last_map_addr;
        spin_lock(&init_mm.page_table_lock);
-        last_map_addr = phys_pmd_init(pmd, address, end);
+        last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
        spin_unlock(&init_mm.page_table_lock);
        __flush_tlb_all();
        return last_map_addr;
 }
 static unsigned long __meminit
-phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
+phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
+                         unsigned long page_size_mask)
 {
        unsigned long pages = 0;
        unsigned long last_map_addr = end;
@@ -378,11 +430,12 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
                if (pud_val(*pud)) {
                        if (!pud_large(*pud))
-                                last_map_addr = phys_pmd_update(pud, addr, end);
+                                last_map_addr = phys_pmd_update(pud, addr, end,
+                                                         page_size_mask);
                        continue;
                }
-                if (direct_gbpages) {
+                if (page_size_mask & (1<<PG_LEVEL_1G)) {
                        pages++;
                        set_pte((pte_t *)pud,
                                pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
@@ -393,7 +446,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
                pmd = alloc_low_page(&pmd_phys);
                spin_lock(&init_mm.page_table_lock);
-                last_map_addr = phys_pmd_init(pmd, addr, end);
+                last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
                unmap_low_page(pmd);
                pud_populate(&init_mm, pud, __va(pmd_phys));
                spin_unlock(&init_mm.page_table_lock);
@@ -406,29 +459,37 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
 }
 static unsigned long __meminit
-phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end)
+phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
+                 unsigned long page_size_mask)
 {
        pud_t *pud;
        pud = (pud_t *)pgd_page_vaddr(*pgd);
-        return phys_pud_init(pud, addr, end);
+        return phys_pud_init(pud, addr, end, page_size_mask);
 }
 static void __init find_early_table_space(unsigned long end)
 {
-        unsigned long puds, tables, start;
+        unsigned long puds, pmds, ptes, tables, start;
        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
-        if (!direct_gbpages) {
+        if (direct_gbpages) {
-                unsigned long pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+                unsigned long extra;
-                tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+                extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
-        }
+                pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
-        if (!cpu_has_pse) {
+        } else
-                unsigned long ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+                pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
-                tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
+        tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
-        }
+        if (cpu_has_pse) {
+                unsigned long extra;
+                extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
+                ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
+        } else
+                ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+        tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
        /*
         * RED-PEN putting page tables only on node 0 could
@@ -568,29 +629,12 @@ static void __init early_memtest(unsigned long start, unsigned long end)
 }
 #endif
-/*
+static unsigned long __init kernel_physical_mapping_init(unsigned long start,
- * Setup the direct mapping of the physical memory at PAGE_OFFSET.
+                                                unsigned long end,
- * This runs before bootmem is initialized and gets pages directly from
+                                                unsigned long page_size_mask)
- * the physical memory. To access them they are temporarily mapped.
- */
-unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end)
 {
-        unsigned long next, last_map_addr = end;
-        unsigned long start_phys = start, end_phys = end;
-        printk(KERN_INFO "init_memory_mapping\n");
+        unsigned long next, last_map_addr = end;
-        /*
-         * Find space for the kernel direct mapping tables.
-         *
-         * Later we should allocate these tables in the local node of the
-         * memory mapped. Unfortunately this is done currently before the
-         * nodes are discovered.
-         */
-        if (!after_bootmem) {
-                init_gbpages();
-                find_early_table_space(end);
-        }
        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);
@@ -600,12 +644,13 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned lon
                unsigned long pud_phys;
                pud_t *pud;
-                next = start + PGDIR_SIZE;
+                next = (start + PGDIR_SIZE) & PGDIR_MASK;
                if (next > end)
                        next = end;
                if (pgd_val(*pgd)) {
-                        last_map_addr = phys_pud_update(pgd, __pa(start), __pa(end));
+                        last_map_addr = phys_pud_update(pgd, __pa(start),
+                                                 __pa(end), page_size_mask);
                        continue;
                }
@@ -614,22 +659,151 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned lon
                else
                        pud = alloc_low_page(&pud_phys);
-                last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
+                last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
+                                                 page_size_mask);
                unmap_low_page(pud);
                pgd_populate(&init_mm, pgd_offset_k(start),
                             __va(pud_phys));
        }
+        return last_map_addr;
+}
+/*
+ * One contiguous physical range to be direct-mapped by
+ * init_memory_mapping(), together with the page sizes allowed for it.
+ */
+struct map_range {
+        unsigned long start;            /* first byte address of the range */
+        unsigned long end;              /* byte address just past the range */
+        unsigned page_size_mask;        /* bits (1 << PG_LEVEL_2M / _1G); 0 means 4k pages */
+};
+#define NR_RANGE_MR 5
+/*
+ * Append the pfn range [start_pfn, end_pfn) to mr[], converted to byte
+ * addresses and tagged with page_size_mask. An empty range
+ * (start_pfn >= end_pfn) is ignored. Panics if mr[] already holds
+ * NR_RANGE_MR entries.
+ *
+ * Returns the updated number of entries in mr[].
+ */
+static int save_mr(struct map_range *mr, int nr_range,
+                   unsigned long start_pfn, unsigned long end_pfn,
+                   unsigned long page_size_mask)
+{
+        struct map_range *slot;
+        /* nothing to record for an empty range */
+        if (start_pfn >= end_pfn)
+                return nr_range;
+        if (nr_range >= NR_RANGE_MR)
+                panic("run out of range for init_memory_mapping\n");
+        slot = &mr[nr_range];
+        slot->start = start_pfn<<PAGE_SHIFT;
+        slot->end = end_pfn<<PAGE_SHIFT;
+        slot->page_size_mask = page_size_mask;
+        return nr_range + 1;
+}
+/*
+ * Setup the direct mapping of the physical memory at PAGE_OFFSET.
+ * This runs before bootmem is initialized and gets pages directly from
+ * the physical memory. To access them they are temporarily mapped.
+ *
+ * The range [start, end) is split into at most NR_RANGE_MR pieces
+ * (unaligned head, 2M-aligned part, 1G-aligned part, 2M tail, 4k tail),
+ * adjacent pieces with the same page size mask are merged, and each
+ * resulting piece is handed to kernel_physical_mapping_init().
+ *
+ * Returns the value from the last kernel_physical_mapping_init() call
+ * shifted down by PAGE_SHIFT (0 if no range was mapped).
+ */
+unsigned long __init_refok init_memory_mapping(unsigned long start,
+                                               unsigned long end)
+{
+        unsigned long last_map_addr = 0;
+        unsigned long page_size_mask = 0;
+        unsigned long start_pfn, end_pfn;
+        struct map_range mr[NR_RANGE_MR];
+        int nr_range, i;
+        printk(KERN_INFO "init_memory_mapping\n");
+        /*
+         * Find space for the kernel direct mapping tables.
+         *
+         * Later we should allocate these tables in the local node of the
+         * memory mapped. Unfortunately this is done currently before the
+         * nodes are discovered.
+         */
+        if (!after_bootmem)
+                init_gbpages();
+        if (direct_gbpages)
+                page_size_mask |= 1 << PG_LEVEL_1G;
+        if (cpu_has_pse)
+                page_size_mask |= 1 << PG_LEVEL_2M;
+        memset(mr, 0, sizeof(mr));
+        nr_range = 0;
+        /* head, if start is not aligned to a big page (2M): 4k pages */
+        start_pfn = start >> PAGE_SHIFT;
+        end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT)
+                        << (PMD_SHIFT - PAGE_SHIFT);
+        nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+        /* big page (2M) range */
+        start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
+                         << (PMD_SHIFT - PAGE_SHIFT);
+        end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT)
+                         << (PUD_SHIFT - PAGE_SHIFT);
+        if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)))
+                end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT));
+        nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+                        page_size_mask & (1<<PG_LEVEL_2M));
+        /* big page (1G) range */
+        start_pfn = end_pfn;
+        end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
+        nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+                                page_size_mask &
+                                 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
+        /* tail that is not aligned to a big page (1G): 2M pages */
+        start_pfn = end_pfn;
+        end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
+        nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+                        page_size_mask & (1<<PG_LEVEL_2M));
+        /* tail that is not aligned to a big page (2M): 4k pages */
+        start_pfn = end_pfn;
+        end_pfn = end>>PAGE_SHIFT;
+        nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+        /* try to merge ranges that are contiguous and use the same page size */
+        for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
+                unsigned long old_start;
+                if (mr[i].end != mr[i+1].start ||
+                    mr[i].page_size_mask != mr[i+1].page_size_mask)
+                        continue;
+                /* move it */
+                old_start = mr[i].start;
+                memmove(&mr[i], &mr[i+1],
+                         (nr_range - 1 - i) * sizeof (struct map_range));
+                mr[i].start = old_start;
+                nr_range--;
+                /*
+                 * Stay on the merged entry: it must be compared with its
+                 * new neighbour too, otherwise a run of three or more
+                 * mergeable ranges is left only partially merged.
+                 */
+                i--;
+        }
+        for (i = 0; i < nr_range; i++)
+                printk(KERN_DEBUG " %010lx - %010lx page %s\n",
+                                mr[i].start, mr[i].end,
+                        (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
+                         (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
+        if (!after_bootmem)
+                find_early_table_space(end);
+        for (i = 0; i < nr_range; i++)
+                last_map_addr = kernel_physical_mapping_init(
+                                        mr[i].start, mr[i].end,
+                                        mr[i].page_size_mask);
        if (!after_bootmem)
                mmu_cr4_features = read_cr4();
        __flush_tlb_all();
-        if (!after_bootmem)
+        if (!after_bootmem && table_end > table_start)
                reserve_early(table_start << PAGE_SHIFT,
                                 table_end << PAGE_SHIFT, "PGTABLE");
+        printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
+                         last_map_addr, end);
        if (!after_bootmem)
-                early_memtest(start_phys, end_phys);
+                early_memtest(start, end);
        return last_map_addr >> PAGE_SHIFT;
 }
@@ -817,6 +991,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
 void mark_rodata_ro(void)
 {
        unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
+        unsigned long rodata_start =
+                ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+#ifdef CONFIG_DYNAMIC_FTRACE
+        /* Dynamic tracing modifies the kernel text section */
+        start = rodata_start;
+#endif
        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);
@@ -826,8 +1007,7 @@ void mark_rodata_ro(void)
         * The rodata section (but not the kernel text!) should also be
         * not-executable.
         */
-        start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+        set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
-        set_memory_nx(start, (end - start) >> PAGE_SHIFT);
        rodata_test();
@@ -1036,9 +1216,6 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
                                                PAGE_KERNEL_LARGE);
                                set_pmd(pmd, __pmd(pte_val(entry)));
-                                addr_end = addr + PMD_SIZE;
-                                p_end = p + PMD_SIZE;
                                /* check to see if we have contiguous blocks */
                                if (p_end != p || node_start != node) {
                                        if (p_start)
@@ -1048,6 +1225,9 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
                                        node_start = node;
                                        p_start = p;
                                }
+                                addr_end = addr + PMD_SIZE;
+                                p_end = p + PMD_SIZE;
                        } else
                                vmemmap_verify((pte_t *)pmd, node, addr, next);
                }
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 115f13ee40c9..24c1d3c30186 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/mmiotrace.h>
 #include <asm/cacheflush.h>
 #include <asm/e820.h>
@@ -122,10 +123,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 {
        unsigned long pfn, offset, vaddr;
        resource_size_t last_addr;
+        const resource_size_t unaligned_phys_addr = phys_addr;
+        const unsigned long unaligned_size = size;
        struct vm_struct *area;
        unsigned long new_prot_val;
        pgprot_t prot;
        int retval;
+        void __iomem *ret_addr;
        /* Don't allow wraparound or zero size */
        last_addr = phys_addr + size - 1;
@@ -233,7 +237,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
                return NULL;
        }
-        return (void __iomem *) (vaddr + offset);
+        ret_addr = (void __iomem *) (vaddr + offset);
+        mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
+        return ret_addr;
 }
 /**
@@ -348,6 +355,8 @@ void iounmap(volatile void __iomem *addr)
        addr = (volatile void __iomem *)
                (PAGE_MASK & (unsigned long __force)addr);
+        mmiotrace_iounmap(addr);
        /* Use the vm area unlocked, assuming the caller
           ensures there isn't another iounmap for the same address
           in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
new file mode 100644
index 000000000000..93d82038af4b
--- /dev/null
+++ b/arch/x86/mm/kmmio.c
@@ -0,0 +1,510 @@
+/* Support for MMIO probes.
+ * Borrows much code from kprobes
+ * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
+ *     2007 Alexander Eichner
+ *     2008 Pekka Paalanen <pq@iki.fi>
+ */
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/percpu.h>
+#include <linux/kdebug.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <linux/errno.h>
+#include <asm/debugreg.h>
+#include <linux/mmiotrace.h>
+#define KMMIO_PAGE_HASH_BITS 4
+#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
+/*
+ * Bookkeeping for one page watched by kmmio. Entries are linked into a
+ * kmmio_page_table bucket through 'list' and looked up by page address.
+ */
+struct kmmio_fault_page {
+        struct list_head list; /* linkage in a kmmio_page_table bucket */
+        /* NOTE(review): presumably chains pages queued for delayed release - confirm */
+        struct kmmio_fault_page *release_next;
+        unsigned long page; /* location of the fault page */
+        /*
+         * Number of times this page has been registered as a part
+         * of a probe. If zero, page is disarmed and this may be freed.
+         * Used only by writers (RCU).
+         */
+        int count;
+};
+/*
+ * NOTE(review): presumably carries a list of kmmio_fault_page entries to
+ * be released from an RCU callback; the users are not visible here -
+ * confirm.
+ */
+struct kmmio_delayed_release {
+        struct rcu_head rcu;
+        struct kmmio_fault_page *release_list; /* head of the chain to release */
+};
+/*
+ * Per-cpu state of the kmmio hit currently being single-stepped.
+ * Filled in by kmmio_handler() and consumed by post_kmmio_handler().
+ */
+struct kmmio_context {
+        struct kmmio_fault_page *fpage; /* page that faulted; re-armed after the step */
+        struct kmmio_probe *probe;      /* probe covering addr, or NULL */
+        unsigned long saved_flags;      /* TF and IF bits of regs->flags at fault time */
+        unsigned long addr;             /* faulting address */
+        int active;                     /* non-zero while a hit is in flight on this cpu */
+};
+static DEFINE_SPINLOCK(kmmio_lock);
+/* Protected by kmmio_lock */
+unsigned int kmmio_count;
+/* Read-protected by RCU, write-protected by kmmio_lock. */
+static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
+static LIST_HEAD(kmmio_probes);
+/* Return the kmmio_page_table bucket that the given page address hashes to. */
+static struct list_head *kmmio_page_list(unsigned long page)
+{
+        unsigned long bucket = hash_long(page, KMMIO_PAGE_HASH_BITS);
+        return kmmio_page_table + bucket;
+}
+/* Accessed per-cpu */
+static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
+/*
+ * this is basically a dynamic stabbing problem:
+ * Could use the existing prio tree code or
+ * Possible better implementations:
+ * The Interval Skip List: A Data Structure for Finding All Intervals That
+ * Overlap a Point (might be simple)
+ * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
+ */
+/*
+ * Get the kmmio probe covering this addr (if any): the first entry on
+ * kmmio_probes whose range [p->addr, p->addr + p->len) contains addr.
+ * The end of the range is exclusive, so the first address past a probe
+ * does not match it. Returns NULL when no probe covers addr.
+ * You must be holding RCU read lock.
+ */
+static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
+{
+        struct kmmio_probe *p;
+        list_for_each_entry_rcu(p, &kmmio_probes, list) {
+                if (addr >= p->addr && addr < (p->addr + p->len))
+                        return p;
+        }
+        return NULL;
+}
+/*
+ * Look up the kmmio_fault_page for the page containing the given address.
+ * The address is rounded down to its page boundary first. Returns NULL if
+ * the page is not tracked. You must be holding RCU read lock.
+ */
+static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
+{
+        struct kmmio_fault_page *entry;
+        struct list_head *bucket;
+        page &= PAGE_MASK;
+        bucket = kmmio_page_list(page);
+        list_for_each_entry_rcu(entry, bucket, list) {
+                if (entry->page != page)
+                        continue;
+                return entry;
+        }
+        return NULL;
+}
+/*
+ * Set or clear _PAGE_PRESENT on the page table entry mapping addr and
+ * flush that address from the local TLB.
+ *
+ * addr:    virtual address whose mapping is changed
+ * present: true to mark the mapping present, false to mark it not present
+ * pglevel: if non-NULL, receives the level reported by lookup_address()
+ *
+ * Only 4k and 2M mappings are handled. If no entry is found, or the level
+ * is something else, an error is logged and the mapping is left untouched.
+ */
+static void set_page_present(unsigned long addr, bool present,
+                                                        unsigned int *pglevel)
+{
+        pteval_t pteval;
+        pmdval_t pmdval;
+        unsigned int level;
+        pmd_t *pmd;
+        pte_t *pte = lookup_address(addr, &level);
+        if (!pte) {
+                pr_err("kmmio: no pte for page 0x%08lx\n", addr);
+                return;
+        }
+        /* reported even when the level turns out to be unsupported below */
+        if (pglevel)
+                *pglevel = level;
+        switch (level) {
+        case PG_LEVEL_2M:
+                /* large page: the entry returned is really a pmd */
+                pmd = (pmd_t *)pte;
+                pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
+                if (present)
+                        pmdval |= _PAGE_PRESENT;
+                set_pmd(pmd, __pmd(pmdval));
+                break;
+        case PG_LEVEL_4K:
+                pteval = pte_val(*pte) & ~_PAGE_PRESENT;
+                if (present)
+                        pteval |= _PAGE_PRESENT;
+                set_pte_atomic(pte, __pte(pteval));
+                break;
+        default:
+                pr_err("kmmio: unexpected page level 0x%x.\n", level);
+                return;
+        }
+        /* drop the stale translation so the new present bit takes effect */
+        __flush_tlb_one(addr);
+}
+/**
+ * Arm a page: clear its present bit so that the next access to it faults.
+ * The address is rounded down to its page boundary. pglevel is passed
+ * through to set_page_present() and may be NULL.
+ */
+static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
+{
+        const unsigned long page_start = page & PAGE_MASK;
+        set_page_present(page_start, false, pglevel);
+}
+/**
+ * Disarm a page: set its present bit again so that accesses succeed.
+ * The address is rounded down to its page boundary. pglevel is passed
+ * through to set_page_present() and may be NULL.
+ */
+static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
+{
+        const unsigned long page_start = page & PAGE_MASK;
+        set_page_present(page_start, true, pglevel);
+}
+/*
+ * This is being called from do_page_fault().
+ *
+ * We may be in an interrupt or a critical section. Also prefetching may
+ * trigger a page fault. We may be in the middle of process switch.
+ * We cannot take any locks, because we could be executing especially
+ * within a kmmio critical section.
+ *
+ * Local interrupts are disabled, so preemption cannot happen.
+ * Do not enable interrupts, do not sleep, and watch out for other CPUs.
+ */
+/*
+ * Interrupts are disabled on entry as trap3 is an interrupt gate
+ * and they remain disabled throughout this function.
+ * NOTE(review): "trap3" looks inherited from the kprobes code this file
+ * is based on; this handler runs from the page fault path - confirm.
+ *
+ * regs: register state of the faulting context; flags are modified to
+ *       single-step the faulting instruction with interrupts off
+ * addr: faulting address
+ *
+ * Returns 1 if the fault was caused by kmmio and has been dealt with
+ * (single-stepping was set up, or this is a duplicate hit on the address
+ * already in flight on this cpu). Returns 0 if the fault is not handled.
+ */
+int kmmio_handler(struct pt_regs *regs, unsigned long addr)
+{
+        struct kmmio_context *ctx;
+        struct kmmio_fault_page *faultpage;
+        int ret = 0; /* default to fault not handled */
+        /*
+         * Preemption is now disabled to prevent process switch during
+         * single stepping. We can only handle one active kmmio trace
+         * per cpu, so ensure that we finish it before something else
+         * gets to run. We also hold the RCU read lock over single
+         * stepping to avoid looking up the probe and kmmio_fault_page
+         * again.
+         */
+        preempt_disable();
+        rcu_read_lock();
+        faultpage = get_kmmio_fault_page(addr);
+        if (!faultpage) {
+                /*
+                 * Either this page fault is not caused by kmmio, or
+                 * another CPU just pulled the kmmio probe from under
+                 * our feet. The latter case should not be possible.
+                 */
+                goto no_kmmio;
+        }
+        ctx = &get_cpu_var(kmmio_ctx);
+        if (ctx->active) {
+                /* a hit is already in flight on this cpu: let the access through */
+                disarm_kmmio_fault_page(faultpage->page, NULL);
+                if (addr == ctx->addr) {
+                        /*
+                         * On SMP we sometimes get recursive probe hits on the
+                         * same address. Context is already saved, fall out.
+                         */
+                        pr_debug("kmmio: duplicate probe hit on CPU %d, for "
+                                                "address 0x%08lx.\n",
+                                                smp_processor_id(), addr);
+                        ret = 1;
+                        goto no_kmmio_ctx;
+                }
+                /*
+                 * Prevent overwriting already in-flight context.
+                 * This should not happen, let's hope disarming at least
+                 * prevents a panic.
+                 */
+                pr_emerg("kmmio: recursive probe hit on CPU %d, "
+                                        "for address 0x%08lx. Ignoring.\n",
+                                        smp_processor_id(), addr);
+                pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
+                                        ctx->addr);
+                goto no_kmmio_ctx;
+        }
+        /* record everything post_kmmio_handler() needs to finish this hit */
+        ctx->active++;
+        ctx->fpage = faultpage;
+        ctx->probe = get_kmmio_probe(addr);
+        ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
+        ctx->addr = addr;
+        if (ctx->probe && ctx->probe->pre_handler)
+                ctx->probe->pre_handler(ctx->probe, regs, addr);
+        /*
+         * Enable single-stepping and disable interrupts for the faulting
+         * context. Local interrupts must not get enabled during stepping.
+         */
+        regs->flags |= X86_EFLAGS_TF;
+        regs->flags &= ~X86_EFLAGS_IF;
+        /* Now we set present bit in PTE and single step. */
+        disarm_kmmio_fault_page(ctx->fpage->page, NULL);
+        /*
+         * If another cpu accesses the same page while we are stepping,
+         * the access will not be caught. It will simply succeed and the
+         * only downside is we lose the event. If this becomes a problem,
+         * the user should drop to single cpu before tracing.
+         */
+        put_cpu_var(kmmio_ctx);
+        /*
+         * The preempt_disable() and rcu_read_lock() taken above are
+         * deliberately not released on this path; post_kmmio_handler()
+         * drops them once the single step has completed.
+         */
+        return 1; /* fault handled */
+no_kmmio_ctx:
+        put_cpu_var(kmmio_ctx);
+no_kmmio:
+        rcu_read_unlock();
+        preempt_enable_no_resched();
+        return ret;
+}
+/*
+ * Debug-trap half of a kmmio probe hit. This must always run as the pair
+ * of kmmio_handler().
+ * Interrupts are disabled on entry, as trap1 is an interrupt gate, and
+ * they remain disabled throughout this function.
+ *
+ * Returns 1 when the trap was consumed here; 0 when no probe hit was in
+ * flight on this CPU, or when TF is still set after the saved flags have
+ * been restored.
+ */
+static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
+{
+        struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
+        int handled = 0;
+        if (ctx->active) {
+                if (ctx->probe && ctx->probe->post_handler)
+                        ctx->probe->post_handler(ctx->probe, condition, regs);
+                /* The page was disarmed for the single step; arm it again. */
+                arm_kmmio_fault_page(ctx->fpage->page, NULL);
+                /* Clear our TF, then put back the TF/IF bits saved earlier. */
+                regs->flags = (regs->flags & ~X86_EFLAGS_TF) | ctx->saved_flags;
+                ctx->active--;
+                BUG_ON(ctx->active);
+                /* These were acquired in kmmio_handler(). */
+                rcu_read_unlock();
+                preempt_enable_no_resched();
+                /*
+                 * If TF is still set, somebody else is single-stepping
+                 * across the probe point; let do_debug carry on as if this
+                 * had not been a probe hit.
+                 */
+                if (!(regs->flags & X86_EFLAGS_TF))
+                        handled = 1;
+        } else {
+                pr_debug("kmmio: spurious debug trap on CPU %d.\n",
+                                                        smp_processor_id());
+        }
+        put_cpu_var(kmmio_ctx);
+        return handled;
+}
+/*
+ * Take one reference on the fault page containing 'page', creating and
+ * arming its kmmio_fault_page entry if none exists yet.
+ * You must be holding kmmio_lock.
+ *
+ * Returns 0 on success, -1 if a new entry could not be allocated.
+ */
+static int add_kmmio_fault_page(unsigned long page)
+{
+        const unsigned long aligned = page & PAGE_MASK;
+        struct kmmio_fault_page *entry = get_kmmio_fault_page(aligned);
+        if (!entry) {
+                /* First user of this page: allocate, publish, then arm. */
+                entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+                if (!entry)
+                        return -1;
+                entry->count = 1;
+                entry->page = aligned;
+                list_add_rcu(&entry->list, kmmio_page_list(entry->page));
+                arm_kmmio_fault_page(entry->page, NULL);
+                return 0;
+        }
+        /* Known page: re-arm it only if its count had dropped to zero. */
+        if (entry->count == 0)
+                arm_kmmio_fault_page(entry->page, NULL);
+        entry->count++;
+        return 0;
+}
+/*
+ * Drop one reference on the fault page containing 'page'. When the count
+ * reaches zero the page is disarmed and pushed onto the front of
+ * *release_list; it is not freed here. Unknown pages are ignored.
+ * You must be holding kmmio_lock.
+ */
+static void release_kmmio_fault_page(unsigned long page,
+                                struct kmmio_fault_page **release_list)
+{
+        struct kmmio_fault_page *entry = get_kmmio_fault_page(page & PAGE_MASK);
+        if (!entry)
+                return;
+        entry->count--;
+        BUG_ON(entry->count < 0);
+        if (entry->count)
+                return;
+        /* Last user gone: stop faulting on it and queue it for release. */
+        disarm_kmmio_fault_page(entry->page, NULL);
+        entry->release_next = *release_list;
+        *release_list = entry;
+}
+/*
+ * Register probe p and arm every page that its address range touches.
+ *
+ * With page-unaligned ioremaps, one or two of the armed pages may also
+ * contain addresses outside the intended mapping. Events for those
+ * addresses are currently dropped silently. They can only result from
+ * programming mistakes, i.e. accesses before the beginning or past the
+ * end of a mapping.
+ *
+ * Returns 0 on success, or -EEXIST when get_kmmio_probe() already finds a
+ * probe for p->addr. A page that cannot be set up is only reported through
+ * pr_err(); the return value is still 0 in that case.
+ */
+int register_kmmio_probe(struct kmmio_probe *p)
+{
+        /* Probe length plus the offset of p->addr within its first page. */
+        const unsigned long span = p->len + (p->addr & ~PAGE_MASK);
+        unsigned long offset;
+        unsigned long flags;
+        int ret = 0;
+        spin_lock_irqsave(&kmmio_lock, flags);
+        if (get_kmmio_probe(p->addr)) {
+                ret = -EEXIST;
+        } else {
+                kmmio_count++;
+                list_add_rcu(&p->list, &kmmio_probes);
+                for (offset = 0; offset < span; offset += PAGE_SIZE) {
+                        if (add_kmmio_fault_page(p->addr + offset))
+                                pr_err("kmmio: Unable to set page fault.\n");
+                }
+        }
+        spin_unlock_irqrestore(&kmmio_lock, flags);
+        /*
+         * XXX: What should I do here?
+         * Here was a call to global_flush_tlb(), but it does not exist
+         * anymore. It seems it's not needed after all.
+         */
+        return ret;
+}
+EXPORT_SYMBOL(register_kmmio_probe);
+/*
+ * Final RCU callback of the unregister sequence: free every
+ * kmmio_fault_page left on the release list, then the list holder itself.
+ * Every page on the list must have a zero count by now.
+ */
+static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
+{
+        struct kmmio_delayed_release *batch =
+                container_of(head, struct kmmio_delayed_release, rcu);
+        struct kmmio_fault_page *cur;
+        struct kmmio_fault_page *next;
+        for (cur = batch->release_list; cur; cur = next) {
+                /* Fetch the successor before the node is freed. */
+                next = cur->release_next;
+                BUG_ON(cur->count);
+                kfree(cur);
+        }
+        kfree(batch);
+}
+/*
+ * First RCU callback of the unregister sequence, queued by
+ * unregister_kmmio_probe(). Walks the release list under kmmio_lock:
+ *  - a page whose count is still zero is removed from kmmio_page_table
+ *    with list_del_rcu() and stays on the release list;
+ *  - a page whose count is non-zero again (it was re-armed in the
+ *    meantime) is taken off the release list so that it is not freed.
+ * The remaining list is then handed to rcu_free_kmmio_fault_pages() after
+ * another grace period.
+ */
+static void remove_kmmio_fault_pages(struct rcu_head *head)
+{
+        struct kmmio_delayed_release *dr = container_of(
+                                                head,
+                                                struct kmmio_delayed_release,
+                                                rcu);
+        struct kmmio_fault_page *p = dr->release_list;
+        /* Address of the link that currently points at p. */
+        struct kmmio_fault_page **prevp = &dr->release_list;
+        unsigned long flags;
+        spin_lock_irqsave(&kmmio_lock, flags);
+        while (p) {
+                if (!p->count) {
+                        list_del_rcu(&p->list);
+                        /* p stays on the list, so its link is the new tail. */
+                        prevp = &p->release_next;
+                } else {
+                        /*
+                         * Unlink p. prevp must keep pointing at the link
+                         * that is still part of the list; advancing it into
+                         * the removed node would make the next unlink
+                         * write into p instead of into the list.
+                         */
+                        *prevp = p->release_next;
+                }
+                p = p->release_next;
+        }
+        spin_unlock_irqrestore(&kmmio_lock, flags);
+        /* This is the real RCU destroy call. */
+        call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
+}
+/*
+ * Remove a kmmio probe. You have to synchronize_rcu() before you can be
+ * sure that the callbacks will not be called anymore. Only after that
+ * may you actually release your struct kmmio_probe.
+ *
+ * Unregistering a kmmio fault page has three steps:
+ * 1. release_kmmio_fault_page()
+ *    Disarm the page, wait a grace period to let all faults finish.
+ * 2. remove_kmmio_fault_pages()
+ *    Remove the pages from kmmio_page_table.
+ * 3. rcu_free_kmmio_fault_pages()
+ *    Actually free the kmmio_fault_page structs, as with RCU.
+ *
+ * If the bookkeeping object for steps 2 and 3 cannot be allocated, the
+ * released kmmio_fault_page objects are leaked and a message is logged.
+ */
+void unregister_kmmio_probe(struct kmmio_probe *p)
+{
+        /* Probe length plus the offset of p->addr within its first page. */
+        const unsigned long span = p->len + (p->addr & ~PAGE_MASK);
+        struct kmmio_fault_page *pending = NULL;
+        struct kmmio_delayed_release *dr;
+        unsigned long offset;
+        unsigned long flags;
+        spin_lock_irqsave(&kmmio_lock, flags);
+        for (offset = 0; offset < span; offset += PAGE_SIZE)
+                release_kmmio_fault_page(p->addr + offset, &pending);
+        list_del_rcu(&p->list);
+        kmmio_count--;
+        spin_unlock_irqrestore(&kmmio_lock, flags);
+        dr = kmalloc(sizeof(*dr), GFP_ATOMIC);
+        if (!dr) {
+                pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
+                return;
+        }
+        dr->release_list = pending;
+        /*
+         * This is not really RCU here. We have just disarmed a set of
+         * pages so that they cannot trigger page faults anymore. However,
+         * we cannot remove the pages from kmmio_page_table,
+         * because a probe hit might be in flight on another CPU. The
+         * pages are collected into a list, and they will be removed from
+         * kmmio_page_table when it is certain that no probe hit related to
+         * these pages can be in flight. RCU grace period sounds like a
+         * good choice.
+         *
+         * If we removed the pages too early, kmmio page fault handler might
+         * not find the respective kmmio_fault_page and determine it's not
+         * a kmmio fault, when it actually is. This would lead to madness.
+         */
+        call_rcu(&dr->rcu, remove_kmmio_fault_pages);
+}
+EXPORT_SYMBOL(unregister_kmmio_probe);
+/*
+ * Die-chain callback: forwards single-step debug traps to
+ * post_kmmio_handler() and stops the chain when it reports the trap as
+ * handled. Everything else is passed on with NOTIFY_DONE.
+ */
+static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
+                                                                void *args)
+{
+        struct die_args *info = args;
+        if (val != DIE_DEBUG)
+                return NOTIFY_DONE;
+        if (!(info->err & DR_STEP))
+                return NOTIFY_DONE;
+        if (post_kmmio_handler(info->err, info->regs) == 1)
+                return NOTIFY_STOP;
+        return NOTIFY_DONE;
+}
+/* Notifier block carrying kmmio_die_notifier(); registered by init_kmmio(). */
+static struct notifier_block nb_die = {
+        .notifier_call = kmmio_die_notifier
+};
+/*
+ * Initialise every bucket of kmmio_page_table and register the die
+ * notifier. Returns whatever register_die_notifier() returns.
+ */
+static int __init init_kmmio(void)
+{
+        int bucket = 0;
+        while (bucket < KMMIO_PAGE_TABLE_SIZE) {
+                INIT_LIST_HEAD(&kmmio_page_table[bucket]);
+                bucket++;
+        }
+        return register_die_notifier(&nb_die);
+}
+fs_initcall(init_kmmio); /* should be before device_initcall() */
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
new file mode 100644
index 000000000000..e7397e108beb
--- /dev/null
+++ b/arch/x86/mm/mmio-mod.c
@@ -0,0 +1,515 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2005
+ *               Jeff Muizelaar, 2006, 2007
+ *               Pekka Paalanen, 2008 <pq@iki.fi>
+ *
+ * Derived from the read-mod example from relay-examples by Tom Zanussi.
+ */
+#define DEBUG 1
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/version.h>
+#include <linux/kallsyms.h>
+#include <asm/pgtable.h>
+#include <linux/mmiotrace.h>
+#include <asm/e820.h> /* for ISA_START_ADDRESS */
+#include <asm/atomic.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+#include "pf_in.h"
+#define NAME "mmiotrace: "
+/*
+ * Per-cpu record of the probe hit currently being processed; written by
+ * pre() and consumed by post().
+ */
+struct trap_reason {
+        unsigned long addr;     /* faulting address passed to pre() */
+        unsigned long ip;       /* instruction pointer at the fault */
+        enum reason_type type;  /* result of get_ins_type() for that ip */
+        int active_traces;      /* incremented in pre(), decremented in post() */
+};
+/* One traced ioremap mapping; created in ioremap_trace_core(). */
+struct remap_trace {
+        struct list_head list;          /* link in trace_list */
+        struct kmmio_probe probe;       /* probe covering the mapping */
+        resource_size_t phys;           /* offset given to the ioremap hook */
+        unsigned long id;               /* id taken from the next_id counter */
+};
+/*
+ * Accessed per-cpu. pf_reason and cpu_trace are filled in by pre() and
+ * consumed by post().
+ */
+static DEFINE_PER_CPU(struct trap_reason, pf_reason);
+static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
+#if 0 /* XXX: no way to gather this info anymore */
+/* Access to this is not per-cpu. */
+static DEFINE_PER_CPU(atomic_t, dropped);
+#endif
+/* Only ever removed in disable_mmiotrace(); its creation is compiled out. */
+static struct dentry *marker_file;
+static DEFINE_MUTEX(mmiotrace_mutex);
+static DEFINE_SPINLOCK(trace_lock);
+/* Non-zero while tracing is switched on; read through is_enabled(). */
+static atomic_t mmiotrace_enabled;
+static LIST_HEAD(trace_list);           /* struct remap_trace */
+/*
+ * Locking in this file:
+ * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections.
+ * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex
+ *   and trace_lock.
+ * - Routines depending on is_enabled() must take trace_lock.
+ * - trace_list users must hold trace_lock.
+ * - is_enabled() guarantees that mmio_trace_record is allowed.
+ * - pre/post callbacks assume the effect of is_enabled() being true.
+ */
+/* module parameters */
+/* When non-zero, mmiotrace_ioremap() skips mappings at any other offset. */
+static unsigned long    filter_offset;
+/* When set, mappings are still logged but no kmmio probe is registered. */
+static int              nommiotrace;
+/* When set, pre() stores the faulting instruction pointer in the record. */
+static int              trace_pc;
+module_param(filter_offset, ulong, 0);
+module_param(nommiotrace, bool, 0);
+module_param(trace_pc, bool, 0);
+MODULE_PARM_DESC(filter_offset, "Start address of traced mappings.");
+MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing.");
+MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions.");
+/* True while mmiotrace_enabled is non-zero, i.e. tracing is switched on. */
+static bool is_enabled(void)
+{
+        return atomic_read(&mmiotrace_enabled) != 0;
+}
+#if 0 /* XXX: needs rewrite */
+/*
+ * Write callback for the debugfs entry:
+ * Read a marker and write it to the mmio trace log
+ *
+ * This whole function is currently compiled out. As written, the inner
+ * "#if 0" also removes the relay_write() branch together with its "else",
+ * so len is unconditionally set to -EINVAL and that is what gets returned
+ * once the user buffer has been copied.
+ */
+static ssize_t write_marker(struct file *file, const char __user *buffer,
+                                                size_t count, loff_t *ppos)
+{
+        char *event = NULL;
+        struct mm_io_header *headp;
+        /* Marker payload is capped at 65535 bytes. */
+        ssize_t len = (count > 65535) ? 65535 : count;
+        /* One allocation holds the header followed by the payload. */
+        event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
+        if (!event)
+                return -ENOMEM;
+        headp = (struct mm_io_header *)event;
+        headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
+        headp->data_len = len;
+        if (copy_from_user(event + sizeof(*headp), buffer, len)) {
+                kfree(event);
+                return -EFAULT;
+        }
+        spin_lock_irq(&trace_lock);
+#if 0 /* XXX: convert this to use tracing */
+        if (is_enabled())
+                relay_write(chan, event, sizeof(*headp) + len);
+        else
+#endif
+                len = -EINVAL;
+        spin_unlock_irq(&trace_lock);
+        kfree(event);
+        return len;
+}
+#endif
+/*
+ * Log the page table entry that maps 'address': its raw value and that
+ * value masked with _PAGE_PRESENT. Logs an error and returns if no entry
+ * is found; a PG_LEVEL_2M mapping is reported and then hits BUG().
+ */
+static void print_pte(unsigned long address)
+{
+        unsigned int pg_level;
+        unsigned long long raw;
+        pte_t *entry = lookup_address(address, &pg_level);
+        if (entry == NULL) {
+                pr_err(NAME "Error in %s: no pte for page 0x%08lx\n",
+                                                        __func__, address);
+                return;
+        }
+        if (pg_level == PG_LEVEL_2M) {
+                pr_emerg(NAME "4MB pages are not currently supported: "
+                                                        "0x%08lx\n", address);
+                BUG();
+        }
+        raw = (unsigned long long)pte_val(*entry);
+        pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address,
+                raw, raw & _PAGE_PRESENT);
+}
+/*
+ * For some reason the pre/post pairs have been called in an
+ * unmatched order. Report and die.
+ *
+ * Logs the current and the previously recorded faulting address and IP
+ * (the latter two from this CPU's pf_reason), the pte for 'addr' and a
+ * selection of registers from 'regs', then ends in BUG().
+ */
+static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
+{
+        const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+        pr_emerg(NAME "unexpected fault for address: 0x%08lx, "
+                                        "last fault for address: 0x%08lx\n",
+                                        addr, my_reason->addr);
+        print_pte(addr);
+        print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip);
+        print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip);
+#ifdef __i386__
+        pr_emerg("eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
+                        regs->ax, regs->bx, regs->cx, regs->dx);
+        pr_emerg("esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
+                        regs->si, regs->di, regs->bp, regs->sp);
+#else
+        /* Note: this variant prints rax, rcx and rdx only (no rbx). */
+        pr_emerg("rax: %016lx   rcx: %016lx   rdx: %016lx\n",
+                                        regs->ax, regs->cx, regs->dx);
+        pr_emerg("rsi: %016lx   rdi: %016lx   rbp: %016lx   rsp: %016lx\n",
+                                regs->si, regs->di, regs->bp, regs->sp);
+#endif
+        put_cpu_var(pf_reason);
+        BUG();
+}
+/*
+ * kmmio pre_handler, installed on every probe by ioremap_trace_core().
+ * Records in this CPU's pf_reason what faulted and where, and starts
+ * filling this CPU's cpu_trace record; post() completes and emits it.
+ *
+ * p:    the probe that was hit; p->private is the owning struct remap_trace.
+ * regs: register state at the fault.
+ * addr: the faulting address.
+ */
+static void pre(struct kmmio_probe *p, struct pt_regs *regs,
+                                                unsigned long addr)
+{
+        struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+        struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
+        const unsigned long instptr = instruction_pointer(regs);
+        const enum reason_type type = get_ins_type(instptr);
+        struct remap_trace *trace = p->private;
+        /* it doesn't make sense to have more than one active trace per cpu */
+        if (my_reason->active_traces)
+                die_kmmio_nesting_error(regs, addr);
+        else
+                my_reason->active_traces++;
+        my_reason->type = type;
+        my_reason->addr = addr;
+        my_reason->ip = instptr;
+        /* Offset of the access within the mapping, rebased onto trace->phys. */
+        my_trace->phys = addr - trace->probe.addr + trace->phys;
+        my_trace->map_id = trace->id;
+        /*
+         * Only record the program counter when requested.
+         * It may taint clean-room reverse engineering.
+         */
+        if (trace_pc)
+                my_trace->pc = instptr;
+        else
+                my_trace->pc = 0;
+        /*
+         * XXX: the timestamp recorded will be *after* the tracing has been
+         * done, not at the time we hit the instruction. SMP implications
+         * on event ordering?
+         */
+        switch (type) {
+        case REG_READ:
+                /* No value is stored here for reads; post() fills it in. */
+                my_trace->opcode = MMIO_READ;
+                my_trace->width = get_ins_mem_width(instptr);
+                break;
+        case REG_WRITE:
+                my_trace->opcode = MMIO_WRITE;
+                my_trace->width = get_ins_mem_width(instptr);
+                my_trace->value = get_ins_reg_val(instptr, regs);
+                break;
+        case IMM_WRITE:
+                my_trace->opcode = MMIO_WRITE;
+                my_trace->width = get_ins_mem_width(instptr);
+                my_trace->value = get_ins_imm_val(instptr);
+                break;
+        default:
+                {
+                        /*
+                         * Instruction type not recognised: store the first
+                         * three bytes at the instruction pointer as value.
+                         */
+                        unsigned char *ip = (unsigned char *)instptr;
+                        my_trace->opcode = MMIO_UNKNOWN_OP;
+                        my_trace->width = 0;
+                        my_trace->value = (*ip) << 16 | *(ip + 1) << 8 |
+                                                                *(ip + 2);
+                }
+        }
+        put_cpu_var(cpu_trace);
+        put_cpu_var(pf_reason);
+}
+/*
+ * kmmio post_handler, installed on every probe by ioremap_trace_core().
+ * Completes the trace record that pre() started on this CPU and hands it
+ * to mmio_trace_rw().
+ *
+ * p:         the probe that was hit (unused here).
+ * condition: value passed through from the debug trap (unused here).
+ * regs:      register state after the single step.
+ */
+static void post(struct kmmio_probe *p, unsigned long condition,
+                                                        struct pt_regs *regs)
+{
+        struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+        struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
+        /* this should always return the active_trace count to 0 */
+        my_reason->active_traces--;
+        if (my_reason->active_traces) {
+                /* Terminate the line so the BUG() output starts on its own. */
+                pr_emerg(NAME "unexpected post handler\n");
+                BUG();
+        }
+        switch (my_reason->type) {
+        case REG_READ:
+                /* The value read is only available after the step. */
+                my_trace->value = get_ins_reg_val(my_reason->ip, regs);
+                break;
+        default:
+                break;
+        }
+        mmio_trace_rw(my_trace);
+        put_cpu_var(cpu_trace);
+        put_cpu_var(pf_reason);
+}
+/*
+ * Start tracing one ioremap mapping: allocate its remap_trace, log the
+ * mapping with mmio_trace_mapping(), add it to trace_list and, unless
+ * nommiotrace is set, register the kmmio probe.
+ *
+ * offset: the offset handed to the ioremap hook (stored as .phys).
+ * size:   length of the mapping.
+ * addr:   the address the mapping was placed at.
+ *
+ * is_enabled() is rechecked under trace_lock; if tracing was switched off
+ * in the meantime nothing is recorded and the allocation is released.
+ */
+static void ioremap_trace_core(resource_size_t offset, unsigned long size,
+                                                        void __iomem *addr)
+{
+        static atomic_t next_id;
+        struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL);
+        /* These are page-unaligned. */
+        struct mmiotrace_map map = {
+                .phys = offset,
+                .virt = (unsigned long)addr,
+                .len = size,
+                .opcode = MMIO_PROBE
+        };
+        if (!trace) {
+                pr_err(NAME "kmalloc failed in ioremap\n");
+                return;
+        }
+        *trace = (struct remap_trace) {
+                .probe = {
+                        .addr = (unsigned long)addr,
+                        .len = size,
+                        .pre_handler = pre,
+                        .post_handler = post,
+                        .private = trace
+                },
+                .phys = offset,
+                .id = atomic_inc_return(&next_id)
+        };
+        map.map_id = trace->id;
+        spin_lock_irq(&trace_lock);
+        if (!is_enabled()) {
+                /* Never linked into trace_list, so nobody else frees it. */
+                kfree(trace);
+                goto not_enabled;
+        }
+        mmio_trace_mapping(&map);
+        list_add_tail(&trace->list, &trace_list);
+        if (!nommiotrace)
+                register_kmmio_probe(&trace->probe);
+not_enabled:
+        spin_unlock_irq(&trace_lock);
+}
+/*
+ * ioremap hook. Does nothing while tracing is off. Otherwise logs the
+ * call and traces the mapping, unless filter_offset is set and differs
+ * from 'offset'.
+ */
+void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
+                                                void __iomem *addr)
+{
+        /* Cheap unlocked check; *_core() rechecks with proper locking. */
+        if (!is_enabled())
+                return;
+        pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n",
+                                (unsigned long long)offset, size, addr);
+        if (!filter_offset || offset == filter_offset)
+                ioremap_trace_core(offset, size, addr);
+}
+/*
+ * Stop tracing the mapping that starts at 'addr'. Under trace_lock, and
+ * only while tracing is enabled, the matching remap_trace is taken off
+ * trace_list (and its probe unregistered unless nommiotrace is set) and
+ * an MMIO_UNPROBE record is logged; map_id is -1 if no trace matched.
+ * A trace that was found is freed after an RCU grace period.
+ */
+static void iounmap_trace_core(volatile void __iomem *addr)
+{
+        struct mmiotrace_map map = {
+                .phys = 0,
+                .virt = (unsigned long)addr,
+                .len = 0,
+                .opcode = MMIO_UNPROBE
+        };
+        struct remap_trace *victim = NULL;
+        struct remap_trace *cur;
+        struct remap_trace *next;
+        pr_debug(NAME "Unmapping %p.\n", addr);
+        spin_lock_irq(&trace_lock);
+        if (is_enabled()) {
+                list_for_each_entry_safe(cur, next, &trace_list, list) {
+                        if (cur->probe.addr != (unsigned long)addr)
+                                continue;
+                        if (!nommiotrace)
+                                unregister_kmmio_probe(&cur->probe);
+                        list_del(&cur->list);
+                        victim = cur;
+                        break;
+                }
+                map.map_id = victim ? victim->id : -1;
+                mmio_trace_mapping(&map);
+        }
+        spin_unlock_irq(&trace_lock);
+        if (victim) {
+                synchronize_rcu(); /* unregister_kmmio_probe() requirement */
+                kfree(victim);
+        }
+}
+/*
+ * iounmap hook. May sleep: the core path waits for an RCU grace period
+ * before freeing a trace. Does nothing further while tracing is off.
+ */
+void mmiotrace_iounmap(volatile void __iomem *addr)
+{
+        might_sleep();
+        /* Cheap unlocked check; *_core() rechecks with proper locking. */
+        if (!is_enabled())
+                return;
+        iounmap_trace_core(addr);
+}
+/*
+ * Tear down every trace still on trace_list, i.e. mappings that were
+ * never iounmapped. Works in two passes separated by one RCU grace
+ * period: first unregister all probes, then unlink and free the entries.
+ */
+static void clear_trace_list(void)
+{
+        struct remap_trace *trace;
+        struct remap_trace *tmp;
+        /*
+         * No locking required, because the caller ensures we are in a
+         * critical section via mutex, and is_enabled() is false,
+         * i.e. nothing can traverse or modify this list.
+         * Caller also ensures is_enabled() cannot change.
+         */
+        list_for_each_entry(trace, &trace_list, list) {
+                pr_notice(NAME "purging non-iounmapped "
+                                        "trace @0x%08lx, size 0x%lx.\n",
+                                        trace->probe.addr, trace->probe.len);
+                if (!nommiotrace)
+                        unregister_kmmio_probe(&trace->probe);
+        }
+        synchronize_rcu(); /* unregister_kmmio_probe() requirement */
+        /* The _safe variant is needed because entries are freed in the loop. */
+        list_for_each_entry_safe(trace, tmp, &trace_list, list) {
+                list_del(&trace->list);
+                kfree(trace);
+        }
+}
+#ifdef CONFIG_HOTPLUG_CPU
+static cpumask_t downed_cpus;
+/*
+ * Try to take every online CPU except the first one offline, remembering
+ * the set in downed_cpus so that leave_uniprocessor() can bring them
+ * back. Failures are logged; a warning is printed if more than one CPU
+ * is still online afterwards.
+ */
+static void enter_uniprocessor(void)
+{
+        int cpu;
+        int err;
+        /* Snapshot the online map, minus its first CPU. */
+        get_online_cpus();
+        downed_cpus = cpu_online_map;
+        cpu_clear(first_cpu(cpu_online_map), downed_cpus);
+        if (num_online_cpus() > 1)
+                pr_notice(NAME "Disabling non-boot CPUs...\n");
+        put_online_cpus();
+        for_each_cpu_mask(cpu, downed_cpus) {
+                err = cpu_down(cpu);
+                if (err)
+                        pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err);
+                else
+                        pr_info(NAME "CPU%d is down.\n", cpu);
+        }
+        if (num_online_cpus() <= 1)
+                return;
+        pr_warning(NAME "multiple CPUs still online, "
+                                        "may miss events.\n");
+}
+/*
+ * Bring the CPUs recorded in downed_cpus back online, logging the outcome
+ * for each. Does nothing when that set is empty.
+ */
+static void leave_uniprocessor(void)
+{
+        int cpu;
+        int err;
+        if (!cpus_weight(downed_cpus))
+                return;
+        pr_notice(NAME "Re-enabling CPUs...\n");
+        for_each_cpu_mask(cpu, downed_cpus) {
+                err = cpu_up(cpu);
+                if (err)
+                        pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err);
+                else
+                        pr_info(NAME "enabled CPU%d.\n", cpu);
+        }
+}
+#else /* !CONFIG_HOTPLUG_CPU */
+/*
+ * Without CPU hotplug no CPU can be taken offline here; only warn when
+ * more than one is online.
+ */
+static void enter_uniprocessor(void)
+{
+        if (num_online_cpus() > 1)
+                pr_warning(NAME "multiple CPUs are online, may miss events. "
+                        "Suggest booting with maxcpus=1 kernel argument.\n");
+}
+/* Nothing was taken offline, so there is nothing to undo. */
+static void leave_uniprocessor(void)
+{
+}
+#endif
+#if 0 /* XXX: out of order */
+/* File operations for the "marker" debugfs file; compiled out for now. */
+static struct file_operations fops_marker = {
+        .owner =        THIS_MODULE,
+        .write =        write_marker
+};
+#endif
+/*
+ * Switch MMIO tracing on. Serialised by mmiotrace_mutex; a no-op when
+ * tracing is already enabled. Tries to drop to a single CPU first, then
+ * raises mmiotrace_enabled under trace_lock.
+ */
+void enable_mmiotrace(void)
+{
+        mutex_lock(&mmiotrace_mutex);
+        if (!is_enabled()) {
+#if 0 /* XXX: tracing does not support text entries */
+                marker_file = debugfs_create_file("marker", 0660, dir, NULL,
+                                                                &fops_marker);
+                if (!marker_file)
+                        pr_err(NAME "marker file creation failed.\n");
+#endif
+                if (nommiotrace)
+                        pr_info(NAME "MMIO tracing disabled.\n");
+                enter_uniprocessor();
+                spin_lock_irq(&trace_lock);
+                atomic_inc(&mmiotrace_enabled);
+                spin_unlock_irq(&trace_lock);
+                pr_info(NAME "enabled.\n");
+        }
+        mutex_unlock(&mmiotrace_mutex);
+}
+/*
+ * Switch MMIO tracing off. Serialised by mmiotrace_mutex; a no-op when
+ * tracing is not enabled. Lowers mmiotrace_enabled under trace_lock,
+ * purges the remaining traces, brings CPUs back online and removes the
+ * marker file if one exists.
+ */
+void disable_mmiotrace(void)
+{
+        mutex_lock(&mmiotrace_mutex);
+        if (is_enabled()) {
+                spin_lock_irq(&trace_lock);
+                atomic_dec(&mmiotrace_enabled);
+                BUG_ON(is_enabled());
+                spin_unlock_irq(&trace_lock);
+                clear_trace_list(); /* guarantees: no more kmmio callbacks */
+                leave_uniprocessor();
+                if (marker_file) {
+                        debugfs_remove(marker_file);
+                        marker_file = NULL;
+                }
+                pr_info(NAME "disabled.\n");
+        }
+        mutex_unlock(&mmiotrace_mutex);
+}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index afd40054d157..65c6e46bf059 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -141,7 +141,7 @@ static void cpa_flush_all(unsigned long cache)
 {
        BUG_ON(irqs_disabled());
-        on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
+        on_each_cpu(__cpa_flush_all, (void *) cache, 1);
 }
 static void __cpa_flush_range(void *arg)
@@ -162,7 +162,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
        BUG_ON(irqs_disabled());
        WARN_ON(PAGE_ALIGN(start) != start);
-        on_each_cpu(__cpa_flush_range, NULL, 1, 1);
+        on_each_cpu(__cpa_flush_range, NULL, 1);
        if (!cache)
                return;
@@ -262,6 +262,7 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
        return pte_offset_kernel(pmd, address);
 }
+EXPORT_SYMBOL_GPL(lookup_address);
 /*
 * Set the new pmd in all the pgds we know about:
@@ -536,8 +537,14 @@ static int split_large_page(pte_t *kpte, unsigned long address)
                set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
        if (address >= (unsigned long)__va(0) &&
+                address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
+                split_page_count(level);
+#ifdef CONFIG_X86_64
+        if (address >= (unsigned long)__va(1UL<<32) &&
                address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
                split_page_count(level);
+#endif
        /*
         * Install the new, split up pagetable. Important details here:
@@ -652,15 +659,24 @@ static int cpa_process_alias(struct cpa_data *cpa)
        struct cpa_data alias_cpa;
        int ret = 0;
-        if (cpa->pfn > max_pfn_mapped)
+        if (cpa->pfn >= max_pfn_mapped)
                return 0;
+#ifdef CONFIG_X86_64
+        if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
+                return 0;
+#endif
        /*
         * No need to redo, when the primary call touched the direct
         * mapping already:
         */
-        if (!within(cpa->vaddr, PAGE_OFFSET,
+        if (!(within(cpa->vaddr, PAGE_OFFSET,
-                    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
+                    PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
+#ifdef CONFIG_X86_64
+                || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
+                    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
+#endif
+        )) {
                alias_cpa = *cpa;
                alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index a885a1019b8a..d4585077977a 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -449,7 +449,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
        if (retval < 0)
                return 0;
-        if (pfn <= max_pfn_mapped &&
+        if (((pfn < max_low_pfn_mapped) ||
+             (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
            ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
                free_memtype(offset, offset + size);
                printk(KERN_INFO
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
new file mode 100644
index 000000000000..efa1911e20ca
--- /dev/null
+++ b/arch/x86/mm/pf_in.c
@@ -0,0 +1,489 @@
+/*
+ *  Fault Injection Test harness (FI)
+ *  Copyright (C) Intel Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ *  USA.
+ *
+ */
+/*  Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp
+ *  Copyright by Intel Corp., 2002
+ *  Louis Zhuang (louis.zhuang@intel.com)
+ *
+ *  Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007
+ */
+#include <linux/module.h>
+#include <linux/ptrace.h> /* struct pt_regs */
+#include "pf_in.h"
+/*
+ * Lookup tables for the instruction decoder in this file.  They are only
+ * ever searched, never modified, hence const.
+ *
+ * Two-byte opcodes are listed with the 0x0F escape byte in the low byte
+ * (the sequence 0F B6 appears as 0xB60F), matching the 16-bit value that
+ * get_opcode() loads from the instruction stream.
+ *
+ *   prefix_codes  bytes skipped by skip_prefix()
+ *   reg_rop       opcodes classified as REG_READ
+ *   reg_wop       opcodes classified as REG_WRITE
+ *   imm_wop       opcodes classified as IMM_WRITE
+ *   rw8 / rw32    register operand is 1 byte / 2, 4 or 8 bytes
+ *   mw8..mw64     memory operand width classes, see get_ins_mem_width()
+ */
+#ifdef __i386__
+/* IA32 Manual 3, 2-1 */
+static const unsigned char prefix_codes[] = {
+        0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64,
+        0x65, 0x2E, 0x3E, 0x66, 0x67
+};
+/* IA32 Manual 3, 3-432*/
+static const unsigned int reg_rop[] = {
+        0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+};
+static const unsigned int reg_wop[] = { 0x88, 0x89 };
+static const unsigned int imm_wop[] = { 0xC6, 0xC7 };
+/* IA32 Manual 3, 3-432*/
+static const unsigned int rw8[] = { 0x88, 0x8A, 0xC6 };
+static const unsigned int rw32[] = {
+        0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+};
+/* 8 bit only */
+static const unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F };
+/* 16 bit only */
+static const unsigned int mw16[] = { 0xB70F, 0xBF0F };
+/* 16 or 32 bit */
+static const unsigned int mw32[] = { 0x89, 0x8B, 0xC7 };
+/* no 64 bit accesses on i386 */
+static const unsigned int mw64[] = {};
+#else /* not __i386__ */
+static const unsigned char prefix_codes[] = {
+        0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36,
+        0xF0, 0xF3, 0xF2,
+        /* REX Prefixes */
+        0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+        0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f
+};
+/* AMD64 Manual 3, Appendix A*/
+static const unsigned int reg_rop[] = {
+        0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+};
+static const unsigned int reg_wop[] = { 0x88, 0x89 };
+static const unsigned int imm_wop[] = { 0xC6, 0xC7 };
+static const unsigned int rw8[] = { 0xC6, 0x88, 0x8A };
+static const unsigned int rw32[] = {
+        0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+};
+/* 8 bit only */
+static const unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F };
+/* 16 bit only */
+static const unsigned int mw16[] = { 0xB70F, 0xBF0F };
+/* 16 or 32 bit */
+static const unsigned int mw32[] = { 0xC7 };
+/* 16, 32 or 64 bit */
+static const unsigned int mw64[] = { 0x89, 0x8B };
+#endif /* not __i386__ */
+/*
+ * Step over every leading byte at @addr that appears in prefix_codes[].
+ *
+ * @shorted:  set to 1 if a 0x66 prefix was seen, else 0.
+ * @enlarged: set to 1 if (on amd64) a prefix byte 0x48..0x4f was seen, else 0.
+ * @rexr:     set to 1 if (on amd64) a prefix byte of the form 0x4x with
+ *            bit 2 set was seen, else 0.
+ *
+ * Returns the number of prefix bytes skipped.
+ */
+static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged,
+                       int *rexr)
+{
+        unsigned char *cur = addr;
+        int idx;
+        *shorted = 0;
+        *enlarged = 0;
+        *rexr = 0;
+        for (;;) {
+                /* Look the current byte up in the prefix table. */
+                for (idx = 0; idx < ARRAY_SIZE(prefix_codes); idx++)
+                        if (prefix_codes[idx] == *cur)
+                                break;
+                /* Not a prefix: this is where the opcode starts. */
+                if (idx == ARRAY_SIZE(prefix_codes))
+                        break;
+                if (*cur == 0x66)
+                        *shorted = 1;
+#ifdef __amd64__
+                if ((*cur & 0xf8) == 0x48)
+                        *enlarged = 1;
+                if ((*cur & 0xf4) == 0x44)
+                        *rexr = 1;
+#endif
+                cur++;
+        }
+        return (cur - addr);
+}
+/*
+ * Read the opcode at @addr into *@opcode and return its length in bytes.
+ *
+ * A leading 0x0F marks a two-byte opcode; it is returned as a 16-bit
+ * little-endian value (0x0F in the low byte), which is the form used by
+ * the opcode tables.  Anything else is a one-byte opcode.
+ */
+static int get_opcode(unsigned char *addr, unsigned int *opcode)
+{
+        if (addr[0] != 0x0F) {
+                *opcode = addr[0];
+                return 1;
+        }
+        /* 0x0F is extension instruction */
+        *opcode = addr[0] | ((unsigned int)addr[1] << 8);
+        return 2;
+}
+/*
+ * Classify the instruction at @ins_addr by its opcode.
+ *
+ * Returns REG_READ, REG_WRITE or IMM_WRITE when the opcode is found in
+ * reg_rop[], reg_wop[] or imm_wop[] respectively (searched in that order),
+ * and OTHERS for any other opcode.
+ */
+enum reason_type get_ins_type(unsigned long ins_addr)
+{
+        unsigned char *insn = (unsigned char *)ins_addr;
+        unsigned int opcode;
+        int shorted, enlarged, rexr;
+        int k;
+        insn += skip_prefix(insn, &shorted, &enlarged, &rexr);
+        get_opcode(insn, &opcode);
+        for (k = 0; k < ARRAY_SIZE(reg_rop); k++)
+                if (reg_rop[k] == opcode)
+                        return REG_READ;
+        for (k = 0; k < ARRAY_SIZE(reg_wop); k++)
+                if (reg_wop[k] == opcode)
+                        return REG_WRITE;
+        for (k = 0; k < ARRAY_SIZE(imm_wop); k++)
+                if (imm_wop[k] == opcode)
+                        return IMM_WRITE;
+        return OTHERS;
+}
+/*
+ * Width in bytes of the register operand of the instruction at @ins_addr.
+ *
+ * Opcodes in rw8[] give 1.  Opcodes in rw32[] give 2 when a 0x66 prefix
+ * was seen, otherwise 8 when skip_prefix() reported "enlarged", otherwise 4.
+ * An opcode in neither table is logged and yields 0.
+ */
+static unsigned int get_ins_reg_width(unsigned long ins_addr)
+{
+        unsigned char *insn = (unsigned char *)ins_addr;
+        unsigned int opcode;
+        int shorted, enlarged, rexr;
+        int k;
+        insn += skip_prefix(insn, &shorted, &enlarged, &rexr);
+        get_opcode(insn, &opcode);
+        for (k = 0; k < ARRAY_SIZE(rw8); k++) {
+                if (rw8[k] == opcode)
+                        return 1;
+        }
+        for (k = 0; k < ARRAY_SIZE(rw32); k++) {
+                if (rw32[k] != opcode)
+                        continue;
+                if (shorted)
+                        return 2;
+                return enlarged ? 8 : 4;
+        }
+        printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
+        return 0;
+}
+/*
+ * Width in bytes of the memory operand of the instruction at @ins_addr.
+ *
+ * The opcode is looked up in mw8[], mw16[], mw32[] and mw64[] in that
+ * order:
+ *   mw8   -> 1
+ *   mw16  -> 2
+ *   mw32  -> 2 with a 0x66 prefix, else 4
+ *   mw64  -> 2 with a 0x66 prefix, else 8 if "enlarged", else 4
+ * An opcode in none of the tables is logged and yields 0.
+ */
+unsigned int get_ins_mem_width(unsigned long ins_addr)
+{
+        unsigned char *insn = (unsigned char *)ins_addr;
+        unsigned int opcode;
+        int shorted, enlarged, rexr;
+        int k;
+        insn += skip_prefix(insn, &shorted, &enlarged, &rexr);
+        get_opcode(insn, &opcode);
+        for (k = 0; k < ARRAY_SIZE(mw8); k++) {
+                if (mw8[k] == opcode)
+                        return 1;
+        }
+        for (k = 0; k < ARRAY_SIZE(mw16); k++) {
+                if (mw16[k] == opcode)
+                        return 2;
+        }
+        for (k = 0; k < ARRAY_SIZE(mw32); k++) {
+                if (mw32[k] != opcode)
+                        continue;
+                if (shorted)
+                        return 2;
+                return 4;
+        }
+        for (k = 0; k < ARRAY_SIZE(mw64); k++) {
+                if (mw64[k] != opcode)
+                        continue;
+                if (shorted)
+                        return 2;
+                return enlarged ? 8 : 4;
+        }
+        printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
+        return 0;
+}
+/*
+ * Register numbers as they appear in the reg field of the mod/rm byte
+ * (widened to four bits by the rexr flag on amd64).
+ * Note: these are NOT the same as in ptrace-abi.h.
+ */
+enum {
+        /* byte registers, used by get_reg_w8() */
+        arg_AL = 0, arg_CL = 1, arg_DL = 2, arg_BL = 3,
+        arg_AH = 4, arg_CH = 5, arg_DH = 6, arg_BH = 7,
+        /* full-size registers, used by get_reg_w32(); same numbering */
+        arg_AX = 0, arg_CX = 1, arg_DX = 2, arg_BX = 3,
+        arg_SP = 4, arg_BP = 5, arg_SI = 6, arg_DI = 7,
+#ifdef __amd64__
+        /* extended registers */
+        arg_R8  = 8,  arg_R9  = 9,  arg_R10 = 10, arg_R11 = 11,
+        arg_R12 = 12, arg_R13 = 13, arg_R14 = 14, arg_R15 = 15
+#endif
+};
+/*
+ * Map a byte-register number @no to the address of that byte inside the
+ * saved register set @regs.
+ *
+ * AL/BL/CL/DL (and R8..R15 on amd64) resolve to the first byte of the
+ * corresponding pt_regs slot; AH/BH/CH/DH resolve to the byte after it.
+ * An unknown number is logged and NULL is returned.
+ *
+ * NOTE(review): numbers 4..7 are always treated as AH/CH/DH/BH here.  When
+ * an instruction carries a REX prefix those encodings presumably select
+ * SPL/BPL/SIL/DIL instead; this helper has no prefix information to tell
+ * the two apart -- confirm against the architecture manual.
+ */
+static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
+{
+        switch (no) {
+        case arg_AL:
+                return (unsigned char *)&regs->ax;
+        case arg_BL:
+                return (unsigned char *)&regs->bx;
+        case arg_CL:
+                return (unsigned char *)&regs->cx;
+        case arg_DL:
+                return (unsigned char *)&regs->dx;
+        case arg_AH:
+                return (unsigned char *)&regs->ax + 1;
+        case arg_BH:
+                return (unsigned char *)&regs->bx + 1;
+        case arg_CH:
+                return (unsigned char *)&regs->cx + 1;
+        case arg_DH:
+                return (unsigned char *)&regs->dx + 1;
+#ifdef __amd64__
+        case arg_R8:
+                return (unsigned char *)&regs->r8;
+        case arg_R9:
+                return (unsigned char *)&regs->r9;
+        case arg_R10:
+                return (unsigned char *)&regs->r10;
+        case arg_R11:
+                return (unsigned char *)&regs->r11;
+        case arg_R12:
+                return (unsigned char *)&regs->r12;
+        case arg_R13:
+                return (unsigned char *)&regs->r13;
+        case arg_R14:
+                return (unsigned char *)&regs->r14;
+        case arg_R15:
+                return (unsigned char *)&regs->r15;
+#endif
+        default:
+                break;
+        }
+        printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
+        return NULL;
+}
+/*
+ * Map a full-size register number @no to the address of its slot in the
+ * saved register set @regs.
+ *
+ * Despite the name, the returned pointer refers to the whole unsigned long
+ * slot; get_ins_reg_val() casts it to read 16, 32 or 64 bits.
+ * An unknown number is logged and NULL is returned.
+ */
+static unsigned long *get_reg_w32(int no, struct pt_regs *regs)
+{
+        switch (no) {
+        case arg_AX:
+                return &regs->ax;
+        case arg_BX:
+                return &regs->bx;
+        case arg_CX:
+                return &regs->cx;
+        case arg_DX:
+                return &regs->dx;
+        case arg_SP:
+                return &regs->sp;
+        case arg_BP:
+                return &regs->bp;
+        case arg_SI:
+                return &regs->si;
+        case arg_DI:
+                return &regs->di;
+#ifdef __amd64__
+        case arg_R8:
+                return &regs->r8;
+        case arg_R9:
+                return &regs->r9;
+        case arg_R10:
+                return &regs->r10;
+        case arg_R11:
+                return &regs->r11;
+        case arg_R12:
+                return &regs->r12;
+        case arg_R13:
+                return &regs->r13;
+        case arg_R14:
+                return &regs->r14;
+        case arg_R15:
+                return &regs->r15;
+#endif
+        default:
+                break;
+        }
+        printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
+        return NULL;
+}
+/*
+ * Return the value of the register operand of the instruction at @ins_addr,
+ * read from the saved register set @regs.
+ *
+ * Only opcodes listed in reg_rop[] or reg_wop[] are accepted.  The register
+ * is selected by the reg field of the mod/rm byte (bits 5:3), extended with
+ * the rexr flag from skip_prefix(), and is read with the width reported by
+ * get_ins_reg_width().
+ *
+ * Returns the zero-extended register value.  Returns 0, after logging, when
+ * the opcode is not a register move, the width is unknown, or the register
+ * number cannot be resolved.
+ */
+unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
+{
+        unsigned int opcode;
+        unsigned int width;
+        unsigned char mod_rm;
+        unsigned char *p;
+        unsigned char *reg8;
+        unsigned long *reg_full;
+        int reg;
+        int i, shorted, enlarged, rexr;
+        p = (unsigned char *)ins_addr;
+        p += skip_prefix(p, &shorted, &enlarged, &rexr);
+        p += get_opcode(p, &opcode);
+        for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
+                if (reg_rop[i] == opcode)
+                        goto do_work;
+        for (i = 0; i < ARRAY_SIZE(reg_wop); i++)
+                if (reg_wop[i] == opcode)
+                        goto do_work;
+        printk(KERN_ERR "mmiotrace: Not a register instruction, opcode "
+                                                        "0x%02x\n", opcode);
+        goto err;
+do_work:
+        mod_rm = *p;
+        reg = ((mod_rm >> 3) & 0x7) | (rexr << 3);
+        width = get_ins_reg_width(ins_addr);
+        /*
+         * The lookup helpers log and return NULL for an unknown register
+         * number; fall through to the error return instead of
+         * dereferencing NULL.
+         */
+        switch (width) {
+        case 1:
+                reg8 = get_reg_w8(reg, regs);
+                if (reg8)
+                        return *reg8;
+                break;
+        case 2:
+                reg_full = get_reg_w32(reg, regs);
+                if (reg_full)
+                        return *(unsigned short *)reg_full;
+                break;
+        case 4:
+                reg_full = get_reg_w32(reg, regs);
+                if (reg_full)
+                        return *(unsigned int *)reg_full;
+                break;
+#ifdef __amd64__
+        case 8:
+                reg_full = get_reg_w32(reg, regs);
+                if (reg_full)
+                        return *reg_full;
+                break;
+#endif
+        default:
+                /* Report the offending width, not the register number. */
+                printk(KERN_ERR "mmiotrace: Error width# %u\n", width);
+        }
+err:
+        return 0;
+}
+/*
+ * Return the immediate operand of the store-immediate instruction at
+ * @ins_addr.
+ *
+ * Only opcodes listed in imm_wop[] are accepted.  The immediate follows the
+ * mod/rm byte, an optional SIB byte and an optional displacement, all of
+ * which are stepped over here.  Its size follows get_ins_reg_width():
+ * 1, 2 or 4 bytes; for an 8-byte operand the instruction still encodes a
+ * 4-byte immediate that is sign-extended.
+ *
+ * Returns the immediate value, or 0 after logging when the opcode is not
+ * an immediate store or the operand width is unknown.
+ */
+unsigned long get_ins_imm_val(unsigned long ins_addr)
+{
+        unsigned int opcode;
+        unsigned char mod_rm;
+        unsigned char mod;
+        unsigned char rm;
+        unsigned char sib;
+        unsigned char *p;
+        int i, shorted, enlarged, rexr;
+        p = (unsigned char *)ins_addr;
+        p += skip_prefix(p, &shorted, &enlarged, &rexr);
+        p += get_opcode(p, &opcode);
+        for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
+                if (imm_wop[i] == opcode)
+                        goto do_work;
+        printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode "
+                                                        "0x%02x\n", opcode);
+        goto err;
+do_work:
+        mod_rm = *p;
+        mod = mod_rm >> 6;
+        rm = mod_rm & 0x7;
+        p++;
+        /*
+         * r/m == 4 with a memory operand means a SIB byte follows the
+         * mod/rm byte.  With mod == 0, a SIB base of 5 additionally means
+         * "no base register, 32 bit displacement".
+         */
+        if (mod != 3 && rm == 0x4) {
+                sib = *p;
+                p++;
+                if (mod == 0 && (sib & 0x7) == 0x5)
+                        p += 4;
+        }
+        switch (mod) {
+        case 0:
+                /* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2)  */
+                /* AMD64: XXX Check for address size prefix? */
+                if (rm == 0x5)
+                        p += 4;
+                break;
+        case 1:
+                p += 1;
+                break;
+        case 2:
+                p += 4;
+                break;
+        case 3:
+        default:
+                printk(KERN_ERR "mmiotrace: not a memory access instruction "
+                                                "at 0x%lx, rm_mod=0x%02x\n",
+                                                ins_addr, mod_rm);
+        }
+        switch (get_ins_reg_width(ins_addr)) {
+        case 1:
+                return *(unsigned char *)p;
+        case 2:
+                return *(unsigned short *)p;
+        case 4:
+                return *(unsigned int *)p;
+#ifdef __amd64__
+        case 8:
+                /* 64 bit stores carry a sign-extended 32 bit immediate */
+                return (unsigned long)(long)*(int *)p;
+#endif
+        default:
+                printk(KERN_ERR "mmiotrace: Error: width.\n");
+        }
+err:
+        return 0;
+}
diff --git a/arch/x86/mm/pf_in.h b/arch/x86/mm/pf_in.h
new file mode 100644
index 000000000000..e05341a51a27
--- /dev/null
+++ b/arch/x86/mm/pf_in.h
@@ -0,0 +1,39 @@
+/*
+ *  Fault Injection Test harness (FI)
+ *  Copyright (C) Intel Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ *  USA.
+ *
+ */
+#ifndef __PF_H_
+#define __PF_H_
+/* Only pointers to pt_regs are used here; keep the header self-contained. */
+struct pt_regs;
+/*
+ * Classification of a faulting access.  get_ins_type() only ever returns
+ * REG_READ, REG_WRITE, IMM_WRITE or OTHERS.
+ */
+enum reason_type {
+        NOT_ME,         /* page fault is not in regions */
+        NOTHING,        /* access to another point in regions */
+        REG_READ,       /* read from addr to reg */
+        REG_WRITE,      /* write from reg to addr */
+        IMM_WRITE,      /* write from imm to addr */
+        OTHERS          /* other instructions, which cannot be intercepted */
+};
+/* Classify the instruction at ins_addr by opcode. */
+enum reason_type get_ins_type(unsigned long ins_addr);
+/* Width in bytes of the memory operand, or 0 for an unknown opcode. */
+unsigned int get_ins_mem_width(unsigned long ins_addr);
+/* Value of the register operand, taken from regs; 0 on error. */
+unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs);
+/* Value of the immediate operand; 0 on error. */
+unsigned long get_ins_imm_val(unsigned long ins_addr);
+#endif /* __PF_H_ */
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 828907d001e8..b4becbf8c570 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -141,7 +141,6 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
        __flush_tlb_one(vaddr);
 }
-static int fixmaps;
 unsigned long __FIXADDR_TOP = 0xfffff000;
 EXPORT_SYMBOL(__FIXADDR_TOP);
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index f41d67f8f831..1eb2973a301c 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -156,10 +156,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
        num_memory_chunks++;
-        printk(KERN_DEBUG "Memory range %08lx to %08lx (type %x)"
+        printk(KERN_DEBUG "Memory range %08lx to %08lx"
                          " in proximity domain %02x %s\n",
                start_pfn, end_pfn,
-                memory_affinity->memory_type,
                pxm,
                ((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
                 "enabled and removable" : "enabled" ) );
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 0fd67b81a8b6..1b4763e26ea9 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -100,7 +100,19 @@ static __init inline int srat_disabled(void)
 /* Callback for SLIT parsing */
 void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
 {
-        acpi_slit = slit;
+        unsigned length;
+        unsigned long phys;
+        length = slit->header.length;
+        phys = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, length,
+                 PAGE_SIZE);
+        if (phys == -1L)
+                panic(" Can not save slit!\n");
+        acpi_slit = __va(phys);
+        memcpy(acpi_slit, slit, length);
+        reserve_early(phys, phys + length, "ACPI SLIT");
 }
 /* Callback for Proximity Domain -> LAPIC mapping */
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
new file mode 100644
index 000000000000..d877c5b423ef
--- /dev/null
+++ b/arch/x86/mm/testmmiotrace.c
@@ -0,0 +1,71 @@
+/*
+ * Written by Pekka Paalanen, 2008 <pq@iki.fi>
+ */
+#include <linux/module.h>
+#include <linux/io.h>
+#define MODULE_NAME "testmmiotrace"
+static unsigned long mmio_address;
+module_param(mmio_address, ulong, 0);
+MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
+/*
+ * Write a fixed pattern through the mapping @p:
+ *   offsets 0..255         one 8-bit write per byte,
+ *   offsets 1 kB..5 kB     16-bit writes, every 2 bytes,
+ *   offsets 5 kB..16 kB    32-bit writes, every 4 bytes.
+ * The written values are derived from the offset.
+ */
+static void do_write_test(void __iomem *p)
+{
+        unsigned int off = 0;
+        while (off < 256) {
+                iowrite8(off, p + off);
+                off++;
+        }
+        off = 1024;
+        while (off < (5 * 1024)) {
+                iowrite16(off * 12 + 7, p + off);
+                off += 2;
+        }
+        off = 5 * 1024;
+        while (off < (16 * 1024)) {
+                iowrite32(off * 212371 + 13, p + off);
+                off += 4;
+        }
+}
+/*
+ * Read back through the mapping @p using the same offsets and access
+ * sizes as do_write_test().  The values read are discarded.
+ */
+static void do_read_test(void __iomem *p)
+{
+        unsigned int off = 0;
+        while (off < 256) {
+                ioread8(p + off);
+                off++;
+        }
+        off = 1024;
+        while (off < (5 * 1024)) {
+                ioread16(p + off);
+                off += 2;
+        }
+        off = 5 * 1024;
+        while (off < (16 * 1024)) {
+                ioread32(p + off);
+                off += 4;
+        }
+}
+/*
+ * Map 0x4000 bytes (16 kB) at mmio_address uncached, run the write and read
+ * passes over it, and unmap again.  Logs an error and does nothing else if
+ * the mapping cannot be created.
+ */
+static void do_test(void)
+{
+        void __iomem *map = ioremap_nocache(mmio_address, 0x4000);
+        if (map) {
+                do_write_test(map);
+                do_read_test(map);
+                iounmap(map);
+                return;
+        }
+        pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
+}
+/*
+ * Module entry point.  Refuses to load with -ENXIO unless the mmio_address
+ * parameter was given; otherwise warns, runs the test once and returns 0.
+ */
+static int __init init(void)
+{
+        if (mmio_address != 0) {
+                pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
+                                        "in PCI address space, and writing "
+                                        "rubbish in there.\n", mmio_address);
+                do_test();
+                return 0;
+        }
+        pr_err(MODULE_NAME ": you have to use the module argument "
+                                                "mmio_address.\n");
+        pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
+                        " YOU REALLY KNOW WHAT YOU ARE DOING!\n");
+        return -ENXIO;
+}
+/*
+ * Module exit point.  do_test() already unmapped its mapping, so there is
+ * nothing to release; only a debug message is emitted.
+ */
+static void __exit cleanup(void)
+{
+        pr_debug(MODULE_NAME ": unloaded.\n");
+}
+module_init(init);
+module_exit(cleanup);
+MODULE_LICENSE("GPL");