Diffstat (limited to 'arch/x86/mm')
-rw-r--r--  arch/x86/mm/Makefile          |   2
-rw-r--r--  arch/x86/mm/discontig_32.c    |  26
-rw-r--r--  arch/x86/mm/dump_pagetables.c |   2
-rw-r--r--  arch/x86/mm/highmem_32.c      |   1
-rw-r--r--  arch/x86/mm/init_32.c         |  69
-rw-r--r--  arch/x86/mm/init_64.c         |  74
-rw-r--r--  arch/x86/mm/ioremap.c         |  67
-rw-r--r--  arch/x86/mm/k8topology_64.c   |  38
-rw-r--r--  arch/x86/mm/numa_64.c         |  42
-rw-r--r--  arch/x86/mm/pageattr.c        |  16
-rw-r--r--  arch/x86/mm/pat.c             | 207
-rw-r--r--  arch/x86/mm/pgtable.c         | 276
-rw-r--r--  arch/x86/mm/pgtable_32.c      | 204
-rw-r--r--  arch/x86/mm/srat_64.c         |   2
14 files changed, 695 insertions, 331 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 20941d2954e2..b7b3e4c7cfc9 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,5 @@
1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2 pat.o 2 pat.o pgtable.o
3 3
4obj-$(CONFIG_X86_32) += pgtable_32.o 4obj-$(CONFIG_X86_32) += pgtable_32.o
5 5
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 18378850e25a..914ccf983687 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -476,29 +476,3 @@ int memory_add_physaddr_to_nid(u64 addr)
476 476
477EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); 477EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
478#endif 478#endif
479
480#ifndef CONFIG_HAVE_ARCH_PARSE_SRAT
481/*
482 * XXX FIXME: Make SLIT table parsing available to 32-bit NUMA
483 *
484 * These stub functions are needed to compile 32-bit NUMA when SRAT is
485 * not set. There are functions in srat_64.c for parsing this table
486 * and it may be possible to make them common functions.
487 */
488void acpi_numa_slit_init (struct acpi_table_slit *slit)
489{
490 printk(KERN_INFO "ACPI: No support for parsing SLIT table\n");
491}
492
493void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa)
494{
495}
496
497void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma)
498{
499}
500
501void acpi_numa_arch_fixup(void)
502{
503}
504#endif /* CONFIG_HAVE_ARCH_PARSE_SRAT */
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 6791b8334bc6..2c24bea92c66 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -324,7 +324,7 @@ static const struct file_operations ptdump_fops = {
324 .release = single_release, 324 .release = single_release,
325}; 325};
326 326
327int pt_dump_init(void) 327static int pt_dump_init(void)
328{ 328{
329 struct dentry *pe; 329 struct dentry *pe;
330 330
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 9cf33d3ee5bc..165c871ba9af 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -155,4 +155,3 @@ EXPORT_SYMBOL(kmap);
155EXPORT_SYMBOL(kunmap); 155EXPORT_SYMBOL(kunmap);
156EXPORT_SYMBOL(kmap_atomic); 156EXPORT_SYMBOL(kmap_atomic);
157EXPORT_SYMBOL(kunmap_atomic); 157EXPORT_SYMBOL(kunmap_atomic);
158EXPORT_SYMBOL(kmap_atomic_to_page);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9ec62da85fd7..de236e419cb5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -71,7 +71,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
71 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 71 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
72 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); 72 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
73 73
74 paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); 74 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
75 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 75 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
76 pud = pud_offset(pgd, 0); 76 pud = pud_offset(pgd, 0);
77 BUG_ON(pmd_table != pmd_offset(pud, 0)); 77 BUG_ON(pmd_table != pmd_offset(pud, 0));
@@ -100,7 +100,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
100 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); 100 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
101 } 101 }
102 102
103 paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); 103 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
104 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 104 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
105 BUG_ON(page_table != pte_offset_kernel(pmd, 0)); 105 BUG_ON(page_table != pte_offset_kernel(pmd, 0));
106 } 106 }
@@ -227,6 +227,25 @@ static inline int page_kills_ppro(unsigned long pagenr)
227 return 0; 227 return 0;
228} 228}
229 229
230/*
231 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
232 * is valid. The argument is a physical page number.
233 *
234 *
235 * On x86, access has to be given to the first megabyte of ram because that area
236 * contains bios code and data regions used by X and dosemu and similar apps.
237 * Access has to be given to non-kernel-ram areas as well, these contain the PCI
238 * mmio resources as well as potential bios/acpi data regions.
239 */
240int devmem_is_allowed(unsigned long pagenr)
241{
242 if (pagenr <= 256)
243 return 1;
244 if (!page_is_ram(pagenr))
245 return 1;
246 return 0;
247}
248
230#ifdef CONFIG_HIGHMEM 249#ifdef CONFIG_HIGHMEM
231pte_t *kmap_pte; 250pte_t *kmap_pte;
232pgprot_t kmap_prot; 251pgprot_t kmap_prot;
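
The devmem_is_allowed() helper added above encodes the /dev/mem access policy by physical page number: the first megabyte is always allowed because it holds BIOS code and data used by X, dosemu and similar programs, and any page that is not kernel RAM (PCI MMIO, BIOS/ACPI regions) is allowed as well; only kernel RAM above 1 MB is refused. A minimal user-space sketch of the same decision, with page_is_ram() replaced by a hard-coded stand-in (the real helper walks the e820 map):

#include <stdio.h>

/* Stand-in for the kernel's page_is_ram(): pretend RAM covers pages
 * 256..131071 (1 MB .. 512 MB).  Purely illustrative. */
static int page_is_ram(unsigned long pagenr)
{
        return pagenr >= 256 && pagenr < 131072;
}

/* Same policy as the devmem_is_allowed() added in init_32.c/init_64.c:
 * allow the first megabyte and anything that is not kernel RAM. */
static int devmem_is_allowed(unsigned long pagenr)
{
        if (pagenr <= 256)
                return 1;
        if (!page_is_ram(pagenr))
                return 1;
        return 0;
}

int main(void)
{
        unsigned long pfns[] = { 0, 160, 256, 257, 0x10000, 0x40000 };

        for (unsigned int i = 0; i < sizeof(pfns) / sizeof(pfns[0]); i++)
                printf("pfn 0x%lx -> %s\n", pfns[i],
                       devmem_is_allowed(pfns[i]) ? "allowed" : "refused");
        return 0;
}

With the stand-in above, pages 0..256 and the non-RAM pfn 0x40000 come back allowed, while the in-RAM pfns 257 and 0x10000 are refused, which is exactly the split the patch wants for a non-promiscuous /dev/mem.
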
@@ -268,47 +287,17 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
268 pkmap_page_table = pte; 287 pkmap_page_table = pte;
269} 288}
270 289
271static void __meminit free_new_highpage(struct page *page)
272{
273 init_page_count(page);
274 __free_page(page);
275 totalhigh_pages++;
276}
277
278void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) 290void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
279{ 291{
280 if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { 292 if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
281 ClearPageReserved(page); 293 ClearPageReserved(page);
282 free_new_highpage(page); 294 init_page_count(page);
295 __free_page(page);
296 totalhigh_pages++;
283 } else 297 } else
284 SetPageReserved(page); 298 SetPageReserved(page);
285} 299}
286 300
287static int __meminit
288add_one_highpage_hotplug(struct page *page, unsigned long pfn)
289{
290 free_new_highpage(page);
291 totalram_pages++;
292#ifdef CONFIG_FLATMEM
293 max_mapnr = max(pfn, max_mapnr);
294#endif
295 num_physpages++;
296
297 return 0;
298}
299
300/*
301 * Not currently handling the NUMA case.
302 * Assuming single node and all memory that
303 * has been added dynamically that would be
304 * onlined here is in HIGHMEM.
305 */
306void __meminit online_page(struct page *page)
307{
308 ClearPageReserved(page);
309 add_one_highpage_hotplug(page, page_to_pfn(page));
310}
311
312#ifndef CONFIG_NUMA 301#ifndef CONFIG_NUMA
313static void __init set_highmem_pages_init(int bad_ppro) 302static void __init set_highmem_pages_init(int bad_ppro)
314{ 303{
@@ -365,7 +354,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
365 354
366 pte_clear(NULL, va, pte); 355 pte_clear(NULL, va, pte);
367 } 356 }
368 paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT); 357 paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
369} 358}
370 359
371void __init native_pagetable_setup_done(pgd_t *base) 360void __init native_pagetable_setup_done(pgd_t *base)
@@ -457,7 +446,7 @@ void zap_low_mappings(void)
457 * Note that "pgd_clear()" doesn't do it for 446 * Note that "pgd_clear()" doesn't do it for
458 * us, because pgd_clear() is a no-op on i386. 447 * us, because pgd_clear() is a no-op on i386.
459 */ 448 */
460 for (i = 0; i < USER_PTRS_PER_PGD; i++) { 449 for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
461#ifdef CONFIG_X86_PAE 450#ifdef CONFIG_X86_PAE
462 set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); 451 set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
463#else 452#else
@@ -547,9 +536,9 @@ void __init paging_init(void)
547 536
548/* 537/*
549 * Test if the WP bit works in supervisor mode. It isn't supported on 386's 538 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
550 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This 539 * and also on some strange 486's. All 586+'s are OK. This used to involve
551 * used to involve black magic jumps to work around some nasty CPU bugs, 540 * black magic jumps to work around some nasty CPU bugs, but fortunately the
552 * but fortunately the switch to using exceptions got rid of all that. 541 * switch to using exceptions got rid of all that.
553 */ 542 */
554static void __init test_wp_bit(void) 543static void __init test_wp_bit(void)
555{ 544{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ff7906a9a4d..32ba13b0f818 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -135,7 +135,7 @@ static __init void *spp_getpage(void)
135 return ptr; 135 return ptr;
136} 136}
137 137
138static __init void 138static void
139set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) 139set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
140{ 140{
141 pgd_t *pgd; 141 pgd_t *pgd;
@@ -173,7 +173,7 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
173 new_pte = pfn_pte(phys >> PAGE_SHIFT, prot); 173 new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
174 174
175 pte = pte_offset_kernel(pmd, vaddr); 175 pte = pte_offset_kernel(pmd, vaddr);
176 if (!pte_none(*pte) && 176 if (!pte_none(*pte) && pte_val(new_pte) &&
177 pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask)) 177 pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
178 pte_ERROR(*pte); 178 pte_ERROR(*pte);
179 set_pte(pte, new_pte); 179 set_pte(pte, new_pte);
@@ -214,8 +214,7 @@ void __init cleanup_highmap(void)
214} 214}
215 215
216/* NOTE: this is meant to be run only at boot */ 216/* NOTE: this is meant to be run only at boot */
217void __init 217void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
218__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
219{ 218{
220 unsigned long address = __fix_to_virt(idx); 219 unsigned long address = __fix_to_virt(idx);
221 220
@@ -621,15 +620,6 @@ void __init paging_init(void)
621/* 620/*
622 * Memory hotplug specific functions 621 * Memory hotplug specific functions
623 */ 622 */
624void online_page(struct page *page)
625{
626 ClearPageReserved(page);
627 init_page_count(page);
628 __free_page(page);
629 totalram_pages++;
630 num_physpages++;
631}
632
633#ifdef CONFIG_MEMORY_HOTPLUG 623#ifdef CONFIG_MEMORY_HOTPLUG
634/* 624/*
635 * Memory is added always to NORMAL zone. This means you will never get 625 * Memory is added always to NORMAL zone. This means you will never get
@@ -664,6 +654,26 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
664 654
665#endif /* CONFIG_MEMORY_HOTPLUG */ 655#endif /* CONFIG_MEMORY_HOTPLUG */
666 656
657/*
658 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
659 * is valid. The argument is a physical page number.
660 *
661 *
662 * On x86, access has to be given to the first megabyte of ram because that area
663 * contains bios code and data regions used by X and dosemu and similar apps.
664 * Access has to be given to non-kernel-ram areas as well, these contain the PCI
665 * mmio resources as well as potential bios/acpi data regions.
666 */
667int devmem_is_allowed(unsigned long pagenr)
668{
669 if (pagenr <= 256)
670 return 1;
671 if (!page_is_ram(pagenr))
672 return 1;
673 return 0;
674}
675
676
667static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, 677static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
668 kcore_modules, kcore_vsyscall; 678 kcore_modules, kcore_vsyscall;
669 679
@@ -791,7 +801,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
791void __init reserve_bootmem_generic(unsigned long phys, unsigned len) 801void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
792{ 802{
793#ifdef CONFIG_NUMA 803#ifdef CONFIG_NUMA
794 int nid = phys_to_nid(phys); 804 int nid, next_nid;
795#endif 805#endif
796 unsigned long pfn = phys >> PAGE_SHIFT; 806 unsigned long pfn = phys >> PAGE_SHIFT;
797 807
@@ -810,10 +820,16 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
810 820
811 /* Should check here against the e820 map to avoid double free */ 821 /* Should check here against the e820 map to avoid double free */
812#ifdef CONFIG_NUMA 822#ifdef CONFIG_NUMA
813 reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); 823 nid = phys_to_nid(phys);
824 next_nid = phys_to_nid(phys + len - 1);
825 if (nid == next_nid)
826 reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
827 else
828 reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
814#else 829#else
815 reserve_bootmem(phys, len, BOOTMEM_DEFAULT); 830 reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
816#endif 831#endif
832
817 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { 833 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
818 dma_reserve += len / PAGE_SIZE; 834 dma_reserve += len / PAGE_SIZE;
819 set_dma_reserve(dma_reserve); 835 set_dma_reserve(dma_reserve);
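
The reserve_bootmem_generic() change just above no longer assumes the reserved range lives on a single NUMA node: it looks up the node of the first and of the last byte and only uses the per-node reserve_bootmem_node() when the two agree, falling back to the generic reserve_bootmem() when the range straddles a node boundary. A small sketch of that dispatch, with phys_to_nid() modelled as a hypothetical two-node split at 4 GB:

#include <stdio.h>

#define NODE_SPLIT (4ULL << 30)   /* hypothetical: node 0 below 4 GB, node 1 above */

static int phys_to_nid(unsigned long long phys)
{
        return phys < NODE_SPLIT ? 0 : 1;
}

/* Mirrors the dispatch added to reserve_bootmem_generic(): per-node
 * reservation only when the range does not cross a node boundary. */
static void reserve_range(unsigned long long phys, unsigned long long len)
{
        int nid = phys_to_nid(phys);
        int next_nid = phys_to_nid(phys + len - 1);

        if (nid == next_nid)
                printf("reserve_bootmem_node(node %d, %#llx, %#llx)\n",
                       nid, phys, len);
        else
                printf("reserve_bootmem(%#llx, %#llx)  /* spans nodes %d..%d */\n",
                       phys, len, nid, next_nid);
}

int main(void)
{
        reserve_range(0x100000, 0x10000);            /* fully inside node 0 */
        reserve_range(NODE_SPLIT - 0x1000, 0x2000);  /* crosses the boundary */
        return 0;
}
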
@@ -907,6 +923,10 @@ const char *arch_vma_name(struct vm_area_struct *vma)
907/* 923/*
908 * Initialise the sparsemem vmemmap using huge-pages at the PMD level. 924 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
909 */ 925 */
926static long __meminitdata addr_start, addr_end;
927static void __meminitdata *p_start, *p_end;
928static int __meminitdata node_start;
929
910int __meminit 930int __meminit
911vmemmap_populate(struct page *start_page, unsigned long size, int node) 931vmemmap_populate(struct page *start_page, unsigned long size, int node)
912{ 932{
@@ -941,12 +961,32 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
941 PAGE_KERNEL_LARGE); 961 PAGE_KERNEL_LARGE);
942 set_pmd(pmd, __pmd(pte_val(entry))); 962 set_pmd(pmd, __pmd(pte_val(entry)));
943 963
944 printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", 964 /* check to see if we have contiguous blocks */
945 addr, addr + PMD_SIZE - 1, p, node); 965 if (p_end != p || node_start != node) {
966 if (p_start)
967 printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
968 addr_start, addr_end-1, p_start, p_end-1, node_start);
969 addr_start = addr;
970 node_start = node;
971 p_start = p;
972 }
973 addr_end = addr + PMD_SIZE;
974 p_end = p + PMD_SIZE;
946 } else { 975 } else {
947 vmemmap_verify((pte_t *)pmd, node, addr, next); 976 vmemmap_verify((pte_t *)pmd, node, addr, next);
948 } 977 }
949 } 978 }
950 return 0; 979 return 0;
951} 980}
981
982void __meminit vmemmap_populate_print_last(void)
983{
984 if (p_start) {
985 printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
986 addr_start, addr_end-1, p_start, p_end-1, node_start);
987 p_start = NULL;
988 p_end = NULL;
989 node_start = 0;
990 }
991}
952#endif 992#endif
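
The vmemmap_populate() hunk above replaces one KERN_DEBUG line per 2 MB PMD mapping with a coalescing scheme: addr_start/addr_end, p_start/p_end and node_start accumulate while the mapped blocks stay contiguous and on the same node, a single range line is printed only when contiguity breaks, and vmemmap_populate_print_last() flushes the final range. The same batching pattern as a standalone sketch, tracking only addresses and nodes (block size and values are illustrative):

#include <stdio.h>

#define BLOCK 0x200000UL                /* 2 MB, like one PMD mapping */

static unsigned long addr_start, addr_end;
static int node_start = -1;

static void flush_range(void)
{
        if (node_start >= 0)
                printf(" [%lx-%lx] on node %d\n",
                       addr_start, addr_end - 1, node_start);
        node_start = -1;
}

/* Coalesce consecutive blocks into one report, mirroring the
 * contiguity check added in vmemmap_populate(). */
static void map_block(unsigned long addr, int node)
{
        if (node_start < 0 || addr != addr_end || node != node_start) {
                flush_range();
                addr_start = addr;
                node_start = node;
        }
        addr_end = addr + BLOCK;
}

int main(void)
{
        map_block(0x0,       0);
        map_block(0x200000,  0);        /* contiguous: merged */
        map_block(0x400000,  0);        /* contiguous: merged */
        map_block(0x1000000, 1);        /* gap + new node: new range */
        flush_range();                  /* like vmemmap_populate_print_last() */
        return 0;
}
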
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 3a4baf95e24d..71bb3159031a 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -117,8 +117,8 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
117 * have to convert them into an offset in a page-aligned mapping, but the 117 * have to convert them into an offset in a page-aligned mapping, but the
118 * caller shouldn't need to know that small detail. 118 * caller shouldn't need to know that small detail.
119 */ 119 */
120static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, 120static void __iomem *__ioremap_caller(resource_size_t phys_addr,
121 unsigned long prot_val) 121 unsigned long size, unsigned long prot_val, void *caller)
122{ 122{
123 unsigned long pfn, offset, vaddr; 123 unsigned long pfn, offset, vaddr;
124 resource_size_t last_addr; 124 resource_size_t last_addr;
@@ -149,7 +149,8 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
149 * Don't allow anybody to remap normal RAM that we're using.. 149 * Don't allow anybody to remap normal RAM that we're using..
150 */ 150 */
151 for (pfn = phys_addr >> PAGE_SHIFT; 151 for (pfn = phys_addr >> PAGE_SHIFT;
152 (pfn << PAGE_SHIFT) < last_addr; pfn++) { 152 (pfn << PAGE_SHIFT) < (last_addr & PAGE_MASK);
153 pfn++) {
153 154
154 int is_ram = page_is_ram(pfn); 155 int is_ram = page_is_ram(pfn);
155 156
@@ -176,11 +177,11 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
176 /* 177 /*
177 * Do not fallback to certain memory types with certain 178 * Do not fallback to certain memory types with certain
178 * requested type: 179 * requested type:
179 * - request is uncached, return cannot be write-back 180 * - request is uc-, return cannot be write-back
180 * - request is uncached, return cannot be write-combine 181 * - request is uc-, return cannot be write-combine
181 * - request is write-combine, return cannot be write-back 182 * - request is write-combine, return cannot be write-back
182 */ 183 */
183 if ((prot_val == _PAGE_CACHE_UC && 184 if ((prot_val == _PAGE_CACHE_UC_MINUS &&
184 (new_prot_val == _PAGE_CACHE_WB || 185 (new_prot_val == _PAGE_CACHE_WB ||
185 new_prot_val == _PAGE_CACHE_WC)) || 186 new_prot_val == _PAGE_CACHE_WC)) ||
186 (prot_val == _PAGE_CACHE_WC && 187 (prot_val == _PAGE_CACHE_WC &&
@@ -201,6 +202,9 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
201 default: 202 default:
202 prot = PAGE_KERNEL_NOCACHE; 203 prot = PAGE_KERNEL_NOCACHE;
203 break; 204 break;
205 case _PAGE_CACHE_UC_MINUS:
206 prot = PAGE_KERNEL_UC_MINUS;
207 break;
204 case _PAGE_CACHE_WC: 208 case _PAGE_CACHE_WC:
205 prot = PAGE_KERNEL_WC; 209 prot = PAGE_KERNEL_WC;
206 break; 210 break;
@@ -212,7 +216,7 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
212 /* 216 /*
213 * Ok, go for it.. 217 * Ok, go for it..
214 */ 218 */
215 area = get_vm_area(size, VM_IOREMAP); 219 area = get_vm_area_caller(size, VM_IOREMAP, caller);
216 if (!area) 220 if (!area)
217 return NULL; 221 return NULL;
218 area->phys_addr = phys_addr; 222 area->phys_addr = phys_addr;
@@ -255,7 +259,17 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
255 */ 259 */
256void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) 260void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
257{ 261{
258 return __ioremap(phys_addr, size, _PAGE_CACHE_UC); 262 /*
263 * Ideally, this should be:
264 * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
265 *
266 * Till we fix all X drivers to use ioremap_wc(), we will use
267 * UC MINUS.
268 */
269 unsigned long val = _PAGE_CACHE_UC_MINUS;
270
271 return __ioremap_caller(phys_addr, size, val,
272 __builtin_return_address(0));
259} 273}
260EXPORT_SYMBOL(ioremap_nocache); 274EXPORT_SYMBOL(ioremap_nocache);
261 275
@@ -272,7 +286,8 @@ EXPORT_SYMBOL(ioremap_nocache);
272void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) 286void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
273{ 287{
274 if (pat_wc_enabled) 288 if (pat_wc_enabled)
275 return __ioremap(phys_addr, size, _PAGE_CACHE_WC); 289 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
290 __builtin_return_address(0));
276 else 291 else
277 return ioremap_nocache(phys_addr, size); 292 return ioremap_nocache(phys_addr, size);
278} 293}
@@ -280,7 +295,8 @@ EXPORT_SYMBOL(ioremap_wc);
280 295
281void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) 296void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
282{ 297{
283 return __ioremap(phys_addr, size, _PAGE_CACHE_WB); 298 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
299 __builtin_return_address(0));
284} 300}
285EXPORT_SYMBOL(ioremap_cache); 301EXPORT_SYMBOL(ioremap_cache);
286 302
@@ -336,6 +352,35 @@ void iounmap(volatile void __iomem *addr)
336} 352}
337EXPORT_SYMBOL(iounmap); 353EXPORT_SYMBOL(iounmap);
338 354
355/*
356 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
357 * access
358 */
359void *xlate_dev_mem_ptr(unsigned long phys)
360{
361 void *addr;
362 unsigned long start = phys & PAGE_MASK;
363
364 /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
365 if (page_is_ram(start >> PAGE_SHIFT))
366 return __va(phys);
367
368 addr = (void *)ioremap(start, PAGE_SIZE);
369 if (addr)
370 addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
371
372 return addr;
373}
374
375void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
376{
377 if (page_is_ram(phys >> PAGE_SHIFT))
378 return;
379
380 iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
381 return;
382}
383
339#ifdef CONFIG_X86_32 384#ifdef CONFIG_X86_32
340 385
341int __initdata early_ioremap_debug; 386int __initdata early_ioremap_debug;
@@ -407,7 +452,7 @@ void __init early_ioremap_clear(void)
407 452
408 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); 453 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
409 pmd_clear(pmd); 454 pmd_clear(pmd);
410 paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT); 455 paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
411 __flush_tlb_all(); 456 __flush_tlb_all();
412} 457}
413 458
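
xlate_dev_mem_ptr() and unxlate_dev_mem_ptr() above give /dev/mem one way to reach any physical address: RAM is accessed through the direct mapping via __va() (nothing to undo later), while non-RAM gets a temporary page-sized ioremap() whose return value is combined with the sub-page offset and unmapped again afterwards. The pointer arithmetic of the non-RAM path as a standalone check, with a made-up mapping base standing in for what ioremap() would return:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long phys    = 0xfed000a4UL;      /* arbitrary MMIO byte */
        unsigned long start   = phys & PAGE_MASK;  /* page ioremap() is asked to map */
        unsigned long mapping = 0xf8a40000UL;      /* hypothetical ioremap() result */
        unsigned long ptr     = mapping | (phys & ~PAGE_MASK);

        printf("phys %#lx: map page %#lx, access at %#lx (offset %#lx)\n",
               phys, start, ptr, phys & ~PAGE_MASK);
        /* unxlate_dev_mem_ptr() later unmaps ptr & PAGE_MASK, i.e. the mapping base */
        printf("unmap %#lx\n", ptr & PAGE_MASK);
        return 0;
}
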
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 86808e666f9c..1f476e477844 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -13,12 +13,15 @@
13#include <linux/nodemask.h> 13#include <linux/nodemask.h>
14#include <asm/io.h> 14#include <asm/io.h>
15#include <linux/pci_ids.h> 15#include <linux/pci_ids.h>
16#include <linux/acpi.h>
16#include <asm/types.h> 17#include <asm/types.h>
17#include <asm/mmzone.h> 18#include <asm/mmzone.h>
18#include <asm/proto.h> 19#include <asm/proto.h>
19#include <asm/e820.h> 20#include <asm/e820.h>
20#include <asm/pci-direct.h> 21#include <asm/pci-direct.h>
21#include <asm/numa.h> 22#include <asm/numa.h>
23#include <asm/mpspec.h>
24#include <asm/apic.h>
22 25
23static __init int find_northbridge(void) 26static __init int find_northbridge(void)
24{ 27{
@@ -44,6 +47,30 @@ static __init int find_northbridge(void)
44 return -1; 47 return -1;
45} 48}
46 49
50static __init void early_get_boot_cpu_id(void)
51{
52 /*
53 * need to get boot_cpu_id so can use that to create apicid_to_node
54 * in k8_scan_nodes()
55 */
56 /*
57 * Find possible boot-time SMP configuration:
58 */
59 early_find_smp_config();
60#ifdef CONFIG_ACPI
61 /*
62 * Read APIC information from ACPI tables.
63 */
64 early_acpi_boot_init();
65#endif
66 /*
67 * get boot-time SMP configuration:
68 */
69 if (smp_found_config)
70 early_get_smp_config();
71 early_init_lapic_mapping();
72}
73
47int __init k8_scan_nodes(unsigned long start, unsigned long end) 74int __init k8_scan_nodes(unsigned long start, unsigned long end)
48{ 75{
49 unsigned long prevbase; 76 unsigned long prevbase;
@@ -56,6 +83,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
56 unsigned cores; 83 unsigned cores;
57 unsigned bits; 84 unsigned bits;
58 int j; 85 int j;
86 unsigned apicid_base;
59 87
60 if (!early_pci_allowed()) 88 if (!early_pci_allowed())
61 return -1; 89 return -1;
@@ -174,11 +202,19 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
174 /* use the coreid bits from early_identify_cpu */ 202 /* use the coreid bits from early_identify_cpu */
175 bits = boot_cpu_data.x86_coreid_bits; 203 bits = boot_cpu_data.x86_coreid_bits;
176 cores = (1<<bits); 204 cores = (1<<bits);
205 apicid_base = 0;
206 /* need to get boot_cpu_id early for system with apicid lifting */
207 early_get_boot_cpu_id();
208 if (boot_cpu_physical_apicid > 0) {
209 printk(KERN_INFO "BSP APIC ID: %02x\n",
210 boot_cpu_physical_apicid);
211 apicid_base = boot_cpu_physical_apicid;
212 }
177 213
178 for (i = 0; i < 8; i++) { 214 for (i = 0; i < 8; i++) {
179 if (nodes[i].start != nodes[i].end) { 215 if (nodes[i].start != nodes[i].end) {
180 nodeid = nodeids[i]; 216 nodeid = nodeids[i];
181 for (j = 0; j < cores; j++) 217 for (j = apicid_base; j < cores + apicid_base; j++)
182 apicid_to_node[(nodeid << bits) + j] = i; 218 apicid_to_node[(nodeid << bits) + j] = i;
183 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 219 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
184 } 220 }
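
k8_scan_nodes() now offsets the apicid_to_node[] fill by the boot CPU's APIC ID, obtained early via early_get_boot_cpu_id(), so systems that lift APIC IDs (BSP APIC ID != 0) still map every core's APIC ID to the correct node. A standalone sketch of the table fill for a hypothetical two-node box with two cores per node and an APIC ID base of 4:

#include <stdio.h>

#define MAX_APICS 32

int main(void)
{
        int apicid_to_node[MAX_APICS];
        int bits = 1;                   /* x86_coreid_bits: 2 cores per node */
        int cores = 1 << bits;
        int apicid_base = 4;            /* hypothetical lifted BSP APIC ID */
        int nodeids[] = { 0, 1 };       /* illustrative northbridge node IDs */

        for (int k = 0; k < MAX_APICS; k++)
                apicid_to_node[k] = -1; /* unassigned */

        /* Same fill loop as the patch: start at apicid_base instead of 0. */
        for (int i = 0; i < 2; i++) {
                int nodeid = nodeids[i];
                for (int j = apicid_base; j < cores + apicid_base; j++)
                        apicid_to_node[(nodeid << bits) + j] = i;
        }

        for (int k = 0; k < MAX_APICS; k++)
                if (apicid_to_node[k] >= 0)
                        printf("apicid %2d -> node %d\n", k, apicid_to_node[k]);
        return 0;
}

With apicid_base 0 the entries would sit at APIC IDs 0..3; with the lifted base of 4 they land at 4..7, matching what such firmware actually hands out.
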
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9a6892200b27..c5066d519e5d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
196 unsigned long bootmap_start, nodedata_phys; 196 unsigned long bootmap_start, nodedata_phys;
197 void *bootmap; 197 void *bootmap;
198 const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); 198 const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
199 int nid;
199 200
200 start = round_up(start, ZONE_ALIGN); 201 start = round_up(start, ZONE_ALIGN);
201 202
@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
218 NODE_DATA(nodeid)->node_start_pfn = start_pfn; 219 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
219 NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; 220 NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
220 221
221 /* Find a place for the bootmem map */ 222 /*
223 * Find a place for the bootmem map
224 * nodedata_phys could be on other nodes by alloc_bootmem,
225 * so need to sure bootmap_start not to be small, otherwise
226 * early_node_mem will get that with find_e820_area instead
227 * of alloc_bootmem, that could clash with reserved range
228 */
222 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 229 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
223 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); 230 nid = phys_to_nid(nodedata_phys);
231 if (nid == nodeid)
232 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
233 else
234 bootmap_start = round_up(start, PAGE_SIZE);
224 /* 235 /*
225 * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like 236 * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like
226 * to use that to align to PAGE_SIZE 237 * to use that to align to PAGE_SIZE
@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
245 256
246 free_bootmem_with_active_regions(nodeid, end); 257 free_bootmem_with_active_regions(nodeid, end);
247 258
248 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size, 259 /*
249 BOOTMEM_DEFAULT); 260 * convert early reserve to bootmem reserve earlier
250 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, 261 * otherwise early_node_mem could use early reserved mem
251 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); 262 * on previous node
263 */
264 early_res_to_bootmem(start, end);
265
266 /*
267 * in some case early_node_mem could use alloc_bootmem
268 * to get range on other node, don't reserve that again
269 */
270 if (nid != nodeid)
271 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
272 else
273 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
274 pgdat_size, BOOTMEM_DEFAULT);
275 nid = phys_to_nid(bootmap_start);
276 if (nid != nodeid)
277 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
278 else
279 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
280 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
281
252#ifdef CONFIG_ACPI_NUMA 282#ifdef CONFIG_ACPI_NUMA
253 srat_reserve_add_area(nodeid); 283 srat_reserve_add_area(nodeid);
254#endif 284#endif
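
The setup_node_bootmem() changes above handle the case where alloc_bootmem hands back NODE_DATA (or the bootmap) on a different node than the one being set up: the bootmap is placed right after nodedata_phys only when nodedata_phys really is on this node, otherwise at the start of the node's own range, and the bootmem reservation is skipped for memory another node owns. A compact sketch of that placement decision, with round_up() and a hypothetical two-node phys_to_nid() as stand-ins:

#include <stdio.h>

#define PAGE_SIZE  4096ULL
#define NODE_SPLIT (4ULL << 30)          /* hypothetical: node 0 below 4 GB */

static unsigned long long round_up_to(unsigned long long x, unsigned long long a)
{
        return (x + a - 1) & ~(a - 1);
}

static int phys_to_nid(unsigned long long phys)
{
        return phys < NODE_SPLIT ? 0 : 1;
}

/* Mirrors the bootmap_start choice added in setup_node_bootmem(). */
static void place_bootmap(int nodeid, unsigned long long start,
                          unsigned long long nodedata_phys,
                          unsigned long long pgdat_size)
{
        unsigned long long bootmap_start;
        int nid = phys_to_nid(nodedata_phys);

        if (nid == nodeid)
                bootmap_start = round_up_to(nodedata_phys + pgdat_size, PAGE_SIZE);
        else
                bootmap_start = round_up_to(start, PAGE_SIZE);

        printf("node %d: NODE_DATA at %#llx is on node %d -> bootmap at %#llx%s\n",
               nodeid, nodedata_phys, nid, bootmap_start,
               nid == nodeid ? "" : " (NODE_DATA reservation skipped here)");
}

int main(void)
{
        /* node 1 starts at 4 GB; first call got node-local NODE_DATA,
         * second call got it from node 0 via alloc_bootmem. */
        place_bootmap(1, NODE_SPLIT, NODE_SPLIT + 0x1000, 0x4000);
        place_bootmap(1, NODE_SPLIT, 0x200000, 0x4000);
        return 0;
}
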
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f7823a172868..60bcb5b6a37e 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -483,9 +483,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
483 goto out_unlock; 483 goto out_unlock;
484 484
485 pbase = (pte_t *)page_address(base); 485 pbase = (pte_t *)page_address(base);
486#ifdef CONFIG_X86_32 486 paravirt_alloc_pte(&init_mm, page_to_pfn(base));
487 paravirt_alloc_pt(&init_mm, page_to_pfn(base));
488#endif
489 ref_prot = pte_pgprot(pte_clrhuge(*kpte)); 487 ref_prot = pte_pgprot(pte_clrhuge(*kpte));
490 488
491#ifdef CONFIG_X86_64 489#ifdef CONFIG_X86_64
@@ -779,14 +777,20 @@ static inline int change_page_attr_clear(unsigned long addr, int numpages,
779 777
780int _set_memory_uc(unsigned long addr, int numpages) 778int _set_memory_uc(unsigned long addr, int numpages)
781{ 779{
780 /*
781 * for now UC MINUS. see comments in ioremap_nocache()
782 */
782 return change_page_attr_set(addr, numpages, 783 return change_page_attr_set(addr, numpages,
783 __pgprot(_PAGE_CACHE_UC)); 784 __pgprot(_PAGE_CACHE_UC_MINUS));
784} 785}
785 786
786int set_memory_uc(unsigned long addr, int numpages) 787int set_memory_uc(unsigned long addr, int numpages)
787{ 788{
789 /*
790 * for now UC MINUS. see comments in ioremap_nocache()
791 */
788 if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, 792 if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
789 _PAGE_CACHE_UC, NULL)) 793 _PAGE_CACHE_UC_MINUS, NULL))
790 return -EINVAL; 794 return -EINVAL;
791 795
792 return _set_memory_uc(addr, numpages); 796 return _set_memory_uc(addr, numpages);
@@ -993,7 +997,7 @@ static const struct file_operations dpa_fops = {
993 .release = single_release, 997 .release = single_release,
994}; 998};
995 999
996int __init debug_pagealloc_proc_init(void) 1000static int __init debug_pagealloc_proc_init(void)
997{ 1001{
998 struct dentry *de; 1002 struct dentry *de;
999 1003
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 72c0f6097402..277446cd30b6 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -11,16 +11,19 @@
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/gfp.h> 12#include <linux/gfp.h>
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/bootmem.h>
14 15
15#include <asm/msr.h> 16#include <asm/msr.h>
16#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
17#include <asm/processor.h> 18#include <asm/processor.h>
19#include <asm/page.h>
18#include <asm/pgtable.h> 20#include <asm/pgtable.h>
19#include <asm/pat.h> 21#include <asm/pat.h>
20#include <asm/e820.h> 22#include <asm/e820.h>
21#include <asm/cacheflush.h> 23#include <asm/cacheflush.h>
22#include <asm/fcntl.h> 24#include <asm/fcntl.h>
23#include <asm/mtrr.h> 25#include <asm/mtrr.h>
26#include <asm/io.h>
24 27
25int pat_wc_enabled = 1; 28int pat_wc_enabled = 1;
26 29
@@ -190,6 +193,21 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
190 return 0; 193 return 0;
191} 194}
192 195
196/*
197 * req_type typically has one of the:
198 * - _PAGE_CACHE_WB
199 * - _PAGE_CACHE_WC
200 * - _PAGE_CACHE_UC_MINUS
201 * - _PAGE_CACHE_UC
202 *
203 * req_type will have a special case value '-1', when requester want to inherit
204 * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
205 *
206 * If ret_type is NULL, function will return an error if it cannot reserve the
207 * region with req_type. If ret_type is non-null, function will return
208 * available type in ret_type in case of no error. In case of any error
209 * it will return a negative return value.
210 */
193int reserve_memtype(u64 start, u64 end, unsigned long req_type, 211int reserve_memtype(u64 start, u64 end, unsigned long req_type,
194 unsigned long *ret_type) 212 unsigned long *ret_type)
195{ 213{
@@ -200,9 +218,14 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
200 218
201 /* Only track when pat_wc_enabled */ 219 /* Only track when pat_wc_enabled */
202 if (!pat_wc_enabled) { 220 if (!pat_wc_enabled) {
203 if (ret_type) 221 /* This is identical to page table setting without PAT */
204 *ret_type = req_type; 222 if (ret_type) {
205 223 if (req_type == -1) {
224 *ret_type = _PAGE_CACHE_WB;
225 } else {
226 *ret_type = req_type;
227 }
228 }
206 return 0; 229 return 0;
207 } 230 }
208 231
@@ -214,8 +237,29 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
214 return 0; 237 return 0;
215 } 238 }
216 239
217 req_type &= _PAGE_CACHE_MASK; 240 if (req_type == -1) {
218 err = pat_x_mtrr_type(start, end, req_type, &actual_type); 241 /*
242 * Special case where caller wants to inherit from mtrr or
243 * existing pat mapping, defaulting to UC_MINUS in case of
244 * no match.
245 */
246 u8 mtrr_type = mtrr_type_lookup(start, end);
247 if (mtrr_type == 0xFE) { /* MTRR match error */
248 err = -1;
249 }
250
251 if (mtrr_type == MTRR_TYPE_WRBACK) {
252 req_type = _PAGE_CACHE_WB;
253 actual_type = _PAGE_CACHE_WB;
254 } else {
255 req_type = _PAGE_CACHE_UC_MINUS;
256 actual_type = _PAGE_CACHE_UC_MINUS;
257 }
258 } else {
259 req_type &= _PAGE_CACHE_MASK;
260 err = pat_x_mtrr_type(start, end, req_type, &actual_type);
261 }
262
219 if (err) { 263 if (err) {
220 if (ret_type) 264 if (ret_type)
221 *ret_type = actual_type; 265 *ret_type = actual_type;
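
reserve_memtype() gains a special req_type value of -1: the caller asks to inherit the effective memory type instead of imposing one, and the new branch resolves it to WB when mtrr_type_lookup() reports write-back for the range and to UC_MINUS otherwise (0xFE flags an MTRR lookup error). A hedged sketch of just that resolution step, with the MTRR lookup results fed in as plain values and the error handling simplified to an early return:

#include <stdio.h>

/* Cache attribute encodings; values here are illustrative only. */
enum { CACHE_WB, CACHE_WC, CACHE_UC_MINUS, CACHE_UC };

static const char *cattr_name(int t)
{
        static const char *n[] = {
                "write-back", "write-combine", "uncached-minus", "uncached"
        };
        return n[t];
}

#define MTRR_TYPE_WRBACK 6      /* real MTRR encoding for write-back */
#define MTRR_MATCH_ERROR 0xFE   /* the error value checked in the patch */

/* Mirrors the req_type == -1 branch added to reserve_memtype():
 * inherit WB only when the MTRRs say WB, else fall back to UC_MINUS. */
static int resolve_inherited_type(unsigned char mtrr_type, int *actual_type)
{
        if (mtrr_type == MTRR_MATCH_ERROR)
                return -1;
        *actual_type = (mtrr_type == MTRR_TYPE_WRBACK) ? CACHE_WB : CACHE_UC_MINUS;
        return 0;
}

int main(void)
{
        unsigned char lookups[] = { MTRR_TYPE_WRBACK, 0, MTRR_MATCH_ERROR };

        for (unsigned int i = 0; i < sizeof(lookups) / sizeof(lookups[0]); i++) {
                int type;

                if (resolve_inherited_type(lookups[i], &type) < 0)
                        printf("mtrr lookup %#x -> error\n", (unsigned)lookups[i]);
                else
                        printf("mtrr lookup %#x -> %s\n",
                               (unsigned)lookups[i], cattr_name(type));
        }
        return 0;
}
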
@@ -241,7 +285,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
241 struct memtype *saved_ptr; 285 struct memtype *saved_ptr;
242 286
243 if (parse->start >= end) { 287 if (parse->start >= end) {
244 printk("New Entry\n"); 288 pr_debug("New Entry\n");
245 list_add(&new_entry->nd, parse->nd.prev); 289 list_add(&new_entry->nd, parse->nd.prev);
246 new_entry = NULL; 290 new_entry = NULL;
247 break; 291 break;
@@ -291,7 +335,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
291 break; 335 break;
292 } 336 }
293 337
294 printk("Overlap at 0x%Lx-0x%Lx\n", 338 pr_debug("Overlap at 0x%Lx-0x%Lx\n",
295 saved_ptr->start, saved_ptr->end); 339 saved_ptr->start, saved_ptr->end);
296 /* No conflict. Go ahead and add this new entry */ 340 /* No conflict. Go ahead and add this new entry */
297 list_add(&new_entry->nd, saved_ptr->nd.prev); 341 list_add(&new_entry->nd, saved_ptr->nd.prev);
@@ -343,8 +387,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
343 break; 387 break;
344 } 388 }
345 389
346 printk("Overlap at 0x%Lx-0x%Lx\n", 390 pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
347 saved_ptr->start, saved_ptr->end); 391 saved_ptr->start, saved_ptr->end);
348 /* No conflict. Go ahead and add this new entry */ 392 /* No conflict. Go ahead and add this new entry */
349 list_add(&new_entry->nd, &saved_ptr->nd); 393 list_add(&new_entry->nd, &saved_ptr->nd);
350 new_entry = NULL; 394 new_entry = NULL;
@@ -353,7 +397,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
353 } 397 }
354 398
355 if (err) { 399 if (err) {
356 printk( 400 printk(KERN_INFO
357 "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n", 401 "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
358 start, end, cattr_name(new_entry->type), 402 start, end, cattr_name(new_entry->type),
359 cattr_name(req_type)); 403 cattr_name(req_type));
@@ -365,16 +409,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
365 if (new_entry) { 409 if (new_entry) {
366 /* No conflict. Not yet added to the list. Add to the tail */ 410 /* No conflict. Not yet added to the list. Add to the tail */
367 list_add_tail(&new_entry->nd, &memtype_list); 411 list_add_tail(&new_entry->nd, &memtype_list);
368 printk("New Entry\n"); 412 pr_debug("New Entry\n");
369 } 413 }
370 414
371 if (ret_type) { 415 if (ret_type) {
372 printk( 416 pr_debug(
373 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", 417 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
374 start, end, cattr_name(actual_type), 418 start, end, cattr_name(actual_type),
375 cattr_name(req_type), cattr_name(*ret_type)); 419 cattr_name(req_type), cattr_name(*ret_type));
376 } else { 420 } else {
377 printk( 421 pr_debug(
378 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n", 422 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
379 start, end, cattr_name(actual_type), 423 start, end, cattr_name(actual_type),
380 cattr_name(req_type)); 424 cattr_name(req_type));
@@ -411,11 +455,142 @@ int free_memtype(u64 start, u64 end)
411 spin_unlock(&memtype_lock); 455 spin_unlock(&memtype_lock);
412 456
413 if (err) { 457 if (err) {
414 printk(KERN_DEBUG "%s:%d freeing invalid memtype %Lx-%Lx\n", 458 printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n",
415 current->comm, current->pid, start, end); 459 current->comm, current->pid, start, end);
416 } 460 }
417 461
418 printk( "free_memtype request 0x%Lx-0x%Lx\n", start, end); 462 pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
419 return err; 463 return err;
420} 464}
421 465
466
467/*
468 * /dev/mem mmap interface. The memtype used for mapping varies:
469 * - Use UC for mappings with O_SYNC flag
470 * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
471 * inherit the memtype from existing mapping.
472 * - Else use UC_MINUS memtype (for backward compatibility with existing
473 * X drivers.
474 */
475pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
476 unsigned long size, pgprot_t vma_prot)
477{
478 return vma_prot;
479}
480
481#ifdef CONFIG_NONPROMISC_DEVMEM
482/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/
483static inline int range_is_allowed(unsigned long pfn, unsigned long size)
484{
485 return 1;
486}
487#else
488static inline int range_is_allowed(unsigned long pfn, unsigned long size)
489{
490 u64 from = ((u64)pfn) << PAGE_SHIFT;
491 u64 to = from + size;
492 u64 cursor = from;
493
494 while (cursor < to) {
495 if (!devmem_is_allowed(pfn)) {
496 printk(KERN_INFO
497 "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
498 current->comm, from, to);
499 return 0;
500 }
501 cursor += PAGE_SIZE;
502 pfn++;
503 }
504 return 1;
505}
506#endif /* CONFIG_NONPROMISC_DEVMEM */
507
508int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
509 unsigned long size, pgprot_t *vma_prot)
510{
511 u64 offset = ((u64) pfn) << PAGE_SHIFT;
512 unsigned long flags = _PAGE_CACHE_UC_MINUS;
513 int retval;
514
515 if (!range_is_allowed(pfn, size))
516 return 0;
517
518 if (file->f_flags & O_SYNC) {
519 flags = _PAGE_CACHE_UC;
520 }
521
522#ifdef CONFIG_X86_32
523 /*
524 * On the PPro and successors, the MTRRs are used to set
525 * memory types for physical addresses outside main memory,
526 * so blindly setting UC or PWT on those pages is wrong.
527 * For Pentiums and earlier, the surround logic should disable
528 * caching for the high addresses through the KEN pin, but
529 * we maintain the tradition of paranoia in this code.
530 */
531 if (!pat_wc_enabled &&
532 ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
533 test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
534 test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
535 test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
536 (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
537 flags = _PAGE_CACHE_UC;
538 }
539#endif
540
541 /*
542 * With O_SYNC, we can only take UC mapping. Fail if we cannot.
543 * Without O_SYNC, we want to get
544 * - WB for WB-able memory and no other conflicting mappings
545 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
546 * - Inherit from confliting mappings otherwise
547 */
548 if (flags != _PAGE_CACHE_UC_MINUS) {
549 retval = reserve_memtype(offset, offset + size, flags, NULL);
550 } else {
551 retval = reserve_memtype(offset, offset + size, -1, &flags);
552 }
553
554 if (retval < 0)
555 return 0;
556
557 if (pfn <= max_pfn_mapped &&
558 ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
559 free_memtype(offset, offset + size);
560 printk(KERN_INFO
561 "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
562 current->comm, current->pid,
563 cattr_name(flags),
564 offset, offset + size);
565 return 0;
566 }
567
568 *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
569 flags);
570 return 1;
571}
572
573void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
574{
575 u64 addr = (u64)pfn << PAGE_SHIFT;
576 unsigned long flags;
577 unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
578
579 reserve_memtype(addr, addr + size, want_flags, &flags);
580 if (flags != want_flags) {
581 printk(KERN_INFO
582 "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
583 current->comm, current->pid,
584 cattr_name(want_flags),
585 addr, addr + size,
586 cattr_name(flags));
587 }
588}
589
590void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
591{
592 u64 addr = (u64)pfn << PAGE_SHIFT;
593
594 free_memtype(addr, addr + size);
595}
596
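
phys_mem_access_prot_allowed() above picks the memtype for a /dev/mem mmap: O_SYNC forces strong UC, otherwise the request goes through reserve_memtype() with -1 so the mapping inherits from the MTRRs or an existing PAT entry, defaulting to UC_MINUS, and the result is folded into vma_prot. A trimmed-down model of that selection; reserve_memtype() is faked to always succeed and to resolve the inherit request to UC_MINUS, and the 32-bit legacy checks and the identity-map fixup are left out:

#include <stdio.h>

enum { CACHE_WB, CACHE_WC, CACHE_UC_MINUS, CACHE_UC };
#define REQ_INHERIT (-1)

static const char *cattr_name(int t)
{
        static const char *n[] = { "WB", "WC", "UC-", "UC" };
        return n[t];
}

/* Fake reserve_memtype(): always succeeds, and resolves the inherit
 * request to UC_MINUS, as the patch does when nothing conflicts and
 * the MTRRs are not write-back. */
static int reserve_memtype_stub(int req_type, int *ret_type)
{
        if (ret_type)
                *ret_type = (req_type == REQ_INHERIT) ? CACHE_UC_MINUS : req_type;
        return 0;
}

/* Mirrors the O_SYNC vs. inherit split in phys_mem_access_prot_allowed(). */
static int pick_devmem_type(int o_sync, int *flags)
{
        *flags = o_sync ? CACHE_UC : CACHE_UC_MINUS;

        if (*flags != CACHE_UC_MINUS)
                return reserve_memtype_stub(*flags, NULL);      /* must get UC */
        return reserve_memtype_stub(REQ_INHERIT, flags);        /* inherit type */
}

int main(void)
{
        for (int o_sync = 0; o_sync <= 1; o_sync++) {
                int flags;

                if (pick_devmem_type(o_sync, &flags) == 0)
                        printf("O_SYNC=%d -> mapping type %s\n",
                               o_sync, cattr_name(flags));
        }
        return 0;
}
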
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
new file mode 100644
index 000000000000..50159764f694
--- /dev/null
+++ b/arch/x86/mm/pgtable.c
@@ -0,0 +1,276 @@
1#include <linux/mm.h>
2#include <asm/pgalloc.h>
3#include <asm/pgtable.h>
4#include <asm/tlb.h>
5
6pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
7{
8 return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
9}
10
11pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
12{
13 struct page *pte;
14
15#ifdef CONFIG_HIGHPTE
16 pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
17#else
18 pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
19#endif
20 if (pte)
21 pgtable_page_ctor(pte);
22 return pte;
23}
24
25void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
26{
27 pgtable_page_dtor(pte);
28 paravirt_release_pte(page_to_pfn(pte));
29 tlb_remove_page(tlb, pte);
30}
31
32#if PAGETABLE_LEVELS > 2
33void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
34{
35 paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
36 tlb_remove_page(tlb, virt_to_page(pmd));
37}
38
39#if PAGETABLE_LEVELS > 3
40void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
41{
42 paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
43 tlb_remove_page(tlb, virt_to_page(pud));
44}
45#endif /* PAGETABLE_LEVELS > 3 */
46#endif /* PAGETABLE_LEVELS > 2 */
47
48static inline void pgd_list_add(pgd_t *pgd)
49{
50 struct page *page = virt_to_page(pgd);
51
52 list_add(&page->lru, &pgd_list);
53}
54
55static inline void pgd_list_del(pgd_t *pgd)
56{
57 struct page *page = virt_to_page(pgd);
58
59 list_del(&page->lru);
60}
61
62#define UNSHARED_PTRS_PER_PGD \
63 (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
64
65static void pgd_ctor(void *p)
66{
67 pgd_t *pgd = p;
68 unsigned long flags;
69
70 /* Clear usermode parts of PGD */
71 memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
72
73 spin_lock_irqsave(&pgd_lock, flags);
74
75 /* If the pgd points to a shared pagetable level (either the
76 ptes in non-PAE, or shared PMD in PAE), then just copy the
77 references from swapper_pg_dir. */
78 if (PAGETABLE_LEVELS == 2 ||
79 (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
80 PAGETABLE_LEVELS == 4) {
81 clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
82 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
83 KERNEL_PGD_PTRS);
84 paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
85 __pa(swapper_pg_dir) >> PAGE_SHIFT,
86 KERNEL_PGD_BOUNDARY,
87 KERNEL_PGD_PTRS);
88 }
89
90 /* list required to sync kernel mapping updates */
91 if (!SHARED_KERNEL_PMD)
92 pgd_list_add(pgd);
93
94 spin_unlock_irqrestore(&pgd_lock, flags);
95}
96
97static void pgd_dtor(void *pgd)
98{
99 unsigned long flags; /* can be called from interrupt context */
100
101 if (SHARED_KERNEL_PMD)
102 return;
103
104 spin_lock_irqsave(&pgd_lock, flags);
105 pgd_list_del(pgd);
106 spin_unlock_irqrestore(&pgd_lock, flags);
107}
108
109/*
110 * List of all pgd's needed for non-PAE so it can invalidate entries
111 * in both cached and uncached pgd's; not needed for PAE since the
112 * kernel pmd is shared. If PAE were not to share the pmd a similar
113 * tactic would be needed. This is essentially codepath-based locking
114 * against pageattr.c; it is the unique case in which a valid change
115 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
116 * vmalloc faults work because attached pagetables are never freed.
117 * -- wli
118 */
119
120#ifdef CONFIG_X86_PAE
121/*
122 * Mop up any pmd pages which may still be attached to the pgd.
123 * Normally they will be freed by munmap/exit_mmap, but any pmd we
124 * preallocate which never got a corresponding vma will need to be
125 * freed manually.
126 */
127static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
128{
129 int i;
130
131 for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
132 pgd_t pgd = pgdp[i];
133
134 if (pgd_val(pgd) != 0) {
135 pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
136
137 pgdp[i] = native_make_pgd(0);
138
139 paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
140 pmd_free(mm, pmd);
141 }
142 }
143}
144
145/*
146 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
147 * updating the top-level pagetable entries to guarantee the
148 * processor notices the update. Since this is expensive, and
149 * all 4 top-level entries are used almost immediately in a
150 * new process's life, we just pre-populate them here.
151 *
152 * Also, if we're in a paravirt environment where the kernel pmd is
153 * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
154 * and initialize the kernel pmds here.
155 */
156static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
157{
158 pud_t *pud;
159 unsigned long addr;
160 int i;
161
162 pud = pud_offset(pgd, 0);
163 for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
164 i++, pud++, addr += PUD_SIZE) {
165 pmd_t *pmd = pmd_alloc_one(mm, addr);
166
167 if (!pmd) {
168 pgd_mop_up_pmds(mm, pgd);
169 return 0;
170 }
171
172 if (i >= KERNEL_PGD_BOUNDARY)
173 memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
174 sizeof(pmd_t) * PTRS_PER_PMD);
175
176 pud_populate(mm, pud, pmd);
177 }
178
179 return 1;
180}
181
182void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
183{
184 paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
185
186 /* Note: almost everything apart from _PAGE_PRESENT is
187 reserved at the pmd (PDPT) level. */
188 set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
189
190 /*
191 * According to Intel App note "TLBs, Paging-Structure Caches,
192 * and Their Invalidation", April 2007, document 317080-001,
193 * section 8.1: in PAE mode we explicitly have to flush the
194 * TLB via cr3 if the top-level pgd is changed...
195 */
196 if (mm == current->active_mm)
197 write_cr3(read_cr3());
198}
199#else /* !CONFIG_X86_PAE */
200/* No need to prepopulate any pagetable entries in non-PAE modes. */
201static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
202{
203 return 1;
204}
205
206static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
207{
208}
209#endif /* CONFIG_X86_PAE */
210
211pgd_t *pgd_alloc(struct mm_struct *mm)
212{
213 pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
214
215 /* so that alloc_pmd can use it */
216 mm->pgd = pgd;
217 if (pgd)
218 pgd_ctor(pgd);
219
220 if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
221 pgd_dtor(pgd);
222 free_page((unsigned long)pgd);
223 pgd = NULL;
224 }
225
226 return pgd;
227}
228
229void pgd_free(struct mm_struct *mm, pgd_t *pgd)
230{
231 pgd_mop_up_pmds(mm, pgd);
232 pgd_dtor(pgd);
233 free_page((unsigned long)pgd);
234}
235
236int ptep_set_access_flags(struct vm_area_struct *vma,
237 unsigned long address, pte_t *ptep,
238 pte_t entry, int dirty)
239{
240 int changed = !pte_same(*ptep, entry);
241
242 if (changed && dirty) {
243 *ptep = entry;
244 pte_update_defer(vma->vm_mm, address, ptep);
245 flush_tlb_page(vma, address);
246 }
247
248 return changed;
249}
250
251int ptep_test_and_clear_young(struct vm_area_struct *vma,
252 unsigned long addr, pte_t *ptep)
253{
254 int ret = 0;
255
256 if (pte_young(*ptep))
257 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
258 &ptep->pte);
259
260 if (ret)
261 pte_update(vma->vm_mm, addr, ptep);
262
263 return ret;
264}
265
266int ptep_clear_flush_young(struct vm_area_struct *vma,
267 unsigned long address, pte_t *ptep)
268{
269 int young;
270
271 young = ptep_test_and_clear_young(vma, address, ptep);
272 if (young)
273 flush_tlb_page(vma, address);
274
275 return young;
276}
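
The consolidated pgd_ctor() in the new pgtable.c clears the user slots of a fresh pgd and clones the kernel slots from swapper_pg_dir, with KERNEL_PGD_BOUNDARY (the pgd index of PAGE_OFFSET) replacing the USER_PTRS_PER_PGD bound used by the old pgtable_32.c code. For the classic 32-bit non-PAE 3G/1G split that boundary is pgd index 768; the standalone arithmetic below reproduces it (constants hard-coded for that configuration, PAE and 64-bit use different shifts):

#include <stdio.h>

/* 32-bit non-PAE constants: 4 MB per pgd entry, 1024 entries, kernel at 3 GB. */
#define PGDIR_SHIFT   22
#define PTRS_PER_PGD  1024
#define PAGE_OFFSET   0xC0000000UL

#define pgd_index(addr)      (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
#define KERNEL_PGD_BOUNDARY  pgd_index(PAGE_OFFSET)
#define KERNEL_PGD_PTRS      (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)

int main(void)
{
        printf("user slots   : 0 .. %lu  (memset to 0 by pgd_ctor)\n",
               KERNEL_PGD_BOUNDARY - 1);
        printf("kernel slots : %lu .. %d (cloned from swapper_pg_dir)\n",
               KERNEL_PGD_BOUNDARY, PTRS_PER_PGD - 1);
        printf("KERNEL_PGD_PTRS = %lu\n", KERNEL_PGD_PTRS);
        return 0;
}
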
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6fb9e7c6893f..9ee007be9142 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -173,210 +173,6 @@ void reserve_top_address(unsigned long reserve)
173 __VMALLOC_RESERVE += reserve; 173 __VMALLOC_RESERVE += reserve;
174} 174}
175 175
176pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
177{
178 return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
179}
180
181pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
182{
183 struct page *pte;
184
185#ifdef CONFIG_HIGHPTE
186 pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
187#else
188 pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
189#endif
190 if (pte)
191 pgtable_page_ctor(pte);
192 return pte;
193}
194
195/*
196 * List of all pgd's needed for non-PAE so it can invalidate entries
197 * in both cached and uncached pgd's; not needed for PAE since the
198 * kernel pmd is shared. If PAE were not to share the pmd a similar
199 * tactic would be needed. This is essentially codepath-based locking
200 * against pageattr.c; it is the unique case in which a valid change
201 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
202 * vmalloc faults work because attached pagetables are never freed.
203 * -- wli
204 */
205static inline void pgd_list_add(pgd_t *pgd)
206{
207 struct page *page = virt_to_page(pgd);
208
209 list_add(&page->lru, &pgd_list);
210}
211
212static inline void pgd_list_del(pgd_t *pgd)
213{
214 struct page *page = virt_to_page(pgd);
215
216 list_del(&page->lru);
217}
218
219#define UNSHARED_PTRS_PER_PGD \
220 (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
221
222static void pgd_ctor(void *p)
223{
224 pgd_t *pgd = p;
225 unsigned long flags;
226
227 /* Clear usermode parts of PGD */
228 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
229
230 spin_lock_irqsave(&pgd_lock, flags);
231
232 /* If the pgd points to a shared pagetable level (either the
233 ptes in non-PAE, or shared PMD in PAE), then just copy the
234 references from swapper_pg_dir. */
235 if (PAGETABLE_LEVELS == 2 ||
236 (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
237 clone_pgd_range(pgd + USER_PTRS_PER_PGD,
238 swapper_pg_dir + USER_PTRS_PER_PGD,
239 KERNEL_PGD_PTRS);
240 paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
241 __pa(swapper_pg_dir) >> PAGE_SHIFT,
242 USER_PTRS_PER_PGD,
243 KERNEL_PGD_PTRS);
244 }
245
246 /* list required to sync kernel mapping updates */
247 if (!SHARED_KERNEL_PMD)
248 pgd_list_add(pgd);
249
250 spin_unlock_irqrestore(&pgd_lock, flags);
251}
252
253static void pgd_dtor(void *pgd)
254{
255 unsigned long flags; /* can be called from interrupt context */
256
257 if (SHARED_KERNEL_PMD)
258 return;
259
260 spin_lock_irqsave(&pgd_lock, flags);
261 pgd_list_del(pgd);
262 spin_unlock_irqrestore(&pgd_lock, flags);
263}
264
265#ifdef CONFIG_X86_PAE
266/*
267 * Mop up any pmd pages which may still be attached to the pgd.
268 * Normally they will be freed by munmap/exit_mmap, but any pmd we
269 * preallocate which never got a corresponding vma will need to be
270 * freed manually.
271 */
272static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
273{
274 int i;
275
276 for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
277 pgd_t pgd = pgdp[i];
278
279 if (pgd_val(pgd) != 0) {
280 pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
281
282 pgdp[i] = native_make_pgd(0);
283
284 paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
285 pmd_free(mm, pmd);
286 }
287 }
288}
289
290/*
291 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
292 * updating the top-level pagetable entries to guarantee the
293 * processor notices the update. Since this is expensive, and
294 * all 4 top-level entries are used almost immediately in a
295 * new process's life, we just pre-populate them here.
296 *
297 * Also, if we're in a paravirt environment where the kernel pmd is
298 * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
299 * and initialize the kernel pmds here.
300 */
301static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
302{
303 pud_t *pud;
304 unsigned long addr;
305 int i;
306
307 pud = pud_offset(pgd, 0);
308 for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
309 i++, pud++, addr += PUD_SIZE) {
310 pmd_t *pmd = pmd_alloc_one(mm, addr);
311
312 if (!pmd) {
313 pgd_mop_up_pmds(mm, pgd);
314 return 0;
315 }
316
317 if (i >= USER_PTRS_PER_PGD)
318 memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
319 sizeof(pmd_t) * PTRS_PER_PMD);
320
321 pud_populate(mm, pud, pmd);
322 }
323
324 return 1;
325}
326#else /* !CONFIG_X86_PAE */
327/* No need to prepopulate any pagetable entries in non-PAE modes. */
328static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
329{
330 return 1;
331}
332
333static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
334{
335}
336#endif /* CONFIG_X86_PAE */
337
338pgd_t *pgd_alloc(struct mm_struct *mm)
339{
340 pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
341
342 /* so that alloc_pd can use it */
343 mm->pgd = pgd;
344 if (pgd)
345 pgd_ctor(pgd);
346
347 if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
348 pgd_dtor(pgd);
349 free_page((unsigned long)pgd);
350 pgd = NULL;
351 }
352
353 return pgd;
354}
355
356void pgd_free(struct mm_struct *mm, pgd_t *pgd)
357{
358 pgd_mop_up_pmds(mm, pgd);
359 pgd_dtor(pgd);
360 free_page((unsigned long)pgd);
361}
362
363void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
364{
365 pgtable_page_dtor(pte);
366 paravirt_release_pt(page_to_pfn(pte));
367 tlb_remove_page(tlb, pte);
368}
369
370#ifdef CONFIG_X86_PAE
371
372void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
373{
374 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
375 tlb_remove_page(tlb, virt_to_page(pmd));
376}
377
378#endif
379
380int pmd_bad(pmd_t pmd) 176int pmd_bad(pmd_t pmd)
381{ 177{
382 WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd)); 178 WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd));
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index fb43d89f46f3..3890234e5b26 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -163,7 +163,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
163 pxm, apic_id, node); 163 pxm, apic_id, node);
164} 164}
165 165
166int update_end_of_memory(unsigned long end) {return -1;} 166static int update_end_of_memory(unsigned long end) {return -1;}
167static int hotadd_enough_memory(struct bootnode *nd) {return 1;} 167static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
168#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 168#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
169static inline int save_add_info(void) {return 1;} 169static inline int save_add_info(void) {return 1;}