10 files changed, 159 insertions, 195 deletions
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d406c5239019..e71c5cbc8f35 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -266,16 +266,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
        if (!after_bootmem)
                find_early_table_space(end, use_pse, use_gbpages);
-#ifdef CONFIG_X86_32
-        for (i = 0; i < nr_range; i++)
-                kernel_physical_mapping_init(mr[i].start, mr[i].end,
-                                             mr[i].page_size_mask);
-        ret = end;
-#else /* CONFIG_X86_64 */
        for (i = 0; i < nr_range; i++)
                ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
                                                   mr[i].page_size_mask);
-#endif
 #ifdef CONFIG_X86_32
        early_ioremap_page_table_range_init();
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9a0c258a86be..2226f2c70ea3 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -241,6 +241,7 @@ kernel_physical_mapping_init(unsigned long start,
                             unsigned long page_size_mask)
 {
        int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
+        unsigned long last_map_addr = end;
        unsigned long start_pfn, end_pfn;
        pgd_t *pgd_base = swapper_pg_dir;
        int pgd_idx, pmd_idx, pte_ofs;
@@ -341,9 +342,10 @@ repeat:
                                        prot = PAGE_KERNEL_EXEC;
                                pages_4k++;
-                                if (mapping_iter == 1)
+                                if (mapping_iter == 1) {
                                        set_pte(pte, pfn_pte(pfn, init_prot));
-                                else
+                                        last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
+                                } else
                                        set_pte(pte, pfn_pte(pfn, prot));
                        }
                }
@@ -368,7 +370,7 @@ repeat:
                mapping_iter = 2;
                goto repeat;
        }
-        return 0;
+        return last_map_addr;
 }
 pte_t *kmap_pte;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c246d259822d..5eb1ba74a3a9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -24,43 +24,6 @@
 #include "physaddr.h"
-int page_is_ram(unsigned long pagenr)
-{
-        resource_size_t addr, end;
-        int i;
-        /*
-         * A special case is the first 4Kb of memory;
-         * This is a BIOS owned area, not kernel ram, but generally
-         * not listed as such in the E820 table.
-         */
-        if (pagenr == 0)
-                return 0;
-        /*
-         * Second special case: Some BIOSen report the PC BIOS
-         * area (640->1Mb) as ram even though it is not.
-         */
-        if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
-                    pagenr < (BIOS_END >> PAGE_SHIFT))
-                return 0;
-        for (i = 0; i < e820.nr_map; i++) {
-                /*
-                 * Not usable memory:
-                 */
-                if (e820.map[i].type != E820_RAM)
-                        continue;
-                addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
-                end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
-                if ((pagenr >= addr) && (pagenr < end))
-                        return 1;
-        }
-        return 0;
-}
 /*
 * Fix up the linear direct mapping of the kernel to avoid cache attribute
 * conflicts.
@@ -422,6 +385,10 @@ void __init early_ioremap_init(void)
         * The boot-ioremap range spans multiple pmds, for which
         * we are not prepared:
         */
+#define __FIXADDR_TOP (-PAGE_SIZE)
+        BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+                     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+#undef __FIXADDR_TOP
        if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
                WARN_ON(1);
                printk(KERN_WARNING "pmd %p != %p\n",
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 8cc183344140..b3b531a4f8e5 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -337,7 +337,7 @@ bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
        if (!shadow)
                return true;
-        status = kmemcheck_shadow_test(shadow, size);
+        status = kmemcheck_shadow_test_all(shadow, size);
        return status == KMEMCHECK_SHADOW_INITIALIZED;
 }
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index 3f66b82076a3..aec124214d97 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -125,12 +125,12 @@ void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n)
 enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
 {
+#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
        uint8_t *x;
        unsigned int i;
        x = shadow;
-#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
        /*
         * Make sure _some_ bytes are initialized. Gcc frequently generates
         * code to access neighboring bytes.
@@ -139,13 +139,25 @@ enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
                if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)
                        return x[i];
        }
+        return x[0];
 #else
+        return kmemcheck_shadow_test_all(shadow, size);
+#endif
+}
+enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size)
+{
+        uint8_t *x;
+        unsigned int i;
+        x = shadow;
        /* All bytes must be initialized. */
        for (i = 0; i < size; ++i) {
                if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)
                        return x[i];
        }
-#endif
        return x[0];
 }
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
index af46d9ab9d86..ff0b2f70fbcb 100644
--- a/arch/x86/mm/kmemcheck/shadow.h
+++ b/arch/x86/mm/kmemcheck/shadow.h
@@ -11,6 +11,8 @@ enum kmemcheck_shadow {
 void *kmemcheck_shadow_lookup(unsigned long address);
 enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
+enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow,
+                                                unsigned int size);
 void kmemcheck_shadow_set(void *shadow, unsigned int size);
 #endif
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index c8191defc38a..1dab5194fd9d 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -71,7 +71,7 @@ static int mmap_is_legacy(void)
        if (current->personality & ADDR_COMPAT_LAYOUT)
                return 1;
-        if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)
+        if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
                return 1;
        return sysctl_legacy_va_layout;
@@ -96,7 +96,7 @@ static unsigned long mmap_rnd(void)
 static unsigned long mmap_base(void)
 {
-        unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+        unsigned long gap = rlimit(RLIMIT_STACK);
        if (gap < MIN_GAP)
                gap = MIN_GAP;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 83bbc70d11bb..3307ea8bd43a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -427,7 +427,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
         * Calculate the number of big nodes that can be allocated as a result
         * of consolidating the remainder.
         */
-        big = ((size & ~FAKE_NODE_MIN_HASH_MASK) & nr_nodes) /
+        big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
                FAKE_NODE_MIN_SIZE;
        size &= FAKE_NODE_MIN_HASH_MASK;
@@ -502,77 +502,99 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
 }
 /*
- * Splits num_nodes nodes up equally starting at node_start.  The return value
+ * Returns the end address of a node so that there is at least `size' amount of
- * is the number of nodes split up and addr is adjusted to be at the end of the
+ * non-reserved memory or `max_addr' is reached.
- * last node allocated.
 */
-static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start,
+static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
-                                      int num_nodes)
 {
-        unsigned int big;
+        u64 end = start + size;
-        u64 size;
-        int i;
-        if (num_nodes <= 0)
-                return -1;
-        if (num_nodes > MAX_NUMNODES)
-                num_nodes = MAX_NUMNODES;
-        size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) /
-               num_nodes;
-        /*
-         * Calculate the number of big nodes that can be allocated as a result
-         * of consolidating the leftovers.
-         */
-        big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
-              FAKE_NODE_MIN_SIZE;
-        /* Round down to nearest FAKE_NODE_MIN_SIZE. */
-        size &= FAKE_NODE_MIN_HASH_MASK;
-        if (!size) {
-                printk(KERN_ERR "Not enough memory for each node.  "
-                       "NUMA emulation disabled.\n");
-                return -1;
-        }
-        for (i = node_start; i < num_nodes + node_start; i++) {
-                u64 end = *addr + size;
-                if (i < big)
+        while (end - start - e820_hole_size(start, end) < size) {
-                        end += FAKE_NODE_MIN_SIZE;
+                end += FAKE_NODE_MIN_SIZE;
-                /*
+                if (end > max_addr) {
-                 * The final node can have the remaining system RAM.  Other
-                 * nodes receive roughly the same amount of available pages.
-                 */
-                if (i == num_nodes + node_start - 1)
                        end = max_addr;
-                else
-                        while (end - *addr - e820_hole_size(*addr, end) <
-                               size) {
-                                end += FAKE_NODE_MIN_SIZE;
-                                if (end > max_addr) {
-                                        end = max_addr;
-                                        break;
-                                }
-                        }
-                if (setup_node_range(i, addr, end - *addr, max_addr) < 0)
                        break;
+                }
        }
-        return i - node_start + 1;
+        return end;
 }
 /*
- * Splits the remaining system RAM into chunks of size.  The remaining memory is
+ * Sets up fake nodes of `size' interleaved over physical nodes ranging from
- * always assigned to a final node and can be asymmetric.  Returns the number of
+ * `addr' to `max_addr'.  The return value is the number of nodes allocated.
+ * `addr' and `max_addr' are only used here to derive the minimum node size;
+ * the fake nodes themselves are carved out of the physnodes[] ranges, which
+ * are defined outside this function.  Returns -1 if `size' is zero.  Note
+ * that the returned count is bumped on every setup_node_range() call,
+ * including a call that returns a negative value.
- * nodes split.
  */
-static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start,
+static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
-                                      u64 size)
 {
-        int i = node_start;
+        nodemask_t physnode_mask = NODE_MASK_NONE;
-        size = (size << 20) & FAKE_NODE_MIN_HASH_MASK;
+        u64 min_size;
-        while (!setup_node_range(i++, addr, size, max_addr))
+        int ret = 0;
-                ;
+        int i;
-        return i - node_start;
+        if (!size)
+                return -1;
+        /*
+         * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
+         * increased accordingly if the requested size is too small.  This
+         * creates a uniform distribution of node sizes across the entire
+         * machine (but not necessarily over physical nodes).
+         *
+         * The minimum is never below FAKE_NODE_MIN_SIZE and is bumped up when
+         * masking it with FAKE_NODE_MIN_HASH_MASK would reduce it.  A request
+         * below that minimum is raised to it with a pr_err() notice, and the
+         * final size is then masked with FAKE_NODE_MIN_HASH_MASK.
+         */
+        min_size = (max_addr - addr - e820_hole_size(addr, max_addr)) /
+                                                MAX_NUMNODES;
+        min_size = max(min_size, FAKE_NODE_MIN_SIZE);
+        if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
+                min_size = (min_size + FAKE_NODE_MIN_SIZE) &
+                                                FAKE_NODE_MIN_HASH_MASK;
+        if (size < min_size) {
+                pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
+                        size >> 20, min_size >> 20);
+                size = min_size;
+        }
+        size &= FAKE_NODE_MIN_HASH_MASK;
+        for (i = 0; i < MAX_NUMNODES; i++)
+                if (physnodes[i].start != physnodes[i].end)
+                        node_set(i, physnode_mask);
+        /*
+         * Fill physical nodes with fake nodes of size until there is no memory
+         * left on any of them.
+         */
+        while (nodes_weight(physnode_mask)) {
+                for_each_node_mask(i, physnode_mask) {
+                        u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
+                        u64 end;
+                        end = find_end_of_node(physnodes[i].start,
+                                                physnodes[i].end, size);
+                        /*
+                         * If there won't be at least FAKE_NODE_MIN_SIZE of
+                         * non-reserved memory in ZONE_DMA32 for the next node,
+                         * this one must extend to the boundary.
+                         */
+                        if (end < dma32_end && dma32_end - end -
+                            e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+                                end = dma32_end;
+                        /*
+                         * If there won't be enough non-reserved memory for the
+                         * next node, this one must extend to the end of the
+                         * physical node.
+                         */
+                        if (physnodes[i].end - end -
+                            e820_hole_size(end, physnodes[i].end) < size)
+                                end = physnodes[i].end;
+                        /*
+                         * Setup the fake node that will be allocated as bootmem
+                         * later.  If setup_node_range() returns a negative
+                         * value, this physical node is cleared from the mask
+                         * (presumably because it has no memory left) and is
+                         * not visited again.  `ret' is post-incremented
+                         * regardless of the outcome of the call.
+                         */
+                        if (setup_node_range(ret++, &physnodes[i].start,
+                                                end - physnodes[i].start,
+                                                physnodes[i].end) < 0)
+                                node_clear(i, physnode_mask);
+                }
+        }
+        return ret;
 }
 /*
@@ -582,87 +604,32 @@ static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start,
 static int __init numa_emulation(unsigned long start_pfn,
                        unsigned long last_pfn, int acpi, int k8)
 {
-        u64 size, addr = start_pfn << PAGE_SHIFT;
+        u64 addr = start_pfn << PAGE_SHIFT;
        u64 max_addr = last_pfn << PAGE_SHIFT;
-        int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
        int num_phys_nodes;
+        int num_nodes;
+        int i;
        num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
        /*
-         * If the numa=fake command-line is just a single number N, split the
+         * If the numa=fake command-line contains a 'M' or 'G', it represents
-         * system RAM into N fake nodes.
+         * the fixed node size.  Otherwise, if it is just a single number N,
+         * split the system RAM into N fake nodes.
         */
-        if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
+        if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
-                long n = simple_strtol(cmdline, NULL, 0);
+                u64 size;
-                num_nodes = split_nodes_interleave(addr, max_addr,
-                                                        num_phys_nodes, n);
-                if (num_nodes < 0)
-                        return num_nodes;
-                goto out;
-        }
-        /* Parse the command line. */
+                size = memparse(cmdline, &cmdline);
-        for (coeff_flag = 0; ; cmdline++) {
+                num_nodes = split_nodes_size_interleave(addr, max_addr, size);
-                if (*cmdline && isdigit(*cmdline)) {
+        } else {
-                        num = num * 10 + *cmdline - '0';
+                unsigned long n;
-                        continue;
-                }
+                n = simple_strtoul(cmdline, NULL, 0);
-                if (*cmdline == '*') {
+                num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);
-                        if (num > 0)
-                                coeff = num;
-                        coeff_flag = 1;
-                }
-                if (!*cmdline || *cmdline == ',') {
-                        if (!coeff_flag)
-                                coeff = 1;
-                        /*
-                         * Round down to the nearest FAKE_NODE_MIN_SIZE.
-                         * Command-line coefficients are in megabytes.
-                         */
-                        size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
-                        if (size)
-                                for (i = 0; i < coeff; i++, num_nodes++)
-                                        if (setup_node_range(num_nodes, &addr,
-                                                size, max_addr) < 0)
-                                                goto done;
-                        if (!*cmdline)
-                                break;
-                        coeff_flag = 0;
-                        coeff = -1;
-                }
-                num = 0;
-        }
-done:
-        if (!num_nodes)
-                return -1;
-        /* Fill remainder of system RAM, if appropriate. */
-        if (addr < max_addr) {
-                if (coeff_flag && coeff < 0) {
-                        /* Split remaining nodes into num-sized chunks */
-                        num_nodes += split_nodes_by_size(&addr, max_addr,
-                                                         num_nodes, num);
-                        goto out;
-                }
-                switch (*(cmdline - 1)) {
-                case '*':
-                        /* Split remaining nodes into coeff chunks */
-                        if (coeff <= 0)
-                                break;
-                        num_nodes += split_nodes_equally(&addr, max_addr,
-                                                         num_nodes, coeff);
-                        break;
-                case ',':
-                        /* Do not allocate remaining system RAM */
-                        break;
-                default:
-                        /* Give one final node */
-                        setup_node_range(num_nodes, &addr, max_addr - addr,
-                                         max_addr);
-                        num_nodes++;
-                }
        }
-out:
+        if (num_nodes < 0)
+                return num_nodes;
        memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
        if (memnode_shift < 0) {
                memnode_shift = 0;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ed34f5e35999..c9ba9deafe83 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -6,6 +6,14 @@
 #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+#ifdef CONFIG_HIGHPTE
+#define PGALLOC_USER_GFP __GFP_HIGHMEM
+#else
+#define PGALLOC_USER_GFP 0
+#endif
+gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
        return (pte_t *)__get_free_page(PGALLOC_GFP);
@@ -15,16 +23,29 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
        struct page *pte;
-#ifdef CONFIG_HIGHPTE
+        pte = alloc_pages(__userpte_alloc_gfp, 0);
-        pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
-#else
-        pte = alloc_pages(PGALLOC_GFP, 0);
-#endif
        if (pte)
                pgtable_page_ctor(pte);
        return pte;
 }
+static int __init setup_userpte(char *arg)
+{
+        if (!arg)
+                return -EINVAL;
+        /*
+         * "userpte=nohigh" disables allocation of user pagetables in
+         * high memory by clearing __GFP_HIGHMEM from __userpte_alloc_gfp.
+         * It is the only accepted value: a missing argument (checked
+         * above) or any other string yields -EINVAL, success yields 0.
+         */
+        if (strcmp(arg, "nohigh") == 0)
+                __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
+        else
+                return -EINVAL;
+        return 0;
+}
+early_param("userpte", setup_userpte);
 void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
        pgtable_page_dtor(pte);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 65b58e4b0b8b..426f3a1a64d3 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -41,7 +41,7 @@ union smp_flush_state {
        struct {
                struct mm_struct *flush_mm;
                unsigned long flush_va;
-                spinlock_t tlbstate_lock;
+                raw_spinlock_t tlbstate_lock;
                DECLARE_BITMAP(flush_cpumask, NR_CPUS);
        };
        char pad[INTERNODE_CACHE_BYTES];
@@ -181,7 +181,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
         * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
         * probably not worth checking this for a cache-hot lock.
         */
-        spin_lock(&f->tlbstate_lock);
+        raw_spin_lock(&f->tlbstate_lock);
        f->flush_mm = mm;
        f->flush_va = va;
@@ -199,7 +199,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
        f->flush_mm = NULL;
        f->flush_va = 0;
-        spin_unlock(&f->tlbstate_lock);
+        raw_spin_unlock(&f->tlbstate_lock);
 }
 void native_flush_tlb_others(const struct cpumask *cpumask,
@@ -223,7 +223,7 @@ static int __cpuinit init_smp_flush(void)
        int i;
        for (i = 0; i < ARRAY_SIZE(flush_state); i++)
-                spin_lock_init(&flush_state[i].tlbstate_lock);
+                raw_spin_lock_init(&flush_state[i].tlbstate_lock);
        return 0;
 }

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d406c5239019..e71c5cbc8f35 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c
@@ -266,16 +266,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
266	if (!after_bootmem)	266	if (!after_bootmem)
267	find_early_table_space(end, use_pse, use_gbpages);	267	find_early_table_space(end, use_pse, use_gbpages);
268		268
269	#ifdef CONFIG_X86_32
270	for (i = 0; i < nr_range; i++)
271	kernel_physical_mapping_init(mr[i].start, mr[i].end,
272	mr[i].page_size_mask);
273	ret = end;
274	#else /* CONFIG_X86_64 */
275	for (i = 0; i < nr_range; i++)	269	for (i = 0; i < nr_range; i++)
276	ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,	270	ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
277	mr[i].page_size_mask);	271	mr[i].page_size_mask);
278	#endif
279		272
280	#ifdef CONFIG_X86_32	273	#ifdef CONFIG_X86_32
281	early_ioremap_page_table_range_init();	274	early_ioremap_page_table_range_init();


diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9a0c258a86be..2226f2c70ea3 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c
@@ -241,6 +241,7 @@ kernel_physical_mapping_init(unsigned long start,
241	unsigned long page_size_mask)	241	unsigned long page_size_mask)
242	{	242	{
243	int use_pse = page_size_mask == (1<<PG_LEVEL_2M);	243	int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
		244	unsigned long last_map_addr = end;
244	unsigned long start_pfn, end_pfn;	245	unsigned long start_pfn, end_pfn;
245	pgd_t *pgd_base = swapper_pg_dir;	246	pgd_t *pgd_base = swapper_pg_dir;
246	int pgd_idx, pmd_idx, pte_ofs;	247	int pgd_idx, pmd_idx, pte_ofs;
@@ -341,9 +342,10 @@ repeat:
341	prot = PAGE_KERNEL_EXEC;	342	prot = PAGE_KERNEL_EXEC;
342		343
343	pages_4k++;	344	pages_4k++;
344	if (mapping_iter == 1)	345	if (mapping_iter == 1) {
345	set_pte(pte, pfn_pte(pfn, init_prot));	346	set_pte(pte, pfn_pte(pfn, init_prot));
346	else	347	last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
		348	} else
347	set_pte(pte, pfn_pte(pfn, prot));	349	set_pte(pte, pfn_pte(pfn, prot));
348	}	350	}
349	}	351	}
@@ -368,7 +370,7 @@ repeat:
368	mapping_iter = 2;	370	mapping_iter = 2;
369	goto repeat;	371	goto repeat;
370	}	372	}
371	return 0;	373	return last_map_addr;
372	}	374	}
373		375
374	pte_t *kmap_pte;	376	pte_t *kmap_pte;


diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index c246d259822d..5eb1ba74a3a9 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c
@@ -24,43 +24,6 @@
24		24
25	#include "physaddr.h"	25	#include "physaddr.h"
26		26
27	int page_is_ram(unsigned long pagenr)
28	{
29	resource_size_t addr, end;
30	int i;
31
32	/*
33	* A special case is the first 4Kb of memory;
34	* This is a BIOS owned area, not kernel ram, but generally
35	* not listed as such in the E820 table.
36	*/
37	if (pagenr == 0)
38	return 0;
39
40	/*
41	* Second special case: Some BIOSen report the PC BIOS
42	* area (640->1Mb) as ram even though it is not.
43	*/
44	if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
45	pagenr < (BIOS_END >> PAGE_SHIFT))
46	return 0;
47
48	for (i = 0; i < e820.nr_map; i++) {
49	/*
50	* Not usable memory:
51	*/
52	if (e820.map[i].type != E820_RAM)
53	continue;
54	addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
55	end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
56
57
58	if ((pagenr >= addr) && (pagenr < end))
59	return 1;
60	}
61	return 0;
62	}
63
64	/*	27	/*
65	* Fix up the linear direct mapping of the kernel to avoid cache attribute	28	* Fix up the linear direct mapping of the kernel to avoid cache attribute
66	* conflicts.	29	* conflicts.
@@ -422,6 +385,10 @@ void __init early_ioremap_init(void)
422	* The boot-ioremap range spans multiple pmds, for which	385	* The boot-ioremap range spans multiple pmds, for which
423	* we are not prepared:	386	* we are not prepared:
424	*/	387	*/
		388	#define __FIXADDR_TOP (-PAGE_SIZE)
		389	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		390	!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
		391	#undef __FIXADDR_TOP
425	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {	392	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
426	WARN_ON(1);	393	WARN_ON(1);
427	printk(KERN_WARNING "pmd %p != %p\n",	394	printk(KERN_WARNING "pmd %p != %p\n",


diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index 8cc183344140..b3b531a4f8e5 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -337,7 +337,7 @@ bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
337	if (!shadow)	337	if (!shadow)
338	return true;	338	return true;
339		339
340	status = kmemcheck_shadow_test(shadow, size);	340	status = kmemcheck_shadow_test_all(shadow, size);
341		341
342	return status == KMEMCHECK_SHADOW_INITIALIZED;	342	return status == KMEMCHECK_SHADOW_INITIALIZED;
343	}	343	}


diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c index 3f66b82076a3..aec124214d97 100644 --- a/arch/x86/mm/kmemcheck/shadow.c +++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -125,12 +125,12 @@ void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n)
125		125
126	enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)	126	enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
127	{	127	{
		128	#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
128	uint8_t *x;	129	uint8_t *x;
129	unsigned int i;	130	unsigned int i;
130		131
131	x = shadow;	132	x = shadow;
132		133
133	#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
134	/*	134	/*
135	* Make sure _some_ bytes are initialized. Gcc frequently generates	135	* Make sure _some_ bytes are initialized. Gcc frequently generates
136	* code to access neighboring bytes.	136	* code to access neighboring bytes.
@@ -139,13 +139,25 @@ enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
139	if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)	139	if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)
140	return x[i];	140	return x[i];
141	}	141	}
		142
		143	return x[0];
142	#else	144	#else
		145	return kmemcheck_shadow_test_all(shadow, size);
		146	#endif
		147	}
		148
		149	enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size)
		150	{
		151	uint8_t *x;
		152	unsigned int i;
		153
		154	x = shadow;
		155
143	/* All bytes must be initialized. */	156	/* All bytes must be initialized. */
144	for (i = 0; i < size; ++i) {	157	for (i = 0; i < size; ++i) {
145	if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)	158	if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)
146	return x[i];	159	return x[i];
147	}	160	}
148	#endif
149		161
150	return x[0];	162	return x[0];
151	}	163	}


diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h index af46d9ab9d86..ff0b2f70fbcb 100644 --- a/arch/x86/mm/kmemcheck/shadow.h +++ b/arch/x86/mm/kmemcheck/shadow.h
@@ -11,6 +11,8 @@ enum kmemcheck_shadow {
11	void *kmemcheck_shadow_lookup(unsigned long address);	11	void *kmemcheck_shadow_lookup(unsigned long address);
12		12
13	enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);	13	enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
		14	enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow,
		15	unsigned int size);
14	void kmemcheck_shadow_set(void *shadow, unsigned int size);	16	void kmemcheck_shadow_set(void *shadow, unsigned int size);
15		17
16	#endif	18	#endif


diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index c8191defc38a..1dab5194fd9d 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c
@@ -71,7 +71,7 @@ static int mmap_is_legacy(void)
71	if (current->personality & ADDR_COMPAT_LAYOUT)	71	if (current->personality & ADDR_COMPAT_LAYOUT)
72	return 1;	72	return 1;
73		73
74	if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)	74	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
75	return 1;	75	return 1;
76		76
77	return sysctl_legacy_va_layout;	77	return sysctl_legacy_va_layout;
@@ -96,7 +96,7 @@ static unsigned long mmap_rnd(void)
96		96
97	static unsigned long mmap_base(void)	97	static unsigned long mmap_base(void)
98	{	98	{
99	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;	99	unsigned long gap = rlimit(RLIMIT_STACK);
100		100
101	if (gap < MIN_GAP)	101	if (gap < MIN_GAP)
102	gap = MIN_GAP;	102	gap = MIN_GAP;


diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 83bbc70d11bb..3307ea8bd43a 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c
@@ -427,7 +427,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
427	* Calculate the number of big nodes that can be allocated as a result	427	* Calculate the number of big nodes that can be allocated as a result
428	* of consolidating the remainder.	428	* of consolidating the remainder.
429	*/	429	*/
430	big = ((size & ~FAKE_NODE_MIN_HASH_MASK) & nr_nodes) /	430	big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
431	FAKE_NODE_MIN_SIZE;	431	FAKE_NODE_MIN_SIZE;
432		432
433	size &= FAKE_NODE_MIN_HASH_MASK;	433	size &= FAKE_NODE_MIN_HASH_MASK;
@@ -502,77 +502,99 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
502	}	502	}
503		503
504	/*	504	/*
505	* Splits num_nodes nodes up equally starting at node_start. The return value	505	* Returns the end address of a node so that there is at least `size' amount of
506	* is the number of nodes split up and addr is adjusted to be at the end of the	506	* non-reserved memory or `max_addr' is reached.
507	* last node allocated.
508	*/	507	*/
509	static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start,	508	static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
510	int num_nodes)
511	{	509	{
512	unsigned int big;	510	u64 end = start + size;
513	u64 size;
514	int i;
515
516	if (num_nodes <= 0)
517	return -1;
518	if (num_nodes > MAX_NUMNODES)
519	num_nodes = MAX_NUMNODES;
520	size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) /
521	num_nodes;
522	/*
523	* Calculate the number of big nodes that can be allocated as a result
524	* of consolidating the leftovers.
525	*/
526	big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
527	FAKE_NODE_MIN_SIZE;
528
529	/* Round down to nearest FAKE_NODE_MIN_SIZE. */
530	size &= FAKE_NODE_MIN_HASH_MASK;
531	if (!size) {
532	printk(KERN_ERR "Not enough memory for each node. "
533	"NUMA emulation disabled.\n");
534	return -1;
535	}
536
537	for (i = node_start; i < num_nodes + node_start; i++) {
538	u64 end = *addr + size;
539		511
540	if (i < big)	512	while (end - start - e820_hole_size(start, end) < size) {
541	end += FAKE_NODE_MIN_SIZE;	513	end += FAKE_NODE_MIN_SIZE;
542	/*	514	if (end > max_addr) {
543	* The final node can have the remaining system RAM. Other
544	* nodes receive roughly the same amount of available pages.
545	*/
546	if (i == num_nodes + node_start - 1)
547	end = max_addr;	515	end = max_addr;
548	else
549	while (end - *addr - e820_hole_size(*addr, end) <
550	size) {
551	end += FAKE_NODE_MIN_SIZE;
552	if (end > max_addr) {
553	end = max_addr;
554	break;
555	}
556	}
557	if (setup_node_range(i, addr, end - *addr, max_addr) < 0)
558	break;	516	break;
		517	}
559	}	518	}
560	return i - node_start + 1;	519	return end;
561	}	520	}
562		521
563	/*	522	/*
564	* Splits the remaining system RAM into chunks of size. The remaining memory is	523	* Sets up fake nodes of `size' interleaved over physical nodes ranging from
565	* always assigned to a final node and can be asymmetric. Returns the number of	524	* `addr' to `max_addr'. The return value is the number of nodes allocated.
566	* nodes split.
567	*/	525	*/
568	static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start,	526	static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
569	u64 size)
570	{	527	{
571	int i = node_start;	528	nodemask_t physnode_mask = NODE_MASK_NONE;
572	size = (size << 20) & FAKE_NODE_MIN_HASH_MASK;	529	u64 min_size;
573	while (!setup_node_range(i++, addr, size, max_addr))	530	int ret = 0;
574	;	531	int i;
575	return i - node_start;	532
		533	if (!size)
		534	return -1;
		535	/*
		536	* The limit on emulated nodes is MAX_NUMNODES, so the size per node is
		537	* increased accordingly if the requested size is too small. This
		538	* creates a uniform distribution of node sizes across the entire
		539	* machine (but not necessarily over physical nodes).
		540	*/
		541	min_size = (max_addr - addr - e820_hole_size(addr, max_addr)) /
		542	MAX_NUMNODES;
		543	min_size = max(min_size, FAKE_NODE_MIN_SIZE);
		544	if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
		545	min_size = (min_size + FAKE_NODE_MIN_SIZE) &
		546	FAKE_NODE_MIN_HASH_MASK;
		547	if (size < min_size) {
		548	pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
		549	size >> 20, min_size >> 20);
		550	size = min_size;
		551	}
		552	size &= FAKE_NODE_MIN_HASH_MASK;
		553
		554	for (i = 0; i < MAX_NUMNODES; i++)
		555	if (physnodes[i].start != physnodes[i].end)
		556	node_set(i, physnode_mask);
		557	/*
		558	* Fill physical nodes with fake nodes of size until there is no memory
		559	* left on any of them.
		560	*/
		561	while (nodes_weight(physnode_mask)) {
		562	for_each_node_mask(i, physnode_mask) {
		563	u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
		564	u64 end;
		565
		566	end = find_end_of_node(physnodes[i].start,
		567	physnodes[i].end, size);
		568	/*
		569	* If there won't be at least FAKE_NODE_MIN_SIZE of
		570	* non-reserved memory in ZONE_DMA32 for the next node,
		571	* this one must extend to the boundary.
		572	*/
		573	if (end < dma32_end && dma32_end - end -
		574	e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
		575	end = dma32_end;
		576
		577	/*
		578	* If there won't be enough non-reserved memory for the
		579	* next node, this one must extend to the end of the
		580	* physical node.
		581	*/
		582	if (physnodes[i].end - end -
		583	e820_hole_size(end, physnodes[i].end) < size)
		584	end = physnodes[i].end;
		585
		586	/*
		587	* Setup the fake node that will be allocated as bootmem
		588	* later. If setup_node_range() returns non-zero, there
		589	* is no more memory available on this physical node.
		590	*/
		591	if (setup_node_range(ret++, &physnodes[i].start,
		592	end - physnodes[i].start,
		593	physnodes[i].end) < 0)
		594	node_clear(i, physnode_mask);
		595	}
		596	}
		597	return ret;
576	}	598	}
577		599
578	/*	600	/*
@@ -582,87 +604,32 @@ static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start,
582	static int __init numa_emulation(unsigned long start_pfn,	604	static int __init numa_emulation(unsigned long start_pfn,
583	unsigned long last_pfn, int acpi, int k8)	605	unsigned long last_pfn, int acpi, int k8)
584	{	606	{
585	u64 size, addr = start_pfn << PAGE_SHIFT;	607	u64 addr = start_pfn << PAGE_SHIFT;
586	u64 max_addr = last_pfn << PAGE_SHIFT;	608	u64 max_addr = last_pfn << PAGE_SHIFT;
587	int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
588	int num_phys_nodes;	609	int num_phys_nodes;
		610	int num_nodes;
		611	int i;
589		612
590	num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);	613	num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
591	/*	614	/*
592	* If the numa=fake command-line is just a single number N, split the	615	* If the numa=fake command-line contains a 'M' or 'G', it represents
593	* system RAM into N fake nodes.	616	* the fixed node size. Otherwise, if it is just a single number N,
		617	* split the system RAM into N fake nodes.
594	*/	618	*/
595	if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {	619	if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
596	long n = simple_strtol(cmdline, NULL, 0);	620	u64 size;
597
598	num_nodes = split_nodes_interleave(addr, max_addr,
599	num_phys_nodes, n);
600	if (num_nodes < 0)
601	return num_nodes;
602	goto out;
603	}
604		621
605	/* Parse the command line. */	622	size = memparse(cmdline, &cmdline);
606	for (coeff_flag = 0; ; cmdline++) {	623	num_nodes = split_nodes_size_interleave(addr, max_addr, size);
607	if (*cmdline && isdigit(*cmdline)) {	624	} else {
608	num = num * 10 + *cmdline - '0';	625	unsigned long n;
609	continue;	626
610	}	627	n = simple_strtoul(cmdline, NULL, 0);
611	if (*cmdline == '*') {	628	num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);
612	if (num > 0)
613	coeff = num;
614	coeff_flag = 1;
615	}
616	if (!*cmdline || *cmdline == ',') {
617	if (!coeff_flag)
618	coeff = 1;
619	/*
620	* Round down to the nearest FAKE_NODE_MIN_SIZE.
621	* Command-line coefficients are in megabytes.
622	*/
623	size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
624	if (size)
625	for (i = 0; i < coeff; i++, num_nodes++)
626	if (setup_node_range(num_nodes, &addr,
627	size, max_addr) < 0)
628	goto done;
629	if (!*cmdline)
630	break;
631	coeff_flag = 0;
632	coeff = -1;
633	}
634	num = 0;
635	}
636	done:
637	if (!num_nodes)
638	return -1;
639	/* Fill remainder of system RAM, if appropriate. */
640	if (addr < max_addr) {
641	if (coeff_flag && coeff < 0) {
642	/* Split remaining nodes into num-sized chunks */
643	num_nodes += split_nodes_by_size(&addr, max_addr,
644	num_nodes, num);
645	goto out;
646	}
647	switch (*(cmdline - 1)) {
648	case '*':
649	/* Split remaining nodes into coeff chunks */
650	if (coeff <= 0)
651	break;
652	num_nodes += split_nodes_equally(&addr, max_addr,
653	num_nodes, coeff);
654	break;
655	case ',':
656	/* Do not allocate remaining system RAM */
657	break;
658	default:
659	/* Give one final node */
660	setup_node_range(num_nodes, &addr, max_addr - addr,
661	max_addr);
662	num_nodes++;
663	}
664	}	629	}
665	out:	630
		631	if (num_nodes < 0)
		632	return num_nodes;
666	memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);	633	memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
667	if (memnode_shift < 0) {	634	if (memnode_shift < 0) {
668	memnode_shift = 0;	635	memnode_shift = 0;


diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index ed34f5e35999..c9ba9deafe83 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c
@@ -6,6 +6,14 @@
6		6
7	#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO	7	#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
8		8
		9	#ifdef CONFIG_HIGHPTE
		10	#define PGALLOC_USER_GFP __GFP_HIGHMEM
		11	#else
		12	#define PGALLOC_USER_GFP 0
		13	#endif
		14
		15	gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
		16
9	pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)	17	pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
10	{	18	{
11	return (pte_t *)__get_free_page(PGALLOC_GFP);	19	return (pte_t *)__get_free_page(PGALLOC_GFP);
@@ -15,16 +23,29 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
15	{	23	{
16	struct page *pte;	24	struct page *pte;
17		25
18	#ifdef CONFIG_HIGHPTE	26	pte = alloc_pages(__userpte_alloc_gfp, 0);
19	pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
20	#else
21	pte = alloc_pages(PGALLOC_GFP, 0);
22	#endif
23	if (pte)	27	if (pte)
24	pgtable_page_ctor(pte);	28	pgtable_page_ctor(pte);
25	return pte;	29	return pte;
26	}	30	}
27		31
		32	static int __init setup_userpte(char *arg)
		33	{
		34	if (!arg)
		35	return -EINVAL;
		36
		37	/*
		38	* "userpte=nohigh" disables allocation of user pagetables in
		39	* high memory.
		40	*/
		41	if (strcmp(arg, "nohigh") == 0)
		42	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
		43	else
		44	return -EINVAL;
		45	return 0;
		46	}
		47	early_param("userpte", setup_userpte);
		48
28	void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)	49	void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
29	{	50	{
30	pgtable_page_dtor(pte);	51	pgtable_page_dtor(pte);


diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 65b58e4b0b8b..426f3a1a64d3 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c
@@ -41,7 +41,7 @@ union smp_flush_state {
41	struct {	41	struct {
42	struct mm_struct *flush_mm;	42	struct mm_struct *flush_mm;
43	unsigned long flush_va;	43	unsigned long flush_va;
44	spinlock_t tlbstate_lock;	44	raw_spinlock_t tlbstate_lock;
45	DECLARE_BITMAP(flush_cpumask, NR_CPUS);	45	DECLARE_BITMAP(flush_cpumask, NR_CPUS);
46	};	46	};
47	char pad[INTERNODE_CACHE_BYTES];	47	char pad[INTERNODE_CACHE_BYTES];
@@ -181,7 +181,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
181	* num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is	181	* num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
182	* probably not worth checking this for a cache-hot lock.	182	* probably not worth checking this for a cache-hot lock.
183	*/	183	*/
184	spin_lock(&f->tlbstate_lock);	184	raw_spin_lock(&f->tlbstate_lock);
185		185
186	f->flush_mm = mm;	186	f->flush_mm = mm;
187	f->flush_va = va;	187	f->flush_va = va;
@@ -199,7 +199,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
199		199
200	f->flush_mm = NULL;	200	f->flush_mm = NULL;
201	f->flush_va = 0;	201	f->flush_va = 0;
202	spin_unlock(&f->tlbstate_lock);	202	raw_spin_unlock(&f->tlbstate_lock);
203	}	203	}
204		204
205	void native_flush_tlb_others(const struct cpumask *cpumask,	205	void native_flush_tlb_others(const struct cpumask *cpumask,
@@ -223,7 +223,7 @@ static int __cpuinit init_smp_flush(void)
223	int i;	223	int i;
224		224
225	for (i = 0; i < ARRAY_SIZE(flush_state); i++)	225	for (i = 0; i < ARRAY_SIZE(flush_state); i++)
226	spin_lock_init(&flush_state[i].tlbstate_lock);	226	raw_spin_lock_init(&flush_state[i].tlbstate_lock);
227		227
228	return 0;	228	return 0;
229	}	229	}