Pull release into acpica branch

author: Len Brown <len.brown@intel.com> 2005-12-06 17:31:30 -0500
committer: Len Brown <len.brown@intel.com> 2005-12-06 17:31:30 -0500
commit: 3d5271f9883cba7b54762bc4fe027d4172f06db7 (patch)
tree: ab8a881a14478598a0c8bda0d26c62cdccfffd6d /arch/ia64/mm
parent: 378b2556f4e09fa6f87ff0cb5c4395ff28257d02 (diff)
parent: 9115a6c787596e687df03010d97fccc5e0762506 (diff)
7 files changed, 149 insertions, 102 deletions
diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile
index 7078f67887ec..d78d20f0a0f0 100644
--- a/arch/ia64/mm/Makefile
+++ b/arch/ia64/mm/Makefile
@@ -7,6 +7,5 @@ obj-y := init.o fault.o tlb.o extable.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NUMA)         += numa.o
 obj-$(CONFIG_DISCONTIGMEM) += discontig.o
-ifndef CONFIG_DISCONTIGMEM
+obj-$(CONFIG_SPARSEMEM)    += discontig.o
-obj-y += contig.o
+obj-$(CONFIG_FLATMEM)      += contig.o
-endif
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 91a055f5731f..acaaec4e4681 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -269,7 +269,7 @@ paging_init (void)
        efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
        if (max_gap < LARGE_GAP) {
                vmem_map = (struct page *) 0;
-                free_area_init_node(0, &contig_page_data, zones_size, 0,
+                free_area_init_node(0, NODE_DATA(0), zones_size, 0,
                                    zholes_size);
        } else {
                unsigned long map_size;
@@ -282,7 +282,7 @@ paging_init (void)
                efi_memmap_walk(create_mem_map_page_table, NULL);
                NODE_DATA(0)->node_mem_map = vmem_map;
-                free_area_init_node(0, &contig_page_data, zones_size,
+                free_area_init_node(0, NODE_DATA(0), zones_size,
                                    0, zholes_size);
                printk("Virtual mem_map starts at 0x%p\n", mem_map);
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index b5c90e548195..0f776b032d31 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -350,14 +350,12 @@ static void __init initialize_pernode_data(void)
 *      for best.
 * @nid: node id
 * @pernodesize: size of this node's pernode data
- * @align: alignment to use for this node's pernode data
 */
-static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize,
+static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
-        unsigned long align)
 {
        void *ptr = NULL;
        u8 best = 0xff;
-        int bestnode = -1, node;
+        int bestnode = -1, node, anynode = 0;
        for_each_online_node(node) {
                if (node_isset(node, memory_less_mask))
@@ -366,13 +364,15 @@ static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize,
                        best = node_distance(nid, node);
                        bestnode = node;
                }
+                anynode = node;
        }
-        ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat,
+        if (bestnode == -1)
-                pernodesize, align, __pa(MAX_DMA_ADDRESS));
+                bestnode = anynode;
+        ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat, pernodesize,
+                PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
-        if (!ptr)
-                panic("NO memory for memory less node\n");
        return ptr;
 }
@@ -413,14 +413,44 @@ static void __init memory_less_nodes(void)
        for_each_node_mask(node, memory_less_mask) {
                pernodesize = compute_pernodesize(node);
-                pernode = memory_less_node_alloc(node, pernodesize,
+                pernode = memory_less_node_alloc(node, pernodesize);
-                        (node) ? (node * PERCPU_PAGE_SIZE) : (1024*1024));
                fill_pernode(node, __pa(pernode), pernodesize);
        }
        return;
 }
+#ifdef CONFIG_SPARSEMEM
+/**
+ * register_sparse_mem - notify SPARSEMEM that this memory range exists.
+ * @start: physical start of range
+ * @end: physical end of range
+ * @arg: unused
+ *
+ * Simply calls SPARSEMEM to register memory section(s).
+ */
+static int __init register_sparse_mem(unsigned long start, unsigned long end,
+        void *arg)
+{
+        int nid;
+        start = __pa(start) >> PAGE_SHIFT;
+        end = __pa(end) >> PAGE_SHIFT;
+        nid = early_pfn_to_nid(start);
+        memory_present(nid, start, end);
+        return 0;
+}
+static void __init arch_sparse_init(void)
+{
+        efi_memmap_walk(register_sparse_mem, NULL);
+        sparse_init();
+}
+#else
+#define arch_sparse_init() do {} while (0)
+#endif
 /**
 * find_memory - walk the EFI memory map and setup the bootmem allocator
 *
@@ -524,12 +554,18 @@ void show_mem(void)
        show_free_areas();
        printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
        for_each_pgdat(pgdat) {
-                unsigned long present = pgdat->node_present_pages;
+                unsigned long present;
+                unsigned long flags;
                int shared = 0, cached = 0, reserved = 0;
                printk("Node ID: %d\n", pgdat->node_id);
+                pgdat_resize_lock(pgdat, &flags);
+                present = pgdat->node_present_pages;
                for(i = 0; i < pgdat->node_spanned_pages; i++) {
-                        struct page *page = pgdat_page_nr(pgdat, i);
+                        struct page *page;
-                        if (!ia64_pfn_valid(pgdat->node_start_pfn+i))
+                        if (pfn_valid(pgdat->node_start_pfn + i))
+                                page = pfn_to_page(pgdat->node_start_pfn + i);
+                        else
                                continue;
                        if (PageReserved(page))
                                reserved++;
@@ -538,6 +574,7 @@ void show_mem(void)
                        else if (page_count(page))
                                shared += page_count(page)-1;
                }
+                pgdat_resize_unlock(pgdat, &flags);
                total_present += present;
                total_reserved += reserved;
                total_cached += cached;
@@ -648,12 +685,16 @@ void __init paging_init(void)
        max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+        arch_sparse_init();
        efi_memmap_walk(filter_rsvd_memory, count_node_pages);
+#ifdef CONFIG_VIRTUAL_MEM_MAP
        vmalloc_end -= PAGE_ALIGN(max_low_pfn * sizeof(struct page));
        vmem_map = (struct page *) vmalloc_end;
        efi_memmap_walk(create_mem_map_page_table, NULL);
        printk("Virtual mem_map starts at 0x%p\n", vmem_map);
+#endif
        for_each_online_node(node) {
                memset(zones_size, 0, sizeof(zones_size));
@@ -690,7 +731,9 @@ void __init paging_init(void)
                pfn_offset = mem_data[node].min_pfn;
+#ifdef CONFIG_VIRTUAL_MEM_MAP
                NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
+#endif
                free_area_init_node(node, NODE_DATA(node), zones_size,
                                    pfn_offset, zholes_size);
        }
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 3c32af910d60..af7eb087dca7 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -20,32 +20,6 @@
 extern void die (char *, struct pt_regs *, long);
 /*
- * This routine is analogous to expand_stack() but instead grows the
- * register backing store (which grows towards higher addresses).
- * Since the register backing store is access sequentially, we
- * disallow growing the RBS by more than a page at a time.  Note that
- * the VM_GROWSUP flag can be set on any VM area but that's fine
- * because the total process size is still limited by RLIMIT_STACK and
- * RLIMIT_AS.
- */
-static inline long
-expand_backing_store (struct vm_area_struct *vma, unsigned long address)
-{
-        unsigned long grow;
-        grow = PAGE_SIZE >> PAGE_SHIFT;
-        if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur
-            || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur))
-                return -ENOMEM;
-        vma->vm_end += PAGE_SIZE;
-        vma->vm_mm->total_vm += grow;
-        if (vma->vm_flags & VM_LOCKED)
-                vma->vm_mm->locked_vm += grow;
-        __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
-        return 0;
-}
-/*
 * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
 * (inside region 5, on ia64) and that page is present.
 */
@@ -185,7 +159,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
                if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
                    || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
                        goto bad_area;
-                if (expand_backing_store(vma, address))
+                /*
+                 * Since the register backing store is accessed sequentially,
+                 * we disallow growing it by more than a page at a time.
+                 */
+                if (address > vma->vm_end + PAGE_SIZE - sizeof(long))
+                        goto bad_area;
+                if (expand_upwards(vma, address))
                        goto bad_area;
        }
        goto good_area;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 1281c609ee98..e3215ba64ffd 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -158,7 +158,7 @@ ia64_init_addr_space (void)
                vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
                vma->vm_end = vma->vm_start + PAGE_SIZE;
                vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
-                vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP;
+                vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
                down_write(&current->mm->mmap_sem);
                if (insert_vm_struct(current->mm, vma)) {
                        up_write(&current->mm->mmap_sem);
@@ -275,26 +275,21 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
        pgd = pgd_offset_k(address);            /* note: this is NOT pgd_offset()! */
-        spin_lock(&init_mm.page_table_lock);
        {
                pud = pud_alloc(&init_mm, pgd, address);
                if (!pud)
                        goto out;
                pmd = pmd_alloc(&init_mm, pud, address);
                if (!pmd)
                        goto out;
-                pte = pte_alloc_map(&init_mm, pmd, address);
+                pte = pte_alloc_kernel(pmd, address);
                if (!pte)
                        goto out;
-                if (!pte_none(*pte)) {
+                if (!pte_none(*pte))
-                        pte_unmap(pte);
                        goto out;
-                }
                set_pte(pte, mk_pte(page, pgprot));
-                pte_unmap(pte);
        }
-  out:  spin_unlock(&init_mm.page_table_lock);
+  out:
        /* no need for flush_tlb */
        return page;
 }
@@ -593,7 +588,7 @@ mem_init (void)
        platform_dma_init();
 #endif
-#ifndef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_FLATMEM
        if (!mem_map)
                BUG();
        max_mapnr = max_low_pfn;
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index 77118bbf3d8b..4e5c8b36ad93 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -47,3 +47,27 @@ paddr_to_nid(unsigned long paddr)
        return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0);
 }
+#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_NUMA)
+/*
+ * Because of holes evaluate on section limits.
+ * If the section of memory exists, then return the node where the section
+ * resides.  Otherwise return node 0 as the default.  This is used by
+ * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
+ * the section resides.
+ */
+int early_pfn_to_nid(unsigned long pfn)
+{
+        int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
+        for (i = 0; i < num_node_memblks; i++) {
+                ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT;
+                esec = (node_memblk[i].start_paddr + node_memblk[i].size +
+                        ((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT;
+                if (section >= ssec && section < esec)
+                        return node_memblk[i].nid;
+        }
+        return 0;
+}
+#endif
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 464557e4ed82..41105d454423 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -8,6 +8,8 @@
 *              Modified RID allocation for SMP
 *          Goutham Rao <goutham.rao@intel.com>
 *              IPI based ptc implementation and A-step IPI implementation.
+ * Rohit Seth <rohit.seth@intel.com>
+ * Ken Chen <kenneth.w.chen@intel.com>
 */
 #include <linux/config.h>
 #include <linux/module.h>
@@ -16,80 +18,83 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/bootmem.h>
 #include <asm/delay.h>
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
 #include <asm/pal.h>
 #include <asm/tlbflush.h>
+#include <asm/dma.h>
 static struct {
        unsigned long mask;     /* mask of supported purge page-sizes */
-        unsigned long max_bits; /* log2() of largest supported purge page-size */
+        unsigned long max_bits; /* log2 of largest supported purge page-size */
 } purge;
 struct ia64_ctx ia64_ctx = {
        .lock =         SPIN_LOCK_UNLOCKED,
        .next =         1,
-        .limit =        (1 << 15) - 1,          /* start out with the safe (architected) limit */
        .max_ctx =      ~0U
 };
 DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
 /*
+ * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
+ * Called after cpu_init() has setup ia64_ctx.max_ctx based on
+ * maximum RID that is supported by boot CPU.
+ */
+void __init
+mmu_context_init (void)
+{
+        ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+        ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+}
+/*
 * Acquire the ia64_ctx.lock before calling this function!
 */
 void
 wrap_mmu_context (struct mm_struct *mm)
 {
-        unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
+        int i, cpu;
-        struct task_struct *tsk;
+        unsigned long flush_bit;
-        int i;
-        if (ia64_ctx.next > max_ctx)
+        for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
-                ia64_ctx.next = 300;    /* skip daemons */
+                flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
-        ia64_ctx.limit = max_ctx + 1;
+                ia64_ctx.bitmap[i] ^= flush_bit;
+        }
+ 
+        /* use offset at 300 to skip daemons */
+        ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+                                ia64_ctx.max_ctx, 300);
+        ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+                                ia64_ctx.max_ctx, ia64_ctx.next);
        /*
-         * Scan all the task's mm->context and set proper safe range
+         * can't call flush_tlb_all() here because of race condition
+         * with O(1) scheduler [EF]
         */
+        cpu = get_cpu(); /* prevent preemption/migration */
-        read_lock(&tasklist_lock);
+        for_each_online_cpu(i)
-  repeat:
+                if (i != cpu)
-        for_each_process(tsk) {
+                        per_cpu(ia64_need_tlb_flush, i) = 1;
-                if (!tsk->mm)
+        put_cpu();
-                        continue;
-                tsk_context = tsk->mm->context;
-                if (tsk_context == ia64_ctx.next) {
-                        if (++ia64_ctx.next >= ia64_ctx.limit) {
-                                /* empty range: reset the range limit and start over */
-                                if (ia64_ctx.next > max_ctx)
-                                        ia64_ctx.next = 300;
-                                ia64_ctx.limit = max_ctx + 1;
-                                goto repeat;
-                        }
-                }
-                if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
-                        ia64_ctx.limit = tsk_context;
-        }
-        read_unlock(&tasklist_lock);
-        /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
-        {
-                int cpu = get_cpu(); /* prevent preemption/migration */
-                for (i = 0; i < NR_CPUS; ++i)
-                        if (cpu_online(i) && (i != cpu))
-                                per_cpu(ia64_need_tlb_flush, i) = 1;
-                put_cpu();
-        }
        local_flush_tlb_all();
 }
 void
-ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
+ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
+                       unsigned long end, unsigned long nbits)
 {
        static DEFINE_SPINLOCK(ptcg_lock);
+        if (mm != current->active_mm) {
+                flush_tlb_all();
+                return;
+        }
        /* HW requires global serialization of ptc.ga.  */
        spin_lock(&ptcg_lock);
        {
@@ -129,36 +134,37 @@ local_flush_tlb_all (void)
 }
 void
-flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
+flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
+                 unsigned long end)
 {
        struct mm_struct *mm = vma->vm_mm;
        unsigned long size = end - start;
        unsigned long nbits;
+#ifndef CONFIG_SMP
        if (mm != current->active_mm) {
-                /* this does happen, but perhaps it's not worth optimizing for? */
-#ifdef CONFIG_SMP
-                flush_tlb_all();
-#else
                mm->context = 0;
-#endif
                return;
        }
+#endif
        nbits = ia64_fls(size + 0xfff);
-        while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
+        while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
+                        (nbits < purge.max_bits))
                ++nbits;
        if (nbits > purge.max_bits)
                nbits = purge.max_bits;
        start &= ~((1UL << nbits) - 1);
 # ifdef CONFIG_SMP
-        platform_global_tlb_purge(start, end, nbits);
+        platform_global_tlb_purge(mm, start, end, nbits);
 # else
+        preempt_disable();
        do {
                ia64_ptcl(start, (nbits<<2));
                start += (1UL << nbits);
        } while (start < end);
+        preempt_enable();
 # endif
        ia64_srlz_i();                  /* srlz.i implies srlz.d */
@@ -186,5 +192,5 @@ ia64_tlb_init (void)
        local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
        local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
-        local_flush_tlb_all();          /* nuke left overs from bootstrapping... */
+        local_flush_tlb_all();  /* nuke left overs from bootstrapping... */
 }
author	Len Brown <len.brown@intel.com>	2005-12-06 17:31:30 -0500
committer	Len Brown <len.brown@intel.com>	2005-12-06 17:31:30 -0500
commit	3d5271f9883cba7b54762bc4fe027d4172f06db7 (patch)
tree	ab8a881a14478598a0c8bda0d26c62cdccfffd6d /arch/ia64/mm
parent	378b2556f4e09fa6f87ff0cb5c4395ff28257d02 (diff)
parent	9115a6c787596e687df03010d97fccc5e0762506 (diff)

diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile index 7078f67887ec..d78d20f0a0f0 100644 --- a/arch/ia64/mm/Makefile +++ b/arch/ia64/mm/Makefile
@@ -7,6 +7,5 @@ obj-y := init.o fault.o tlb.o extable.o
7	obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o	7	obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
8	obj-$(CONFIG_NUMA) += numa.o	8	obj-$(CONFIG_NUMA) += numa.o
9	obj-$(CONFIG_DISCONTIGMEM) += discontig.o	9	obj-$(CONFIG_DISCONTIGMEM) += discontig.o
10	ifndef CONFIG_DISCONTIGMEM	10	obj-$(CONFIG_SPARSEMEM) += discontig.o
11	obj-y += contig.o	11	obj-$(CONFIG_FLATMEM) += contig.o
12	endif


diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 91a055f5731f..acaaec4e4681 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c
@@ -269,7 +269,7 @@ paging_init (void)
269	efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);	269	efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
270	if (max_gap < LARGE_GAP) {	270	if (max_gap < LARGE_GAP) {
271	vmem_map = (struct page *) 0;	271	vmem_map = (struct page *) 0;
272	free_area_init_node(0, &contig_page_data, zones_size, 0,	272	free_area_init_node(0, NODE_DATA(0), zones_size, 0,
273	zholes_size);	273	zholes_size);
274	} else {	274	} else {
275	unsigned long map_size;	275	unsigned long map_size;
@@ -282,7 +282,7 @@ paging_init (void)
282	efi_memmap_walk(create_mem_map_page_table, NULL);	282	efi_memmap_walk(create_mem_map_page_table, NULL);
283		283
284	NODE_DATA(0)->node_mem_map = vmem_map;	284	NODE_DATA(0)->node_mem_map = vmem_map;
285	free_area_init_node(0, &contig_page_data, zones_size,	285	free_area_init_node(0, NODE_DATA(0), zones_size,
286	0, zholes_size);	286	0, zholes_size);
287		287
288	printk("Virtual mem_map starts at 0x%p\n", mem_map);	288	printk("Virtual mem_map starts at 0x%p\n", mem_map);


diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index b5c90e548195..0f776b032d31 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c
@@ -350,14 +350,12 @@ static void __init initialize_pernode_data(void)
350	* for best.	350	* for best.
351	* @nid: node id	351	* @nid: node id
352	* @pernodesize: size of this node's pernode data	352	* @pernodesize: size of this node's pernode data
353	* @align: alignment to use for this node's pernode data
354	*/	353	*/
355	static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize,	354	static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
356	unsigned long align)
357	{	355	{
358	void *ptr = NULL;	356	void *ptr = NULL;
359	u8 best = 0xff;	357	u8 best = 0xff;
360	int bestnode = -1, node;	358	int bestnode = -1, node, anynode = 0;
361		359
362	for_each_online_node(node) {	360	for_each_online_node(node) {
363	if (node_isset(node, memory_less_mask))	361	if (node_isset(node, memory_less_mask))
@@ -366,13 +364,15 @@ static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize,
366	best = node_distance(nid, node);	364	best = node_distance(nid, node);
367	bestnode = node;	365	bestnode = node;
368	}	366	}
		367	anynode = node;
369	}	368	}
370		369
371	ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat,	370	if (bestnode == -1)
372	pernodesize, align, __pa(MAX_DMA_ADDRESS));	371	bestnode = anynode;
		372
		373	ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat, pernodesize,
		374	PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
373		375
374	if (!ptr)
375	panic("NO memory for memory less node\n");
376	return ptr;	376	return ptr;
377	}	377	}
378		378
@@ -413,14 +413,44 @@ static void __init memory_less_nodes(void)
413		413
414	for_each_node_mask(node, memory_less_mask) {	414	for_each_node_mask(node, memory_less_mask) {
415	pernodesize = compute_pernodesize(node);	415	pernodesize = compute_pernodesize(node);
416	pernode = memory_less_node_alloc(node, pernodesize,	416	pernode = memory_less_node_alloc(node, pernodesize);
417	(node) ? (node * PERCPU_PAGE_SIZE) : (1024*1024));
418	fill_pernode(node, __pa(pernode), pernodesize);	417	fill_pernode(node, __pa(pernode), pernodesize);
419	}	418	}
420		419
421	return;	420	return;
422	}	421	}
423		422
		423	#ifdef CONFIG_SPARSEMEM
		424	/**
		425	* register_sparse_mem - notify SPARSEMEM that this memory range exists.
		426	* @start: physical start of range
		427	* @end: physical end of range
		428	* @arg: unused
		429	*
		430	* Simply calls SPARSEMEM to register memory section(s).
		431	*/
		432	static int __init register_sparse_mem(unsigned long start, unsigned long end,
		433	void *arg)
		434	{
		435	int nid;
		436
		437	start = __pa(start) >> PAGE_SHIFT;
		438	end = __pa(end) >> PAGE_SHIFT;
		439	nid = early_pfn_to_nid(start);
		440	memory_present(nid, start, end);
		441
		442	return 0;
		443	}
		444
		445	static void __init arch_sparse_init(void)
		446	{
		447	efi_memmap_walk(register_sparse_mem, NULL);
		448	sparse_init();
		449	}
		450	#else
		451	#define arch_sparse_init() do {} while (0)
		452	#endif
		453
424	/**	454	/**
425	* find_memory - walk the EFI memory map and setup the bootmem allocator	455	* find_memory - walk the EFI memory map and setup the bootmem allocator
426	*	456	*
@@ -524,12 +554,18 @@ void show_mem(void)
524	show_free_areas();	554	show_free_areas();
525	printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));	555	printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
526	for_each_pgdat(pgdat) {	556	for_each_pgdat(pgdat) {
527	unsigned long present = pgdat->node_present_pages;	557	unsigned long present;
		558	unsigned long flags;
528	int shared = 0, cached = 0, reserved = 0;	559	int shared = 0, cached = 0, reserved = 0;
		560
529	printk("Node ID: %d\n", pgdat->node_id);	561	printk("Node ID: %d\n", pgdat->node_id);
		562	pgdat_resize_lock(pgdat, &flags);
		563	present = pgdat->node_present_pages;
530	for(i = 0; i < pgdat->node_spanned_pages; i++) {	564	for(i = 0; i < pgdat->node_spanned_pages; i++) {
531	struct page *page = pgdat_page_nr(pgdat, i);	565	struct page *page;
532	if (!ia64_pfn_valid(pgdat->node_start_pfn+i))	566	if (pfn_valid(pgdat->node_start_pfn + i))
		567	page = pfn_to_page(pgdat->node_start_pfn + i);
		568	else
533	continue;	569	continue;
534	if (PageReserved(page))	570	if (PageReserved(page))
535	reserved++;	571	reserved++;
@@ -538,6 +574,7 @@ void show_mem(void)
538	else if (page_count(page))	574	else if (page_count(page))
539	shared += page_count(page)-1;	575	shared += page_count(page)-1;
540	}	576	}
		577	pgdat_resize_unlock(pgdat, &flags);
541	total_present += present;	578	total_present += present;
542	total_reserved += reserved;	579	total_reserved += reserved;
543	total_cached += cached;	580	total_cached += cached;
@@ -648,12 +685,16 @@ void __init paging_init(void)
648		685
649	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;	686	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
650		687
		688	arch_sparse_init();
		689
651	efi_memmap_walk(filter_rsvd_memory, count_node_pages);	690	efi_memmap_walk(filter_rsvd_memory, count_node_pages);
652		691
		692	#ifdef CONFIG_VIRTUAL_MEM_MAP
653	vmalloc_end -= PAGE_ALIGN(max_low_pfn * sizeof(struct page));	693	vmalloc_end -= PAGE_ALIGN(max_low_pfn * sizeof(struct page));
654	vmem_map = (struct page *) vmalloc_end;	694	vmem_map = (struct page *) vmalloc_end;
655	efi_memmap_walk(create_mem_map_page_table, NULL);	695	efi_memmap_walk(create_mem_map_page_table, NULL);
656	printk("Virtual mem_map starts at 0x%p\n", vmem_map);	696	printk("Virtual mem_map starts at 0x%p\n", vmem_map);
		697	#endif
657		698
658	for_each_online_node(node) {	699	for_each_online_node(node) {
659	memset(zones_size, 0, sizeof(zones_size));	700	memset(zones_size, 0, sizeof(zones_size));
@@ -690,7 +731,9 @@ void __init paging_init(void)
690		731
691	pfn_offset = mem_data[node].min_pfn;	732	pfn_offset = mem_data[node].min_pfn;
692		733
		734	#ifdef CONFIG_VIRTUAL_MEM_MAP
693	NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;	735	NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
		736	#endif
694	free_area_init_node(node, NODE_DATA(node), zones_size,	737	free_area_init_node(node, NODE_DATA(node), zones_size,
695	pfn_offset, zholes_size);	738	pfn_offset, zholes_size);
696	}	739	}


diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 3c32af910d60..af7eb087dca7 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c
@@ -20,32 +20,6 @@
20	extern void die (char *, struct pt_regs *, long);	20	extern void die (char *, struct pt_regs *, long);
21		21
22	/*	22	/*
23	* This routine is analogous to expand_stack() but instead grows the
24	* register backing store (which grows towards higher addresses).
25	* Since the register backing store is access sequentially, we
26	* disallow growing the RBS by more than a page at a time. Note that
27	* the VM_GROWSUP flag can be set on any VM area but that's fine
28	* because the total process size is still limited by RLIMIT_STACK and
29	* RLIMIT_AS.
30	*/
31	static inline long
32	expand_backing_store (struct vm_area_struct *vma, unsigned long address)
33	{
34	unsigned long grow;
35
36	grow = PAGE_SIZE >> PAGE_SHIFT;
37	if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur
38	|| (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur))
39	return -ENOMEM;
40	vma->vm_end += PAGE_SIZE;
41	vma->vm_mm->total_vm += grow;
42	if (vma->vm_flags & VM_LOCKED)
43	vma->vm_mm->locked_vm += grow;
44	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
45	return 0;
46	}
47
48	/*
49	* Return TRUE if ADDRESS points at a page in the kernel's mapped segment	23	* Return TRUE if ADDRESS points at a page in the kernel's mapped segment
50	* (inside region 5, on ia64) and that page is present.	24	* (inside region 5, on ia64) and that page is present.
51	*/	25	*/
@@ -185,7 +159,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
185	if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)	159	if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
186	|| REGION_OFFSET(address) >= RGN_MAP_LIMIT)	160	|| REGION_OFFSET(address) >= RGN_MAP_LIMIT)
187	goto bad_area;	161	goto bad_area;
188	if (expand_backing_store(vma, address))	162	/*
		163	* Since the register backing store is accessed sequentially,
		164	* we disallow growing it by more than a page at a time.
		165	*/
		166	if (address > vma->vm_end + PAGE_SIZE - sizeof(long))
		167	goto bad_area;
		168	if (expand_upwards(vma, address))
189	goto bad_area;	169	goto bad_area;
190	}	170	}
191	goto good_area;	171	goto good_area;


diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 1281c609ee98..e3215ba64ffd 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c
@@ -158,7 +158,7 @@ ia64_init_addr_space (void)
158	vma->vm_start = current->thread.rbs_bot & PAGE_MASK;	158	vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
159	vma->vm_end = vma->vm_start + PAGE_SIZE;	159	vma->vm_end = vma->vm_start + PAGE_SIZE;
160	vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];	160	vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
161	vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP;	161	vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
162	down_write(&current->mm->mmap_sem);	162	down_write(&current->mm->mmap_sem);
163	if (insert_vm_struct(current->mm, vma)) {	163	if (insert_vm_struct(current->mm, vma)) {
164	up_write(&current->mm->mmap_sem);	164	up_write(&current->mm->mmap_sem);
@@ -275,26 +275,21 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
275		275
276	pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */	276	pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */
277		277
278	spin_lock(&init_mm.page_table_lock);
279	{	278	{
280	pud = pud_alloc(&init_mm, pgd, address);	279	pud = pud_alloc(&init_mm, pgd, address);
281	if (!pud)	280	if (!pud)
282	goto out;	281	goto out;
283
284	pmd = pmd_alloc(&init_mm, pud, address);	282	pmd = pmd_alloc(&init_mm, pud, address);
285	if (!pmd)	283	if (!pmd)
286	goto out;	284	goto out;
287	pte = pte_alloc_map(&init_mm, pmd, address);	285	pte = pte_alloc_kernel(pmd, address);
288	if (!pte)	286	if (!pte)
289	goto out;	287	goto out;
290	if (!pte_none(*pte)) {	288	if (!pte_none(*pte))
291	pte_unmap(pte);
292	goto out;	289	goto out;
293	}
294	set_pte(pte, mk_pte(page, pgprot));	290	set_pte(pte, mk_pte(page, pgprot));
295	pte_unmap(pte);
296	}	291	}
297	out: spin_unlock(&init_mm.page_table_lock);	292	out:
298	/* no need for flush_tlb */	293	/* no need for flush_tlb */
299	return page;	294	return page;
300	}	295	}
@@ -593,7 +588,7 @@ mem_init (void)
593	platform_dma_init();	588	platform_dma_init();
594	#endif	589	#endif
595		590
596	#ifndef CONFIG_DISCONTIGMEM	591	#ifdef CONFIG_FLATMEM
597	if (!mem_map)	592	if (!mem_map)
598	BUG();	593	BUG();
599	max_mapnr = max_low_pfn;	594	max_mapnr = max_low_pfn;


diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index 77118bbf3d8b..4e5c8b36ad93 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -47,3 +47,27 @@ paddr_to_nid(unsigned long paddr)
47		47
48	return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0);	48	return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0);
49	}	49	}
		50
		51	#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_NUMA)
		52	/*
		53	* Because of holes evaluate on section limits.
		54	* If the section of memory exists, then return the node where the section
		55	* resides. Otherwise return node 0 as the default. This is used by
		56	* SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
		57	* the section resides.
		58	*/
		59	int early_pfn_to_nid(unsigned long pfn)
		60	{
		61	int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
		62
		63	for (i = 0; i < num_node_memblks; i++) {
		64	ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT;
		65	esec = (node_memblk[i].start_paddr + node_memblk[i].size +
		66	((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT;
		67	if (section >= ssec && section < esec)
		68	return node_memblk[i].nid;
		69	}
		70
		71	return 0;
		72	}
		73	#endif


diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 464557e4ed82..41105d454423 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -8,6 +8,8 @@
8	* Modified RID allocation for SMP	8	* Modified RID allocation for SMP
9	* Goutham Rao <goutham.rao@intel.com>	9	* Goutham Rao <goutham.rao@intel.com>
10	* IPI based ptc implementation and A-step IPI implementation.	10	* IPI based ptc implementation and A-step IPI implementation.
		11	* Rohit Seth <rohit.seth@intel.com>
		12	* Ken Chen <kenneth.w.chen@intel.com>
11	*/	13	*/
12	#include <linux/config.h>	14	#include <linux/config.h>
13	#include <linux/module.h>	15	#include <linux/module.h>
@@ -16,80 +18,83 @@
16	#include <linux/sched.h>	18	#include <linux/sched.h>
17	#include <linux/smp.h>	19	#include <linux/smp.h>
18	#include <linux/mm.h>	20	#include <linux/mm.h>
		21	#include <linux/bootmem.h>
19		22
20	#include <asm/delay.h>	23	#include <asm/delay.h>
21	#include <asm/mmu_context.h>	24	#include <asm/mmu_context.h>
22	#include <asm/pgalloc.h>	25	#include <asm/pgalloc.h>
23	#include <asm/pal.h>	26	#include <asm/pal.h>
24	#include <asm/tlbflush.h>	27	#include <asm/tlbflush.h>
		28	#include <asm/dma.h>
25		29
26	static struct {	30	static struct {
27	unsigned long mask; /* mask of supported purge page-sizes */	31	unsigned long mask; /* mask of supported purge page-sizes */
28	unsigned long max_bits; /* log2() of largest supported purge page-size */	32	unsigned long max_bits; /* log2 of largest supported purge page-size */
29	} purge;	33	} purge;
30		34
31	struct ia64_ctx ia64_ctx = {	35	struct ia64_ctx ia64_ctx = {
32	.lock = SPIN_LOCK_UNLOCKED,	36	.lock = SPIN_LOCK_UNLOCKED,
33	.next = 1,	37	.next = 1,
34	.limit = (1 << 15) - 1, /* start out with the safe (architected) limit */
35	.max_ctx = ~0U	38	.max_ctx = ~0U
36	};	39	};
37		40
38	DEFINE_PER_CPU(u8, ia64_need_tlb_flush);	41	DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
39		42
40	/*	43	/*
		44	* Initializes the ia64_ctx.bitmap array based on max_ctx+1.
		45	* Called after cpu_init() has setup ia64_ctx.max_ctx based on
		46	* maximum RID that is supported by boot CPU.
		47	*/
		48	void __init
		49	mmu_context_init (void)
		50	{
		51	ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
		52	ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
		53	}
		54
		55	/*
41	* Acquire the ia64_ctx.lock before calling this function!	56	* Acquire the ia64_ctx.lock before calling this function!
42	*/	57	*/
43	void	58	void
44	wrap_mmu_context (struct mm_struct *mm)	59	wrap_mmu_context (struct mm_struct *mm)
45	{	60	{
46	unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;	61	int i, cpu;
47	struct task_struct *tsk;	62	unsigned long flush_bit;
48	int i;
49		63
50	if (ia64_ctx.next > max_ctx)	64	for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
51	ia64_ctx.next = 300; /* skip daemons */	65	flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
52	ia64_ctx.limit = max_ctx + 1;	66	ia64_ctx.bitmap[i] ^= flush_bit;
		67	}
		68
		69	/* use offset at 300 to skip daemons */
		70	ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
		71	ia64_ctx.max_ctx, 300);
		72	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
		73	ia64_ctx.max_ctx, ia64_ctx.next);
53		74
54	/*	75	/*
55	* Scan all the task's mm->context and set proper safe range	76	* can't call flush_tlb_all() here because of race condition
		77	* with O(1) scheduler [EF]
56	*/	78	*/
57		79	cpu = get_cpu(); /* prevent preemption/migration */
58	read_lock(&tasklist_lock);	80	for_each_online_cpu(i)
59	repeat:	81	if (i != cpu)
60	for_each_process(tsk) {	82	per_cpu(ia64_need_tlb_flush, i) = 1;
61	if (!tsk->mm)	83	put_cpu();
62	continue;
63	tsk_context = tsk->mm->context;
64	if (tsk_context == ia64_ctx.next) {
65	if (++ia64_ctx.next >= ia64_ctx.limit) {
66	/* empty range: reset the range limit and start over */
67	if (ia64_ctx.next > max_ctx)
68	ia64_ctx.next = 300;
69	ia64_ctx.limit = max_ctx + 1;
70	goto repeat;
71	}
72	}
73	if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
74	ia64_ctx.limit = tsk_context;
75	}
76	read_unlock(&tasklist_lock);
77	/* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
78	{
79	int cpu = get_cpu(); /* prevent preemption/migration */
80	for (i = 0; i < NR_CPUS; ++i)
81	if (cpu_online(i) && (i != cpu))
82	per_cpu(ia64_need_tlb_flush, i) = 1;
83	put_cpu();
84	}
85	local_flush_tlb_all();	84	local_flush_tlb_all();
86	}	85	}
87		86
88	void	87	void
89	ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)	88	ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
		89	unsigned long end, unsigned long nbits)
90	{	90	{
91	static DEFINE_SPINLOCK(ptcg_lock);	91	static DEFINE_SPINLOCK(ptcg_lock);
92		92
		93	if (mm != current->active_mm) {
		94	flush_tlb_all();
		95	return;
		96	}
		97
93	/* HW requires global serialization of ptc.ga. */	98	/* HW requires global serialization of ptc.ga. */
94	spin_lock(&ptcg_lock);	99	spin_lock(&ptcg_lock);
95	{	100	{
@@ -129,36 +134,37 @@ local_flush_tlb_all (void)
129	}	134	}
130		135
131	void	136	void
132	flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)	137	flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
		138	unsigned long end)
133	{	139	{
134	struct mm_struct *mm = vma->vm_mm;	140	struct mm_struct *mm = vma->vm_mm;
135	unsigned long size = end - start;	141	unsigned long size = end - start;
136	unsigned long nbits;	142	unsigned long nbits;
137		143
		144	#ifndef CONFIG_SMP
138	if (mm != current->active_mm) {	145	if (mm != current->active_mm) {
139	/* this does happen, but perhaps it's not worth optimizing for? */
140	#ifdef CONFIG_SMP
141	flush_tlb_all();
142	#else
143	mm->context = 0;	146	mm->context = 0;
144	#endif
145	return;	147	return;
146	}	148	}
		149	#endif
147		150
148	nbits = ia64_fls(size + 0xfff);	151	nbits = ia64_fls(size + 0xfff);
149	while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))	152	while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
		153	(nbits < purge.max_bits))
150	++nbits;	154	++nbits;
151	if (nbits > purge.max_bits)	155	if (nbits > purge.max_bits)
152	nbits = purge.max_bits;	156	nbits = purge.max_bits;
153	start &= ~((1UL << nbits) - 1);	157	start &= ~((1UL << nbits) - 1);
154		158
155	# ifdef CONFIG_SMP	159	# ifdef CONFIG_SMP
156	platform_global_tlb_purge(start, end, nbits);	160	platform_global_tlb_purge(mm, start, end, nbits);
157	# else	161	# else
		162	preempt_disable();
158	do {	163	do {
159	ia64_ptcl(start, (nbits<<2));	164	ia64_ptcl(start, (nbits<<2));
160	start += (1UL << nbits);	165	start += (1UL << nbits);
161	} while (start < end);	166	} while (start < end);
		167	preempt_enable();
162	# endif	168	# endif
163		169
164	ia64_srlz_i(); /* srlz.i implies srlz.d */	170	ia64_srlz_i(); /* srlz.i implies srlz.d */
@@ -186,5 +192,5 @@ ia64_tlb_init (void)
186	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];	192	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
187	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];	193	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
188		194
189	local_flush_tlb_all(); /* nuke left overs from bootstrapping... */	195	local_flush_tlb_all(); /* nuke left overs from bootstrapping... */
190	}	196	}