5 files changed, 112 insertions, 115 deletions
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 6daf15ac8940..91a055f5731f 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -61,7 +61,8 @@ show_mem (void)
        printk("%d reserved pages\n", reserved);
        printk("%d pages shared\n", shared);
        printk("%d pages swap cached\n", cached);
-        printk("%ld pages in page table cache\n", pgtable_cache_size);
+        printk("%ld pages in page table cache\n",
+                pgtable_quicklist_total_size());
 }
 /* physical address where the bootmem map is located */
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 3456a9b6971e..c00710929390 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -582,7 +582,8 @@ void show_mem(void)
        printk("%d reserved pages\n", total_reserved);
        printk("%d pages shared\n", total_shared);
        printk("%d pages swap cached\n", total_cached);
-        printk("Total of %ld pages in page table cache\n", pgtable_cache_size);
+        printk("Total of %ld pages in page table cache\n",
+                pgtable_quicklist_total_size());
        printk("%d free buffer pages\n", nr_free_buffer_pages());
 }
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 65cf839573ea..4892be53e227 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -39,6 +39,9 @@
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+/* Per-CPU head of the free-list of cached page-table pages. */
+DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
+/* Per-CPU count of the pages currently held on that list. */
+DEFINE_PER_CPU(long, __pgtable_quicklist_size);
 extern void ia64_tlb_init (void);
 unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
@@ -50,27 +53,53 @@ struct page *vmem_map;
 EXPORT_SYMBOL(vmem_map);
 #endif
-static int pgt_cache_water[2] = { 25, 50 };
+struct page *zero_page_memmap_ptr;      /* map entry for zero page */
-struct page *zero_page_memmap_ptr;              /* map entry for zero page */
 EXPORT_SYMBOL(zero_page_memmap_ptr);
+/* A CPU's quicklist is never trimmed below this many pages. */
+#define MIN_PGT_PAGES                   25UL
+/*
+ * Cap on pages released per trimming pass.  Typed long so that it matches
+ * the long operand it is compared against in the type-checked min().
+ */
+#define MAX_PGT_FREES_PER_PASS          16L
+/* The quicklist limit is 1/16th of the free pages counted by max_pgt_pages(). */
+#define PGT_FRACTION_OF_NODE_MEM        16
+/*
+ * Upper limit for this CPU's page-table quicklist: the free page count (of
+ * the local node when CONFIG_NUMA is set) divided by
+ * PGT_FRACTION_OF_NODE_MEM, but never less than MIN_PGT_PAGES.
+ */
+static inline long
+max_pgt_pages(void)
+{
+        u64 free_pages, limit;
+#ifdef CONFIG_NUMA
+        free_pages = nr_free_pages_pgdat(NODE_DATA(numa_node_id()));
+#else
+        free_pages = nr_free_pages();
+#endif
+        limit = free_pages / PGT_FRACTION_OF_NODE_MEM;
+        if (limit < MIN_PGT_PAGES)
+                limit = MIN_PGT_PAGES;
+        return limit;
+}
+/*
+ * Number of pages to release from this CPU's quicklist in one pass: the
+ * excess over max_pgt_pages(), capped at MAX_PGT_FREES_PER_PASS.  The result
+ * is zero or negative when the list is within its limit.
+ */
+static inline long
+min_pages_to_free(void)
+{
+        long excess = pgtable_quicklist_size - max_pgt_pages();
+        return min(excess, MAX_PGT_FREES_PER_PASS);
+}
+/*
+ * Trim this CPU's page-table quicklist.  Nothing is done while the list holds
+ * MIN_PGT_PAGES or fewer pages.  Otherwise pages are popped from the list and
+ * handed to free_page() in passes of at most min_pages_to_free() pages, until
+ * that function no longer reports a positive excess.
+ */
 void
-check_pgt_cache (void)
+check_pgt_cache(void)
 {
-        int low, high;
+        long pages_to_free;
-        low = pgt_cache_water[0];
+        if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES))
-        high = pgt_cache_water[1];
+                return;
        preempt_disable();
-        if (pgtable_cache_size > (u64) high) {
+        while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
-                do {
+                while (pages_to_free--) {
-                        if (pgd_quicklist)
+                        free_page((unsigned long)pgtable_quicklist_alloc());
-                                free_page((unsigned long)pgd_alloc_one_fast(NULL));
+                }
-                        if (pmd_quicklist)
+                /* Open a preemption window between passes. */
+                preempt_enable();
-                                free_page((unsigned long)pmd_alloc_one_fast(NULL, 0));
+                preempt_disable();
-                } while (pgtable_cache_size > (u64) low);
        }
        preempt_enable();
 }
@@ -523,11 +552,14 @@ void
 mem_init (void)
 {
        long reserved_pages, codesize, datasize, initsize;
-        unsigned long num_pgt_pages;
        pg_data_t *pgdat;
        int i;
        static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel;
+        BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
+        BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE);
+        BUG_ON(PTRS_PER_PTE * sizeof(pte_t) != PAGE_SIZE);
 #ifdef CONFIG_PCI
        /*
         * This needs to be called _after_ the command line has been parsed but _before_
@@ -564,18 +596,6 @@ mem_init (void)
               num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
               reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
-        /*
-         * Allow for enough (cached) page table pages so that we can map the entire memory
-         * at least once.  Each task also needs a couple of page tables pages, so add in a
-         * fudge factor for that (don't use "threads-max" here; that would be wrong!).
-         * Don't allow the cache to be more than 10% of total memory, though.
-         */
-#       define NUM_TASKS        500     /* typical number of tasks */
-        num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS;
-        if (num_pgt_pages > nr_free_pages() / 10)
-                num_pgt_pages = nr_free_pages() / 10;
-        if (num_pgt_pages > (u64) pgt_cache_water[1])
-                pgt_cache_water[1] = num_pgt_pages;
        /*
         * For fsyscall entrpoints with no light-weight handler, use the ordinary
diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h
index 0f05dc8bd460..e86a8c331ee6 100644
--- a/include/asm-ia64/pgalloc.h
+++ b/include/asm-ia64/pgalloc.h
@@ -22,146 +22,124 @@
 #include <asm/mmu_context.h>
-/*
+/*
+ * A single per-CPU free-list of page-table pages serves the pgd, pmd and pte
+ * allocators below.  The first word of each cached page links to the next
+ * page on the list; pgtable_quicklist_size counts the cached pages.
+ */
+DECLARE_PER_CPU(unsigned long *, __pgtable_quicklist);
- * Very stupidly, we used to get new pgd's and pmd's, init their contents
+#define pgtable_quicklist __ia64_per_cpu_var(__pgtable_quicklist)
- * to point to the NULL versions of the next level page table, later on
+DECLARE_PER_CPU(long, __pgtable_quicklist_size);
- * completely re-init them the same way, then free them up.  This wasted
+#define pgtable_quicklist_size __ia64_per_cpu_var(__pgtable_quicklist_size)
- * a lot of work and caused unnecessary memory traffic.  How broken...
- * We fix this by caching them.
- */
-#define pgd_quicklist           (local_cpu_data->pgd_quick)
-#define pmd_quicklist           (local_cpu_data->pmd_quick)
-#define pgtable_cache_size      (local_cpu_data->pgtable_cache_sz)
-static inline pgd_t*
+/*
+ * Return the sum of the per-CPU quicklist sizes over all online CPUs.  The
+ * counters are read without any locking, so the result is only a snapshot.
+ */
+static inline long pgtable_quicklist_total_size(void)
-pgd_alloc_one_fast (struct mm_struct *mm)
+{
+        long ql_size = 0;       /* accumulator: must start at zero */
+        int cpuid;
+        for_each_online_cpu(cpuid) {
+                ql_size += per_cpu(__pgtable_quicklist_size, cpuid);
+        }
+        return ql_size;
+}
+/*
+ * Take a page-table page from this CPU's quicklist, or fall back to the page
+ * allocator when the list is empty.  Returns NULL if that allocation fails.
+ *
+ * The GFP_KERNEL fallback may sleep, so it is issued only after preemption
+ * has been re-enabled.
+ */
+static inline void *pgtable_quicklist_alloc(void)
 {
        unsigned long *ret = NULL;
        preempt_disable();
-        ret = pgd_quicklist;
+        ret = pgtable_quicklist;
        if (likely(ret != NULL)) {
-                pgd_quicklist = (unsigned long *)(*ret);
+                pgtable_quicklist = (unsigned long *)(*ret);
                ret[0] = 0;
-                --pgtable_cache_size;
+                --pgtable_quicklist_size;
-        } else
+        }
-                ret = NULL;
        preempt_enable();
+        if (unlikely(ret == NULL))
+                ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-        return (pgd_t *) ret;
+        return ret;
 }
-static inline pgd_t*
+/*
+ * Push a page-table page back onto this CPU's quicklist.  The first word of
+ * the page is overwritten with the link to the previous list head.
+ *
+ * With CONFIG_NUMA, a page whose node differs from numa_node_id() is given
+ * back to the page allocator instead of being cached.
+ *
+ * NOTE(review): the node check runs before preempt_disable(), so the task
+ * could in principle migrate between the check and the push -- confirm this
+ * is acceptable.
+ */
+static inline void pgtable_quicklist_free(void *pgtable_entry)
-pgd_alloc (struct mm_struct *mm)
 {
-        /* the VM system never calls pgd_alloc_one_fast(), so we do it here. */
+#ifdef CONFIG_NUMA
-        pgd_t *pgd = pgd_alloc_one_fast(mm);
+        unsigned long nid = page_to_nid(virt_to_page(pgtable_entry));
-        if (unlikely(pgd == NULL)) {
+        if (unlikely(nid != numa_node_id())) {
-                pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
+                free_page((unsigned long)pgtable_entry);
+                return;
        }
-        return pgd;
+#endif
-}
-static inline void
-pgd_free (pgd_t *pgd)
-{
        preempt_disable();
-        *(unsigned long *)pgd = (unsigned long) pgd_quicklist;
+        *(unsigned long *)pgtable_entry = (unsigned long)pgtable_quicklist;
-        pgd_quicklist = (unsigned long *) pgd;
+        pgtable_quicklist = (unsigned long *)pgtable_entry;
-        ++pgtable_cache_size;
+        ++pgtable_quicklist_size;
        preempt_enable();
 }
-static inline void
+/* Allocate a pgd page via pgtable_quicklist_alloc(); mm is not used. */
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
-pud_populate (struct mm_struct *mm, pud_t *pud_entry, pmd_t *pmd)
 {
-        pud_val(*pud_entry) = __pa(pmd);
+        return pgtable_quicklist_alloc();
 }
-static inline pmd_t*
+/* Release a pgd page through pgtable_quicklist_free(). */
+static inline void pgd_free(pgd_t * pgd)
-pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr)
 {
-        unsigned long *ret = NULL;
+        pgtable_quicklist_free(pgd);
-        preempt_disable();
-        ret = (unsigned long *)pmd_quicklist;
-        if (likely(ret != NULL)) {
-                pmd_quicklist = (unsigned long *)(*ret);
-                ret[0] = 0;
-                --pgtable_cache_size;
-        }
-        preempt_enable();
-        return (pmd_t *)ret;
 }
-static inline pmd_t*
+/* Store the physical address (__pa) of the pmd page in *pud_entry. */
+static inline void
-pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
+pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
 {
-        pmd_t *pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+        pud_val(*pud_entry) = __pa(pmd);
+}
-        return pmd;
+/* Allocate a pmd page via pgtable_quicklist_alloc(); mm and addr are not used. */
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+        return pgtable_quicklist_alloc();
 }
-static inline void
+/* Release a pmd page through pgtable_quicklist_free(). */
+static inline void pmd_free(pmd_t * pmd)
-pmd_free (pmd_t *pmd)
 {
-        preempt_disable();
+        pgtable_quicklist_free(pmd);
-        *(unsigned long *)pmd = (unsigned long) pmd_quicklist;
-        pmd_quicklist = (unsigned long *) pmd;
-        ++pgtable_cache_size;
-        preempt_enable();
 }
 #define __pmd_free_tlb(tlb, pmd)        pmd_free(pmd)
+/* Store the physical address of the pte page (given as a struct page) in *pmd_entry. */
 static inline void
-pmd_populate (struct mm_struct *mm, pmd_t *pmd_entry, struct page *pte)
+pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, struct page *pte)
 {
        pmd_val(*pmd_entry) = page_to_phys(pte);
 }
+/* Store the physical address (__pa) of the pte table, given by virtual address, in *pmd_entry. */
 static inline void
-pmd_populate_kernel (struct mm_struct *mm, pmd_t *pmd_entry, pte_t *pte)
+pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
 {
        pmd_val(*pmd_entry) = __pa(pte);
 }
-static inline struct page *
+/*
+ * Allocate a pte page and return its struct page, or NULL when no page could
+ * be obtained.  The NULL check must precede virt_to_page(), which would
+ * otherwise turn a failed allocation into a bogus non-NULL pointer.
+ */
+static inline struct page *pte_alloc_one(struct mm_struct *mm,
-pte_alloc_one (struct mm_struct *mm, unsigned long addr)
+                                         unsigned long addr)
 {
-        struct page *pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+        void *pg = pgtable_quicklist_alloc();
-        return pte;
+        return pg ? virt_to_page(pg) : NULL;
 }
-static inline pte_t *
+/* Allocate a pte page via pgtable_quicklist_alloc() and return its address. */
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-pte_alloc_one_kernel (struct mm_struct *mm, unsigned long addr)
+                                          unsigned long addr)
 {
-        pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+        return pgtable_quicklist_alloc();
-        return pte;
 }
-static inline void
+/* Release a pte page, given as a struct page, through pgtable_quicklist_free(). */
+static inline void pte_free(struct page *pte)
-pte_free (struct page *pte)
 {
-        __free_page(pte);
+        pgtable_quicklist_free(page_address(pte));
 }
-static inline void
+/* Release a pte page, given by address, through pgtable_quicklist_free(). */
+static inline void pte_free_kernel(pte_t * pte)
-pte_free_kernel (pte_t *pte)
 {
-        free_page((unsigned long) pte);
+        pgtable_quicklist_free(pte);
 }
-#define __pte_free_tlb(tlb, pte)        tlb_remove_page((tlb), (pte))
+/*
+ * NOTE(review): the pte page now goes straight to pte_free() and the tlb
+ * argument is ignored, where tlb_remove_page() was used before -- confirm
+ * that recycling the page at this point is safe.
+ */
+#define __pte_free_tlb(tlb, pte)        pte_free(pte)
-extern void check_pgt_cache (void);
+extern void check_pgt_cache(void);
-#endif /* _ASM_IA64_PGALLOC_H */
+#endif                          /* _ASM_IA64_PGALLOC_H */
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index 2807f8d766d4..983798ec1791 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -137,9 +137,6 @@ struct cpuinfo_ia64 {
        __u64 nsec_per_cyc;     /* (1000000000<<IA64_NSEC_PER_CYC_SHIFT)/itc_freq */
        __u64 unimpl_va_mask;   /* mask of unimplemented virtual address bits (from PAL) */
        __u64 unimpl_pa_mask;   /* mask of unimplemented physical address bits (from PAL) */
-        __u64 *pgd_quick;
-        __u64 *pmd_quick;
-        __u64 pgtable_cache_sz;
        __u64 itc_freq;         /* frequency of ITC counter */
        __u64 proc_freq;        /* frequency of processor */
        __u64 cyc_per_usec;     /* itc_freq/1000000 */