Diffstat (limited to 'arch/sparc')
-rw-r--r--  arch/sparc/include/asm/page_64.h |  1 +
-rw-r--r--  arch/sparc/include/asm/smp_64.h  |  2 ++
-rw-r--r--  arch/sparc/kernel/setup_64.c     | 26 +++++++++++++++++++++
-rw-r--r--  arch/sparc/kernel/smp_64.c       | 14 ++++++++++
-rw-r--r--  arch/sparc/mm/fault_64.c         |  1 +
-rw-r--r--  arch/sparc/mm/init_64.c          | 22 +++-----------------
-rw-r--r--  arch/sparc/mm/tlb.c              | 35 +++++++++++++++------
-rw-r--r--  arch/sparc/mm/tsb.c              | 18 +++++++-------
8 files changed, 90 insertions(+), 29 deletions(-)
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index 8c2a8c937540..c1263fc390db 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -25,6 +25,7 @@
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1UL))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#define REAL_HPAGE_PER_HPAGE	(_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
 #endif
 
 #ifndef __ASSEMBLY__
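
The new macro captures the ratio between a Linux huge page and the hardware
("real") huge pages that back it. A minimal userspace sketch of the
arithmetic, assuming the shift values defined earlier in this header
(HPAGE_SHIFT = 23 for the 8MB huge page, REAL_HPAGE_SHIFT = 22 for the 4MB
TTE):

    #include <stdio.h>

    #define HPAGE_SHIFT		23	/* 8MB Linux huge page (assumed)   */
    #define REAL_HPAGE_SHIFT	22	/* 4MB hardware TTE page (assumed) */
    #define REAL_HPAGE_PER_HPAGE	(1UL << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))

    int main(void)
    {
    	/* Each 8MB huge page is backed by two 4MB TTEs, so it needs
    	 * two TSB entries rather than one. */
    	printf("REAL_HPAGE_PER_HPAGE = %lu\n", REAL_HPAGE_PER_HPAGE);
    	return 0;
    }

With those shifts the program prints 2, which is the factor applied in the
fault_64.c and tsb.c hunks below.
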
diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h
index 26d9e7726867..ce2233f7e662 100644
--- a/arch/sparc/include/asm/smp_64.h
+++ b/arch/sparc/include/asm/smp_64.h
@@ -43,6 +43,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 int hard_smp_processor_id(void);
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
+void smp_fill_in_cpu_possible_map(void);
 void smp_fill_in_sib_core_maps(void);
 void cpu_play_dead(void);
 
@@ -72,6 +73,7 @@ void __cpu_die(unsigned int cpu);
 #define smp_fill_in_sib_core_maps() do { } while (0)
 #define smp_fetch_global_regs() do { } while (0)
 #define smp_fetch_global_pmu() do { } while (0)
+#define smp_fill_in_cpu_possible_map() do { } while (0)
 
 #endif /* !(CONFIG_SMP) */
 
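
As with the neighboring stubs, the !CONFIG_SMP variant is a `do { } while (0)`
macro: it compiles away to nothing yet still parses as a single statement, so
call sites need no #ifdef guards. A small illustration (the caller is
hypothetical):

    #define smp_fill_in_cpu_possible_map() do { } while (0)

    /* Hypothetical caller: the macro expands to a single statement, so an
     * unbraced if/else parses exactly as it would with the real function. */
    void boot_step(int want_smp)
    {
    	if (want_smp)
    		smp_fill_in_cpu_possible_map();
    	else
    		return;
    }
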
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 599f1207eed2..6b7331d198e9 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -31,6 +31,7 @@
 #include <linux/initrd.h>
 #include <linux/module.h>
 #include <linux/start_kernel.h>
+#include <linux/bootmem.h>
 
 #include <asm/io.h>
 #include <asm/processor.h>
@@ -50,6 +51,8 @@
 #include <asm/elf.h>
 #include <asm/mdesc.h>
 #include <asm/cacheflush.h>
+#include <asm/dma.h>
+#include <asm/irq.h>
 
 #ifdef CONFIG_IP_PNP
 #include <net/ipconfig.h>
@@ -590,6 +593,22 @@ static void __init init_sparc64_elf_hwcap(void)
 		pause_patch();
 }
 
+void __init alloc_irqstack_bootmem(void)
+{
+	unsigned int i, node;
+
+	for_each_possible_cpu(i) {
+		node = cpu_to_node(i);
+
+		softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+							THREAD_SIZE,
+							THREAD_SIZE, 0);
+		hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+							THREAD_SIZE,
+							THREAD_SIZE, 0);
+	}
+}
+
 void __init setup_arch(char **cmdline_p)
 {
 	/* Initialize PROM console and command line. */
@@ -650,6 +669,13 @@ void __init setup_arch(char **cmdline_p)
 
 	paging_init();
 	init_sparc64_elf_hwcap();
+	smp_fill_in_cpu_possible_map();
+	/*
+	 * Once the OF device tree and MDESC have been setup and nr_cpus has
+	 * been parsed, we know the list of possible cpus. Therefore we can
+	 * allocate the IRQ stacks.
+	 */
+	alloc_irqstack_bootmem();
 }
 
 extern int stop_a_enabled;
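
The placement is the substance of this hunk: __alloc_bootmem_node() can only
run once bootmem_init() has executed (inside paging_init()), and
for_each_possible_cpu() only walks a finalized map after
smp_fill_in_cpu_possible_map(). A condensed sketch of the resulting call
order (comments are mine):

    void __init setup_arch(char **cmdline_p)
    {
    	/* ... PROM console, command line (including nr_cpus=) ... */
    	paging_init();			/* runs bootmem_init()         */
    	init_sparc64_elf_hwcap();
    	smp_fill_in_cpu_possible_map();	/* cpu_possible_mask finalized */
    	alloc_irqstack_bootmem();	/* now safe on both counts     */
    }
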
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 8a6151a628ce..d3035ba6cd31 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1227,6 +1227,20 @@ void __init smp_setup_processor_id(void)
 		xcall_deliver_impl = hypervisor_xcall_deliver;
 }
 
+void __init smp_fill_in_cpu_possible_map(void)
+{
+	int possible_cpus = num_possible_cpus();
+	int i;
+
+	if (possible_cpus > nr_cpu_ids)
+		possible_cpus = nr_cpu_ids;
+
+	for (i = 0; i < possible_cpus; i++)
+		set_cpu_possible(i, true);
+	for (; i < NR_CPUS; i++)
+		set_cpu_possible(i, false);
+}
+
 void smp_fill_in_sib_core_maps(void)
 {
 	unsigned int i;
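
The function honors the nr_cpus= boot parameter by clamping to nr_cpu_ids
and then explicitly clears every id above the clamp. A userspace model, not
the kernel code (names and numbers are illustrative):

    #include <assert.h>
    #include <stdbool.h>

    #define NR_CPUS 64			/* hypothetical config      */

    static bool cpu_possible[NR_CPUS];

    static void fill_in_cpu_possible_map(int num_possible, int nr_cpu_ids)
    {
    	int possible_cpus = num_possible;
    	int i;

    	if (possible_cpus > nr_cpu_ids)	/* honor nr_cpus= clamp     */
    		possible_cpus = nr_cpu_ids;

    	for (i = 0; i < possible_cpus; i++)
    		cpu_possible[i] = true;
    	for (; i < NR_CPUS; i++)	/* clear everything above   */
    		cpu_possible[i] = false;
    }

    int main(void)
    {
    	fill_in_cpu_possible_map(32, 8);	/* MDESC: 32, nr_cpus=8 */
    	assert(cpu_possible[7] && !cpu_possible[8]);
    	return 0;
    }
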
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index e16fdd28a931..3f291d8c57f7 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -484,6 +484,7 @@ good_area:
 		tsb_grow(mm, MM_TSB_BASE, mm_rss);
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count;
+	mm_rss *= REAL_HPAGE_PER_HPAGE;
 	if (unlikely(mm_rss >
 		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
 		if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
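
Without the multiply, the huge TSB was sized for one entry per 8MB huge page
even though each huge page occupies REAL_HPAGE_PER_HPAGE (two) 4MB TTE slots,
so the TSB ran at half the needed size. A tiny worked check with assumed
numbers:

    #include <assert.h>

    #define REAL_HPAGE_PER_HPAGE	2UL	/* 8MB HPAGE / 4MB real page */

    int main(void)
    {
    	unsigned long huge_ptes = 512;	/* hypothetical workload     */
    	unsigned long mm_rss = huge_ptes * REAL_HPAGE_PER_HPAGE;

    	/* tsb_grow() is now asked for one entry per 4MB TTE: */
    	assert(mm_rss == 1024);
    	return 0;
    }
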
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 65457c9f1365..7ac6b62fb7c1 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1160,7 +1160,7 @@ int __node_distance(int from, int to)
 	return numa_latency[from][to];
 }
 
-static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
+static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
 {
 	int i;
 
@@ -1173,8 +1173,8 @@ static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
 	return i;
 }
 
-static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp,
-					  int index)
+static void __init find_numa_latencies_for_group(struct mdesc_handle *md,
+						 u64 grp, int index)
 {
 	u64 arc;
 
@@ -2081,7 +2081,6 @@ void __init paging_init(void)
 {
 	unsigned long end_pfn, shift, phys_base;
 	unsigned long real_end, i;
-	int node;
 
 	setup_page_offset();
 
@@ -2250,21 +2249,6 @@ void __init paging_init(void)
 	/* Setup bootmem... */
 	last_valid_pfn = end_pfn = bootmem_init(phys_base);
 
-	/* Once the OF device tree and MDESC have been setup, we know
-	 * the list of possible cpus.  Therefore we can allocate the
-	 * IRQ stacks.
-	 */
-	for_each_possible_cpu(i) {
-		node = cpu_to_node(i);
-
-		softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
-							THREAD_SIZE,
-							THREAD_SIZE, 0);
-		hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
-							THREAD_SIZE,
-							THREAD_SIZE, 0);
-	}
-
 	kernel_physical_mapping_init();
 
 	{
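
Besides moving the IRQ-stack allocation out to setup_arch(), this file marks
the two NUMA helpers __init: they are reachable only from boot-time setup,
so they can live in .init.text, which free_initmem() discards after boot
(and modpost warns if a non-init caller ever appears). A minimal
kernel-style sketch of the pattern (function names hypothetical):

    #include <linux/init.h>

    /* Only called while bringing up NUMA; freed with the rest of
     * .init.text once boot completes. */
    static int __init pick_best_node(void)
    {
    	return 0;
    }

    static void __init numa_boot_setup(void)
    {
    	(void)pick_best_node();	/* OK: __init calling __init */
    }
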
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index 3659d37b4d81..c56a195c9071 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -174,10 +174,25 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		return;
 
 	if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
-		if (pmd_val(pmd) & _PAGE_PMD_HUGE)
-			mm->context.thp_pte_count++;
-		else
-			mm->context.thp_pte_count--;
+		/*
+		 * Note that this routine only sets pmds for THP pages.
+		 * Hugetlb pages are handled elsewhere. We need to check
+		 * for huge zero page. Huge zero pages are like hugetlb
+		 * pages in that there is no RSS, but there is the need
+		 * for TSB entries. So, huge zero page counts go into
+		 * hugetlb_pte_count.
+		 */
+		if (pmd_val(pmd) & _PAGE_PMD_HUGE) {
+			if (is_huge_zero_page(pmd_page(pmd)))
+				mm->context.hugetlb_pte_count++;
+			else
+				mm->context.thp_pte_count++;
+		} else {
+			if (is_huge_zero_page(pmd_page(orig)))
+				mm->context.hugetlb_pte_count--;
+			else
+				mm->context.thp_pte_count--;
+		}
 
 		/* Do not try to allocate the TSB hash table if we
 		 * don't have one already.  We have various locks held
@@ -204,6 +219,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 	}
 }
 
+/*
+ * This routine is only called when splitting a THP
+ */
 void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {
@@ -213,6 +231,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 
 	set_pmd_at(vma->vm_mm, address, pmdp, entry);
 	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+	/*
+	 * set_pmd_at() will not be called in a way to decrement
+	 * thp_pte_count when splitting a THP, so do it now.
+	 * Sanity check pmd before doing the actual decrement.
+	 */
+	if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
+	    !is_huge_zero_page(pmd_page(entry)))
+		(vma->vm_mm)->context.thp_pte_count--;
 }
 
 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
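
Two accounting rules fall out of these hunks: the shared huge zero page needs
a TSB entry but carries no RSS, so it is tallied in hugetlb_pte_count rather
than thp_pte_count; and a THP split must decrement thp_pte_count inside
pmdp_invalidate(), because set_pmd_at() never sees a decrementing transition
there. A userspace model of the counter transitions (the scenario is
assumed, not taken from the patch):

    #include <assert.h>
    #include <stdbool.h>

    struct ctx { long hugetlb_pte_count, thp_pte_count; };

    /* Model of the set_pmd_at() bookkeeping above. */
    static void huge_mapping_changed(struct ctx *c, bool mapping, bool zero_page)
    {
    	long delta = mapping ? 1 : -1;

    	if (zero_page)
    		c->hugetlb_pte_count += delta;
    	else
    		c->thp_pte_count += delta;
    }

    int main(void)
    {
    	struct ctx c = { 0, 0 };

    	huge_mapping_changed(&c, true, true);	/* read fault: zero page */
    	huge_mapping_changed(&c, false, true);	/* replaced on write ... */
    	huge_mapping_changed(&c, true, false);	/* ... by a real THP     */
    	assert(c.hugetlb_pte_count == 0 && c.thp_pte_count == 1);
    	return 0;
    }
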
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index 6725ed45580e..f2b77112e9d8 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -469,8 +469,10 @@ retry_tsb_alloc:
 
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
+	unsigned long mm_rss = get_mm_rss(mm);
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-	unsigned long total_huge_pte_count;
+	unsigned long saved_hugetlb_pte_count;
+	unsigned long saved_thp_pte_count;
 #endif
 	unsigned int i;
 
@@ -483,10 +485,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	 * will re-increment the counters as the parent PTEs are
 	 * copied into the child address space.
 	 */
-	total_huge_pte_count = mm->context.hugetlb_pte_count +
-			 mm->context.thp_pte_count;
+	saved_hugetlb_pte_count = mm->context.hugetlb_pte_count;
+	saved_thp_pte_count = mm->context.thp_pte_count;
 	mm->context.hugetlb_pte_count = 0;
 	mm->context.thp_pte_count = 0;
+
+	mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);
 #endif
 
 	/* copy_mm() copies over the parent's mm_struct before calling
@@ -499,11 +503,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	/* If this is fork, inherit the parent's TSB size.  We would
 	 * grow it to that size on the first page fault anyways.
 	 */
-	tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
+	tsb_grow(mm, MM_TSB_BASE, mm_rss);
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-	if (unlikely(total_huge_pte_count))
-		tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count);
+	if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count))
+		tsb_grow(mm, MM_TSB_HUGE,
+			 (saved_hugetlb_pte_count + saved_thp_pte_count) *
+			 REAL_HPAGE_PER_HPAGE);
 #endif
 
 	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
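
On fork, get_mm_rss() still includes the parent's THP pages, but those are
mapped through the huge TSB; with 8KB base pages each 8MB THP contributes
HPAGE_SIZE / PAGE_SIZE = 1024 pages of RSS that should not inflate the base
TSB. A worked check of both sizing inputs, with assumed counts:

    #include <assert.h>

    #define PAGE_SIZE		(8UL << 10)	/* sparc64 base page */
    #define HPAGE_SIZE		(8UL << 20)	/* 8MB huge page     */
    #define REAL_HPAGE_PER_HPAGE	2UL

    int main(void)
    {
    	/* Hypothetical parent: 100 small pages, 4 THPs, no hugetlb. */
    	unsigned long saved_thp = 4, saved_hugetlb = 0;
    	unsigned long mm_rss = 100 + saved_thp * (HPAGE_SIZE / PAGE_SIZE);

    	/* Base TSB: strip the THP contribution (4 * 1024 pages). */
    	mm_rss -= saved_thp * (HPAGE_SIZE / PAGE_SIZE);
    	assert(mm_rss == 100);

    	/* Huge TSB: one entry per 4MB TTE of each huge page. */
    	assert((saved_hugetlb + saved_thp) * REAL_HPAGE_PER_HPAGE == 8);
    	return 0;
    }
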