author     Linus Torvalds <torvalds@g5.osdl.org>   2006-03-20 14:57:50 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>   2006-03-20 14:57:50 -0500
commit     c4a1745aa09fc110afdefea0e5d025043e348bae (patch)
tree       6d28dc3a0c1bf18437b3d49f28e5c81b850cdb2f /arch/sparc64/mm/tsb.c
parent     88dcb91177cfa5b26143a29074389a2aa259c7cf (diff)
parent     ac0eb3eb7e54b700386068be025a43d2a3958ee5 (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6: (230 commits)
[SPARC64]: Update defconfig.
[SPARC64]: Fix 2 bugs in huge page support.
[SPARC64]: CONFIG_BLK_DEV_RAM fix
[SPARC64]: Optimized TSB table initialization.
[SPARC64]: Allow CONFIG_MEMORY_HOTPLUG to build.
[SPARC64]: Use SLAB caches for TSB tables.
[SPARC64]: Don't kill the page allocator when growing a TSB.
[SPARC64]: Randomize mm->mmap_base when PF_RANDOMIZE is set.
[SPARC64]: Increase top of 32-bit process stack.
[SPARC64]: Top-down address space allocation for 32-bit tasks.
[SPARC64] bbc_i2c: Fix cpu check and add missing module license.
[SPARC64]: Fix and re-enable dynamic TSB sizing.
[SUNSU]: Fix missing spinlock initialization.
[TG3]: Do not try to access NIC_SRAM_DATA_SIG on Sun parts.
[SPARC64]: First cut at VIS simulator for Niagara.
[SPARC64]: Fix system type in /proc/cpuinfo and remove bogus OBP check.
[SPARC64]: Add SMT scheduling support for Niagara.
[SPARC64]: Fix 32-bit truncation which broke sparsemem.
[SPARC64]: Move over to sparsemem.
[SPARC64]: Fix new context version SMP handling.
...
Diffstat (limited to 'arch/sparc64/mm/tsb.c')
-rw-r--r-- | arch/sparc64/mm/tsb.c | 440
1 file changed, 440 insertions, 0 deletions
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
new file mode 100644
index 000000000000..b2064e2a44d6
--- /dev/null
+++ b/arch/sparc64/mm/tsb.c
@@ -0,0 +1,440 @@
/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <asm/system.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tsb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long nentries)
{
	vaddr >>= PAGE_SHIFT;
	return vaddr & (nentries - 1);
}

static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
	return (tag == (vaddr >> 22));
}
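
/* For illustration only (the 512-entry table size here is an arbitrary
 * example): with sparc64's 8K base pages (PAGE_SHIFT == 13),
 * tsb_hash(0xf0004000, 512) == (0xf0004000 >> 13) & 511 == 2, and the tag
 * an entry for that address carries is 0xf0004000 >> 22 == 0x3c0.
 */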

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long v;

	for (v = start; v < end; v += PAGE_SIZE) {
		unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES);
		struct tsb *ent = &swapper_tsb[hash];

		if (tag_compare(ent->tag, v)) {
			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
			membar_storeload_storestore();
		}
	}
}

void flush_tsb_user(struct mmu_gather *mp)
{
	struct mm_struct *mm = mp->mm;
	unsigned long nentries, base, flags;
	struct tsb *tsb;
	int i;

	spin_lock_irqsave(&mm->context.lock, flags);

	tsb = mm->context.tsb;
	nentries = mm->context.tsb_nentries;

	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
		base = __pa(tsb);
	else
		base = (unsigned long) tsb;

	for (i = 0; i < mp->tlb_nr; i++) {
		unsigned long v = mp->vaddrs[i];
		unsigned long tag, ent, hash;

		v &= ~0x1UL;

		hash = tsb_hash(v, nentries);
		ent = base + (hash * sizeof(struct tsb));
		tag = (v >> 22UL);

		tsb_flush(ent, tag);
	}

	spin_unlock_irqrestore(&mm->context.lock, flags);
}

static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
{
	unsigned long tsb_reg, base, tsb_paddr;
	unsigned long page_sz, tte;

	mm->context.tsb_nentries = tsb_bytes / sizeof(struct tsb);

	base = TSBMAP_BASE;
	tte = pgprot_val(PAGE_KERNEL_LOCKED);
	tsb_paddr = __pa(mm->context.tsb);
	BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

	/* Use the smallest page size that can map the whole TSB
	 * in one TLB entry.
	 */
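	/* For example, an 8K TSB fits in a single 8K entry, a 16K-64K TSB
	 * is mapped with one 64K entry, 128K-512K TSBs use a 512K entry,
	 * and the 1MB maximum is covered by one 4MB entry, as the cases
	 * below encode.
	 */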
	switch (tsb_bytes) {
	case 8192 << 0:
		tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
		base += (tsb_paddr & 8192);
#endif
		page_sz = 8192;
		break;

	case 8192 << 1:
		tsb_reg = 0x1UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 2:
		tsb_reg = 0x2UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 3:
		tsb_reg = 0x3UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 4:
		tsb_reg = 0x4UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 5:
		tsb_reg = 0x5UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 6:
		tsb_reg = 0x6UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 7:
		tsb_reg = 0x7UL;
		page_sz = 4 * 1024 * 1024;
		break;

	default:
		BUG();
	};
	tte |= pte_sz_bits(page_sz);

	if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
		/* Physical mapping, no locked TLB entry for TSB. */
		tsb_reg |= tsb_paddr;

		mm->context.tsb_reg_val = tsb_reg;
		mm->context.tsb_map_vaddr = 0;
		mm->context.tsb_map_pte = 0;
	} else {
		tsb_reg |= base;
		tsb_reg |= (tsb_paddr & (page_sz - 1UL));
		tte |= (tsb_paddr & ~(page_sz - 1UL));

		mm->context.tsb_reg_val = tsb_reg;
		mm->context.tsb_map_vaddr = base;
		mm->context.tsb_map_pte = tte;
	}

	/* Setup the Hypervisor TSB descriptor. */
	if (tlb_type == hypervisor) {
		struct hv_tsb_descr *hp = &mm->context.tsb_descr;

		switch (PAGE_SIZE) {
		case 8192:
		default:
			hp->pgsz_idx = HV_PGSZ_IDX_8K;
			break;

		case 64 * 1024:
			hp->pgsz_idx = HV_PGSZ_IDX_64K;
			break;

		case 512 * 1024:
			hp->pgsz_idx = HV_PGSZ_IDX_512K;
			break;

		case 4 * 1024 * 1024:
			hp->pgsz_idx = HV_PGSZ_IDX_4MB;
			break;
		};
		hp->assoc = 1;
		hp->num_ttes = tsb_bytes / 16;
		hp->ctx_idx = 0;
		switch (PAGE_SIZE) {
		case 8192:
		default:
			hp->pgsz_mask = HV_PGSZ_MASK_8K;
			break;

		case 64 * 1024:
			hp->pgsz_mask = HV_PGSZ_MASK_64K;
			break;

		case 512 * 1024:
			hp->pgsz_mask = HV_PGSZ_MASK_512K;
			break;

		case 4 * 1024 * 1024:
			hp->pgsz_mask = HV_PGSZ_MASK_4MB;
			break;
		};
		hp->tsb_base = tsb_paddr;
		hp->resv = 0;
	}
}

static kmem_cache_t *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
	"tsb_8KB",
	"tsb_16KB",
	"tsb_32KB",
	"tsb_64KB",
	"tsb_128KB",
	"tsb_256KB",
	"tsb_512KB",
	"tsb_1MB",
};

void __init tsb_cache_init(void)
{
	unsigned long i;

	for (i = 0; i < 8; i++) {
		unsigned long size = 8192 << i;
		const char *name = tsb_cache_names[i];

		tsb_caches[i] = kmem_cache_create(name,
						  size, size,
						  SLAB_HWCACHE_ALIGN |
						  SLAB_MUST_HWCACHE_ALIGN,
						  NULL, NULL);
		if (!tsb_caches[i]) {
			prom_printf("Could not create %s cache\n", name);
			prom_halt();
		}
	}
}

/* When the RSS of an address space exceeds mm->context.tsb_rss_limit,
 * do_sparc64_fault() invokes this routine to try and grow the TSB.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two. The TSB must be aligned to its size, so e.g. a 512K TSB
 * must be 512K aligned. It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once. Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
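/* A worked example of the sizing above: each struct tsb entry is 16 bytes
 * (tag plus TTE), so an 8K TSB holds 512 translations and is grown once RSS
 * reaches 3/4 of that, i.e. 384 pages; the next size, 16K, holds 1024
 * entries with a 768-page limit, and so on up to the 1MB maximum.
 */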
void tsb_grow(struct mm_struct *mm, unsigned long rss)
{
	unsigned long max_tsb_size = 1 * 1024 * 1024;
	unsigned long new_size, old_size, flags;
	struct tsb *old_tsb, *new_tsb;
	unsigned long new_cache_index, old_cache_index;
	unsigned long new_rss_limit;
	gfp_t gfp_flags;

	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
		max_tsb_size = (PAGE_SIZE << MAX_ORDER);

	new_cache_index = 0;
	for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
		unsigned long n_entries = new_size / sizeof(struct tsb);

		n_entries = (n_entries * 3) / 4;
		if (n_entries > rss)
			break;

		new_cache_index++;
	}

	if (new_size == max_tsb_size)
		new_rss_limit = ~0UL;
	else
		new_rss_limit = ((new_size / sizeof(struct tsb)) * 3) / 4;

retry_tsb_alloc:
	gfp_flags = GFP_KERNEL;
	if (new_size > (PAGE_SIZE * 2))
		gfp_flags = __GFP_NOWARN | __GFP_NORETRY;

	new_tsb = kmem_cache_alloc(tsb_caches[new_cache_index], gfp_flags);
	if (unlikely(!new_tsb)) {
		/* Not being able to fork due to a high-order TSB
		 * allocation failure is very bad behavior. Just back
		 * down to a 0-order allocation and force no TSB
		 * growing for this address space.
		 */
		if (mm->context.tsb == NULL && new_cache_index > 0) {
			new_cache_index = 0;
			new_size = 8192;
			new_rss_limit = ~0UL;
			goto retry_tsb_alloc;
		}

		/* If we failed on a TSB grow, we are under serious
		 * memory pressure so don't try to grow any more.
		 */
		if (mm->context.tsb != NULL)
			mm->context.tsb_rss_limit = ~0UL;
		return;
	}

	/* Mark all tags as invalid. */
	tsb_init(new_tsb, new_size);

	/* Ok, we are about to commit the changes. If we are
	 * growing an existing TSB the locking is very tricky,
	 * so WATCH OUT!
	 *
	 * We have to hold mm->context.lock while committing to the
	 * new TSB, this synchronizes us with processors in
	 * flush_tsb_user() and switch_mm() for this address space.
	 *
	 * But even with that lock held, processors run asynchronously
	 * accessing the old TSB via TLB miss handling. This is OK
	 * because those actions are just propagating state from the
	 * Linux page tables into the TSB, page table mappings are not
	 * being changed. If a real fault occurs, the processor will
	 * synchronize with us when it hits flush_tsb_user(), this is
	 * also true for the case where vmscan is modifying the page
	 * tables. The only thing we need to be careful with is to
	 * skip any locked TSB entries during copy_tsb().
	 *
	 * When we finish committing to the new TSB, we have to drop
	 * the lock and ask all other cpus running this address space
	 * to run tsb_context_switch() to see the new TSB table.
	 */
	spin_lock_irqsave(&mm->context.lock, flags);

	old_tsb = mm->context.tsb;
	old_cache_index = (mm->context.tsb_reg_val & 0x7UL);
	old_size = mm->context.tsb_nentries * sizeof(struct tsb);


	/* Handle multiple threads trying to grow the TSB at the same time.
	 * One will get in here first, and bump the size and the RSS limit.
	 * The others will get in here next and hit this check.
	 */
	if (unlikely(old_tsb && (rss < mm->context.tsb_rss_limit))) {
		spin_unlock_irqrestore(&mm->context.lock, flags);

		kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
		return;
	}

	mm->context.tsb_rss_limit = new_rss_limit;

	if (old_tsb) {
		extern void copy_tsb(unsigned long old_tsb_base,
				     unsigned long old_tsb_size,
				     unsigned long new_tsb_base,
				     unsigned long new_tsb_size);
		unsigned long old_tsb_base = (unsigned long) old_tsb;
		unsigned long new_tsb_base = (unsigned long) new_tsb;

		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
			old_tsb_base = __pa(old_tsb_base);
			new_tsb_base = __pa(new_tsb_base);
		}
		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
	}

	mm->context.tsb = new_tsb;
	setup_tsb_params(mm, new_size);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	/* If old_tsb is NULL, we're being invoked for the first time
	 * from init_new_context().
	 */
	if (old_tsb) {
		/* Reload it on the local cpu. */
		tsb_context_switch(mm);

		/* Now force other processors to do the same. */
		smp_tsb_sync(mm);

		/* Now it is safe to free the old tsb. */
		kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
	}
}

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
	spin_lock_init(&mm->context.lock);

	mm->context.sparc64_ctx_val = 0UL;

	/* copy_mm() copies over the parent's mm_struct before calling
	 * us, so we need to zero out the TSB pointer or else tsb_grow()
	 * will be confused and think there is an older TSB to free up.
	 */
	mm->context.tsb = NULL;

	/* If this is fork, inherit the parent's TSB size. We would
	 * grow it to that size on the first page fault anyways.
	 */
	tsb_grow(mm, get_mm_rss(mm));

	if (unlikely(!mm->context.tsb))
		return -ENOMEM;

	return 0;
}

void destroy_context(struct mm_struct *mm)
{
	unsigned long flags, cache_index;

	cache_index = (mm->context.tsb_reg_val & 0x7UL);
	kmem_cache_free(tsb_caches[cache_index], mm->context.tsb);

	/* We can remove these later, but for now it's useful
	 * to catch any bogus post-destroy_context() references
	 * to the TSB.
	 */
	mm->context.tsb = NULL;
	mm->context.tsb_reg_val = 0UL;

	spin_lock_irqsave(&ctx_alloc_lock, flags);

	if (CTX_VALID(mm->context)) {
		unsigned long nr = CTX_NRBITS(mm->context);
		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
	}

	spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}