aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc64/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc64/mm')
-rw-r--r--arch/sparc64/mm/Makefile2
-rw-r--r--arch/sparc64/mm/fault.c15
-rw-r--r--arch/sparc64/mm/generic.c40
-rw-r--r--arch/sparc64/mm/hugetlbpage.c179
-rw-r--r--arch/sparc64/mm/init.c1431
-rw-r--r--arch/sparc64/mm/tlb.c64
-rw-r--r--arch/sparc64/mm/tsb.c440
-rw-r--r--arch/sparc64/mm/ultra.S374
8 files changed, 1731 insertions, 814 deletions
diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile
index 9d0960e69f48..e415bf942bcd 100644
--- a/arch/sparc64/mm/Makefile
+++ b/arch/sparc64/mm/Makefile
@@ -5,6 +5,6 @@
5EXTRA_AFLAGS := -ansi 5EXTRA_AFLAGS := -ansi
6EXTRA_CFLAGS := -Werror 6EXTRA_CFLAGS := -Werror
7 7
8obj-y := ultra.o tlb.o fault.o init.o generic.o 8obj-y := ultra.o tlb.o tsb.o fault.o init.o generic.o
9 9
10obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 10obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
index 6f0539aa44d0..63b6cc0cd5d5 100644
--- a/arch/sparc64/mm/fault.c
+++ b/arch/sparc64/mm/fault.c
@@ -29,6 +29,7 @@
29#include <asm/lsu.h> 29#include <asm/lsu.h>
30#include <asm/sections.h> 30#include <asm/sections.h>
31#include <asm/kdebug.h> 31#include <asm/kdebug.h>
32#include <asm/mmu_context.h>
32 33
33/* 34/*
34 * To debug kernel to catch accesses to certain virtual/physical addresses. 35 * To debug kernel to catch accesses to certain virtual/physical addresses.
@@ -91,12 +92,13 @@ static void __kprobes unhandled_fault(unsigned long address,
91 die_if_kernel("Oops", regs); 92 die_if_kernel("Oops", regs);
92} 93}
93 94
94static void bad_kernel_pc(struct pt_regs *regs) 95static void bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr)
95{ 96{
96 unsigned long *ksp; 97 unsigned long *ksp;
97 98
98 printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n", 99 printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n",
99 regs->tpc); 100 regs->tpc);
101 printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr);
100 __asm__("mov %%sp, %0" : "=r" (ksp)); 102 __asm__("mov %%sp, %0" : "=r" (ksp));
101 show_stack(current, ksp); 103 show_stack(current, ksp);
102 unhandled_fault(regs->tpc, current, regs); 104 unhandled_fault(regs->tpc, current, regs);
@@ -137,7 +139,7 @@ static unsigned int get_user_insn(unsigned long tpc)
137 if (!pte_present(pte)) 139 if (!pte_present(pte))
138 goto out; 140 goto out;
139 141
140 pa = (pte_val(pte) & _PAGE_PADDR); 142 pa = (pte_pfn(pte) << PAGE_SHIFT);
141 pa += (tpc & ~PAGE_MASK); 143 pa += (tpc & ~PAGE_MASK);
142 144
143 /* Use phys bypass so we don't pollute dtlb/dcache. */ 145 /* Use phys bypass so we don't pollute dtlb/dcache. */
@@ -257,7 +259,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
257 struct vm_area_struct *vma; 259 struct vm_area_struct *vma;
258 unsigned int insn = 0; 260 unsigned int insn = 0;
259 int si_code, fault_code; 261 int si_code, fault_code;
260 unsigned long address; 262 unsigned long address, mm_rss;
261 263
262 fault_code = get_thread_fault_code(); 264 fault_code = get_thread_fault_code();
263 265
@@ -280,7 +282,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
280 (tpc >= MODULES_VADDR && tpc < MODULES_END)) { 282 (tpc >= MODULES_VADDR && tpc < MODULES_END)) {
281 /* Valid, no problems... */ 283 /* Valid, no problems... */
282 } else { 284 } else {
283 bad_kernel_pc(regs); 285 bad_kernel_pc(regs, address);
284 return; 286 return;
285 } 287 }
286 } 288 }
@@ -406,6 +408,11 @@ good_area:
406 } 408 }
407 409
408 up_read(&mm->mmap_sem); 410 up_read(&mm->mmap_sem);
411
412 mm_rss = get_mm_rss(mm);
413 if (unlikely(mm_rss >= mm->context.tsb_rss_limit))
414 tsb_grow(mm, mm_rss);
415
409 return; 416 return;
410 417
411 /* 418 /*
diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c
index 580b63da836b..5fc5c579e35e 100644
--- a/arch/sparc64/mm/generic.c
+++ b/arch/sparc64/mm/generic.c
@@ -15,15 +15,6 @@
15#include <asm/page.h> 15#include <asm/page.h>
16#include <asm/tlbflush.h> 16#include <asm/tlbflush.h>
17 17
18static inline pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space)
19{
20 pte_t pte;
21 pte_val(pte) = (((page) | pgprot_val(prot) | _PAGE_E) &
22 ~(unsigned long)_PAGE_CACHE);
23 pte_val(pte) |= (((unsigned long)space) << 32);
24 return pte;
25}
26
27/* Remap IO memory, the same way as remap_pfn_range(), but use 18/* Remap IO memory, the same way as remap_pfn_range(), but use
28 * the obio memory space. 19 * the obio memory space.
29 * 20 *
@@ -48,24 +39,29 @@ static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte,
48 pte_t entry; 39 pte_t entry;
49 unsigned long curend = address + PAGE_SIZE; 40 unsigned long curend = address + PAGE_SIZE;
50 41
51 entry = mk_pte_io(offset, prot, space); 42 entry = mk_pte_io(offset, prot, space, PAGE_SIZE);
52 if (!(address & 0xffff)) { 43 if (!(address & 0xffff)) {
53 if (!(address & 0x3fffff) && !(offset & 0x3ffffe) && end >= address + 0x400000) { 44 if (PAGE_SIZE < (4 * 1024 * 1024) &&
54 entry = mk_pte_io(offset, 45 !(address & 0x3fffff) &&
55 __pgprot(pgprot_val (prot) | _PAGE_SZ4MB), 46 !(offset & 0x3ffffe) &&
56 space); 47 end >= address + 0x400000) {
48 entry = mk_pte_io(offset, prot, space,
49 4 * 1024 * 1024);
57 curend = address + 0x400000; 50 curend = address + 0x400000;
58 offset += 0x400000; 51 offset += 0x400000;
59 } else if (!(address & 0x7ffff) && !(offset & 0x7fffe) && end >= address + 0x80000) { 52 } else if (PAGE_SIZE < (512 * 1024) &&
60 entry = mk_pte_io(offset, 53 !(address & 0x7ffff) &&
61 __pgprot(pgprot_val (prot) | _PAGE_SZ512K), 54 !(offset & 0x7fffe) &&
62 space); 55 end >= address + 0x80000) {
56 entry = mk_pte_io(offset, prot, space,
57 512 * 1024 * 1024);
63 curend = address + 0x80000; 58 curend = address + 0x80000;
64 offset += 0x80000; 59 offset += 0x80000;
65 } else if (!(offset & 0xfffe) && end >= address + 0x10000) { 60 } else if (PAGE_SIZE < (64 * 1024) &&
66 entry = mk_pte_io(offset, 61 !(offset & 0xfffe) &&
67 __pgprot(pgprot_val (prot) | _PAGE_SZ64K), 62 end >= address + 0x10000) {
68 space); 63 entry = mk_pte_io(offset, prot, space,
64 64 * 1024);
69 curend = address + 0x10000; 65 curend = address + 0x10000;
70 offset += 0x10000; 66 offset += 0x10000;
71 } else 67 } else
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index 625cbb336a23..a7a24869d045 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * SPARC64 Huge TLB page support. 2 * SPARC64 Huge TLB page support.
3 * 3 *
4 * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com) 4 * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
5 */ 5 */
6 6
7#include <linux/config.h> 7#include <linux/config.h>
@@ -22,6 +22,175 @@
22#include <asm/cacheflush.h> 22#include <asm/cacheflush.h>
23#include <asm/mmu_context.h> 23#include <asm/mmu_context.h>
24 24
25/* Slightly simplified from the non-hugepage variant because by
26 * definition we don't have to worry about any page coloring stuff
27 */
28#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL))
29#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL))
30
31static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
32 unsigned long addr,
33 unsigned long len,
34 unsigned long pgoff,
35 unsigned long flags)
36{
37 struct mm_struct *mm = current->mm;
38 struct vm_area_struct * vma;
39 unsigned long task_size = TASK_SIZE;
40 unsigned long start_addr;
41
42 if (test_thread_flag(TIF_32BIT))
43 task_size = STACK_TOP32;
44 if (unlikely(len >= VA_EXCLUDE_START))
45 return -ENOMEM;
46
47 if (len > mm->cached_hole_size) {
48 start_addr = addr = mm->free_area_cache;
49 } else {
50 start_addr = addr = TASK_UNMAPPED_BASE;
51 mm->cached_hole_size = 0;
52 }
53
54 task_size -= len;
55
56full_search:
57 addr = ALIGN(addr, HPAGE_SIZE);
58
59 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
60 /* At this point: (!vma || addr < vma->vm_end). */
61 if (addr < VA_EXCLUDE_START &&
62 (addr + len) >= VA_EXCLUDE_START) {
63 addr = VA_EXCLUDE_END;
64 vma = find_vma(mm, VA_EXCLUDE_END);
65 }
66 if (unlikely(task_size < addr)) {
67 if (start_addr != TASK_UNMAPPED_BASE) {
68 start_addr = addr = TASK_UNMAPPED_BASE;
69 mm->cached_hole_size = 0;
70 goto full_search;
71 }
72 return -ENOMEM;
73 }
74 if (likely(!vma || addr + len <= vma->vm_start)) {
75 /*
76 * Remember the place where we stopped the search:
77 */
78 mm->free_area_cache = addr + len;
79 return addr;
80 }
81 if (addr + mm->cached_hole_size < vma->vm_start)
82 mm->cached_hole_size = vma->vm_start - addr;
83
84 addr = ALIGN(vma->vm_end, HPAGE_SIZE);
85 }
86}
87
88static unsigned long
89hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
90 const unsigned long len,
91 const unsigned long pgoff,
92 const unsigned long flags)
93{
94 struct vm_area_struct *vma;
95 struct mm_struct *mm = current->mm;
96 unsigned long addr = addr0;
97
98 /* This should only ever run for 32-bit processes. */
99 BUG_ON(!test_thread_flag(TIF_32BIT));
100
101 /* check if free_area_cache is useful for us */
102 if (len <= mm->cached_hole_size) {
103 mm->cached_hole_size = 0;
104 mm->free_area_cache = mm->mmap_base;
105 }
106
107 /* either no address requested or can't fit in requested address hole */
108 addr = mm->free_area_cache & HPAGE_MASK;
109
110 /* make sure it can fit in the remaining address space */
111 if (likely(addr > len)) {
112 vma = find_vma(mm, addr-len);
113 if (!vma || addr <= vma->vm_start) {
114 /* remember the address as a hint for next time */
115 return (mm->free_area_cache = addr-len);
116 }
117 }
118
119 if (unlikely(mm->mmap_base < len))
120 goto bottomup;
121
122 addr = (mm->mmap_base-len) & HPAGE_MASK;
123
124 do {
125 /*
126 * Lookup failure means no vma is above this address,
127 * else if new region fits below vma->vm_start,
128 * return with success:
129 */
130 vma = find_vma(mm, addr);
131 if (likely(!vma || addr+len <= vma->vm_start)) {
132 /* remember the address as a hint for next time */
133 return (mm->free_area_cache = addr);
134 }
135
136 /* remember the largest hole we saw so far */
137 if (addr + mm->cached_hole_size < vma->vm_start)
138 mm->cached_hole_size = vma->vm_start - addr;
139
140 /* try just below the current vma->vm_start */
141 addr = (vma->vm_start-len) & HPAGE_MASK;
142 } while (likely(len < vma->vm_start));
143
144bottomup:
145 /*
146 * A failed mmap() very likely causes application failure,
147 * so fall back to the bottom-up function here. This scenario
148 * can happen with large stack limits and large mmap()
149 * allocations.
150 */
151 mm->cached_hole_size = ~0UL;
152 mm->free_area_cache = TASK_UNMAPPED_BASE;
153 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
154 /*
155 * Restore the topdown base:
156 */
157 mm->free_area_cache = mm->mmap_base;
158 mm->cached_hole_size = ~0UL;
159
160 return addr;
161}
162
163unsigned long
164hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
165 unsigned long len, unsigned long pgoff, unsigned long flags)
166{
167 struct mm_struct *mm = current->mm;
168 struct vm_area_struct *vma;
169 unsigned long task_size = TASK_SIZE;
170
171 if (test_thread_flag(TIF_32BIT))
172 task_size = STACK_TOP32;
173
174 if (len & ~HPAGE_MASK)
175 return -EINVAL;
176 if (len > task_size)
177 return -ENOMEM;
178
179 if (addr) {
180 addr = ALIGN(addr, HPAGE_SIZE);
181 vma = find_vma(mm, addr);
182 if (task_size - len >= addr &&
183 (!vma || addr + len <= vma->vm_start))
184 return addr;
185 }
186 if (mm->get_unmapped_area == arch_get_unmapped_area)
187 return hugetlb_get_unmapped_area_bottomup(file, addr, len,
188 pgoff, flags);
189 else
190 return hugetlb_get_unmapped_area_topdown(file, addr, len,
191 pgoff, flags);
192}
193
25pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 194pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
26{ 195{
27 pgd_t *pgd; 196 pgd_t *pgd;
@@ -48,12 +217,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
48 pmd_t *pmd; 217 pmd_t *pmd;
49 pte_t *pte = NULL; 218 pte_t *pte = NULL;
50 219
220 addr &= HPAGE_MASK;
221
51 pgd = pgd_offset(mm, addr); 222 pgd = pgd_offset(mm, addr);
52 if (pgd) { 223 if (!pgd_none(*pgd)) {
53 pud = pud_offset(pgd, addr); 224 pud = pud_offset(pgd, addr);
54 if (pud) { 225 if (!pud_none(*pud)) {
55 pmd = pmd_offset(pud, addr); 226 pmd = pmd_offset(pud, addr);
56 if (pmd) 227 if (!pmd_none(*pmd))
57 pte = pte_offset_map(pmd, addr); 228 pte = pte_offset_map(pmd, addr);
58 } 229 }
59 } 230 }
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 1e44ee26cee8..c2b556106fc1 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -6,6 +6,7 @@
6 */ 6 */
7 7
8#include <linux/config.h> 8#include <linux/config.h>
9#include <linux/module.h>
9#include <linux/kernel.h> 10#include <linux/kernel.h>
10#include <linux/sched.h> 11#include <linux/sched.h>
11#include <linux/string.h> 12#include <linux/string.h>
@@ -39,9 +40,27 @@
39#include <asm/tlb.h> 40#include <asm/tlb.h>
40#include <asm/spitfire.h> 41#include <asm/spitfire.h>
41#include <asm/sections.h> 42#include <asm/sections.h>
43#include <asm/tsb.h>
44#include <asm/hypervisor.h>
42 45
43extern void device_scan(void); 46extern void device_scan(void);
44 47
48#define MAX_PHYS_ADDRESS (1UL << 42UL)
49#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL)
50#define KPTE_BITMAP_BYTES \
51 ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8)
52
53unsigned long kern_linear_pte_xor[2] __read_mostly;
54
55/* A bitmap, one bit for every 256MB of physical memory. If the bit
56 * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else
57 * if set we should use a 256MB page (via kern_linear_pte_xor[1]).
58 */
59unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
60
61/* A special kernel TSB for 4MB and 256MB linear mappings. */
62struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
63
45#define MAX_BANKS 32 64#define MAX_BANKS 32
46 65
47static struct linux_prom64_registers pavail[MAX_BANKS] __initdata; 66static struct linux_prom64_registers pavail[MAX_BANKS] __initdata;
@@ -111,11 +130,9 @@ static void __init read_obp_memory(const char *property,
111 130
112unsigned long *sparc64_valid_addr_bitmap __read_mostly; 131unsigned long *sparc64_valid_addr_bitmap __read_mostly;
113 132
114/* Ugly, but necessary... -DaveM */ 133/* Kernel physical address base and size in bytes. */
115unsigned long phys_base __read_mostly;
116unsigned long kern_base __read_mostly; 134unsigned long kern_base __read_mostly;
117unsigned long kern_size __read_mostly; 135unsigned long kern_size __read_mostly;
118unsigned long pfn_base __read_mostly;
119 136
120/* get_new_mmu_context() uses "cache + 1". */ 137/* get_new_mmu_context() uses "cache + 1". */
121DEFINE_SPINLOCK(ctx_alloc_lock); 138DEFINE_SPINLOCK(ctx_alloc_lock);
@@ -141,24 +158,28 @@ unsigned long sparc64_kern_sec_context __read_mostly;
141 158
142int bigkernel = 0; 159int bigkernel = 0;
143 160
144/* XXX Tune this... */ 161kmem_cache_t *pgtable_cache __read_mostly;
145#define PGT_CACHE_LOW 25 162
146#define PGT_CACHE_HIGH 50 163static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
164{
165 clear_page(addr);
166}
167
168extern void tsb_cache_init(void);
147 169
148void check_pgt_cache(void) 170void pgtable_cache_init(void)
149{ 171{
150 preempt_disable(); 172 pgtable_cache = kmem_cache_create("pgtable_cache",
151 if (pgtable_cache_size > PGT_CACHE_HIGH) { 173 PAGE_SIZE, PAGE_SIZE,
152 do { 174 SLAB_HWCACHE_ALIGN |
153 if (pgd_quicklist) 175 SLAB_MUST_HWCACHE_ALIGN,
154 free_pgd_slow(get_pgd_fast()); 176 zero_ctor,
155 if (pte_quicklist[0]) 177 NULL);
156 free_pte_slow(pte_alloc_one_fast(NULL, 0)); 178 if (!pgtable_cache) {
157 if (pte_quicklist[1]) 179 prom_printf("Could not create pgtable_cache\n");
158 free_pte_slow(pte_alloc_one_fast(NULL, 1 << (PAGE_SHIFT + 10))); 180 prom_halt();
159 } while (pgtable_cache_size > PGT_CACHE_LOW);
160 } 181 }
161 preempt_enable(); 182 tsb_cache_init();
162} 183}
163 184
164#ifdef CONFIG_DEBUG_DCFLUSH 185#ifdef CONFIG_DEBUG_DCFLUSH
@@ -168,8 +189,9 @@ atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0);
168#endif 189#endif
169#endif 190#endif
170 191
171__inline__ void flush_dcache_page_impl(struct page *page) 192inline void flush_dcache_page_impl(struct page *page)
172{ 193{
194 BUG_ON(tlb_type == hypervisor);
173#ifdef CONFIG_DEBUG_DCFLUSH 195#ifdef CONFIG_DEBUG_DCFLUSH
174 atomic_inc(&dcpage_flushes); 196 atomic_inc(&dcpage_flushes);
175#endif 197#endif
@@ -186,8 +208,8 @@ __inline__ void flush_dcache_page_impl(struct page *page)
186} 208}
187 209
188#define PG_dcache_dirty PG_arch_1 210#define PG_dcache_dirty PG_arch_1
189#define PG_dcache_cpu_shift 24 211#define PG_dcache_cpu_shift 24UL
190#define PG_dcache_cpu_mask (256 - 1) 212#define PG_dcache_cpu_mask (256UL - 1UL)
191 213
192#if NR_CPUS > 256 214#if NR_CPUS > 256
193#error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus 215#error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus
@@ -243,32 +265,61 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
243 : "g1", "g7"); 265 : "g1", "g7");
244} 266}
245 267
268static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte)
269{
270 unsigned long tsb_addr = (unsigned long) ent;
271
272 if (tlb_type == cheetah_plus || tlb_type == hypervisor)
273 tsb_addr = __pa(tsb_addr);
274
275 __tsb_insert(tsb_addr, tag, pte);
276}
277
278unsigned long _PAGE_ALL_SZ_BITS __read_mostly;
279unsigned long _PAGE_SZBITS __read_mostly;
280
246void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) 281void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
247{ 282{
248 struct page *page; 283 struct mm_struct *mm;
249 unsigned long pfn; 284 struct tsb *tsb;
250 unsigned long pg_flags; 285 unsigned long tag, flags;
251 286
252 pfn = pte_pfn(pte); 287 if (tlb_type != hypervisor) {
253 if (pfn_valid(pfn) && 288 unsigned long pfn = pte_pfn(pte);
254 (page = pfn_to_page(pfn), page_mapping(page)) && 289 unsigned long pg_flags;
255 ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) { 290 struct page *page;
256 int cpu = ((pg_flags >> PG_dcache_cpu_shift) & 291
257 PG_dcache_cpu_mask); 292 if (pfn_valid(pfn) &&
258 int this_cpu = get_cpu(); 293 (page = pfn_to_page(pfn), page_mapping(page)) &&
259 294 ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) {
260 /* This is just to optimize away some function calls 295 int cpu = ((pg_flags >> PG_dcache_cpu_shift) &
261 * in the SMP case. 296 PG_dcache_cpu_mask);
262 */ 297 int this_cpu = get_cpu();
263 if (cpu == this_cpu) 298
264 flush_dcache_page_impl(page); 299 /* This is just to optimize away some function calls
265 else 300 * in the SMP case.
266 smp_flush_dcache_page_impl(page, cpu); 301 */
302 if (cpu == this_cpu)
303 flush_dcache_page_impl(page);
304 else
305 smp_flush_dcache_page_impl(page, cpu);
267 306
268 clear_dcache_dirty_cpu(page, cpu); 307 clear_dcache_dirty_cpu(page, cpu);
269 308
270 put_cpu(); 309 put_cpu();
310 }
271 } 311 }
312
313 mm = vma->vm_mm;
314
315 spin_lock_irqsave(&mm->context.lock, flags);
316
317 tsb = &mm->context.tsb[(address >> PAGE_SHIFT) &
318 (mm->context.tsb_nentries - 1UL)];
319 tag = (address >> 22UL);
320 tsb_insert(tsb, tag, pte_val(pte));
321
322 spin_unlock_irqrestore(&mm->context.lock, flags);
272} 323}
273 324
274void flush_dcache_page(struct page *page) 325void flush_dcache_page(struct page *page)
@@ -276,6 +327,9 @@ void flush_dcache_page(struct page *page)
276 struct address_space *mapping; 327 struct address_space *mapping;
277 int this_cpu; 328 int this_cpu;
278 329
330 if (tlb_type == hypervisor)
331 return;
332
279 /* Do not bother with the expensive D-cache flush if it 333 /* Do not bother with the expensive D-cache flush if it
280 * is merely the zero page. The 'bigcore' testcase in GDB 334 * is merely the zero page. The 'bigcore' testcase in GDB
281 * causes this case to run millions of times. 335 * causes this case to run millions of times.
@@ -311,7 +365,7 @@ out:
311 365
312void __kprobes flush_icache_range(unsigned long start, unsigned long end) 366void __kprobes flush_icache_range(unsigned long start, unsigned long end)
313{ 367{
314 /* Cheetah has coherent I-cache. */ 368 /* Cheetah and Hypervisor platform cpus have coherent I-cache. */
315 if (tlb_type == spitfire) { 369 if (tlb_type == spitfire) {
316 unsigned long kaddr; 370 unsigned long kaddr;
317 371
@@ -320,16 +374,6 @@ void __kprobes flush_icache_range(unsigned long start, unsigned long end)
320 } 374 }
321} 375}
322 376
323unsigned long page_to_pfn(struct page *page)
324{
325 return (unsigned long) ((page - mem_map) + pfn_base);
326}
327
328struct page *pfn_to_page(unsigned long pfn)
329{
330 return (mem_map + (pfn - pfn_base));
331}
332
333void show_mem(void) 377void show_mem(void)
334{ 378{
335 printk("Mem-info:\n"); 379 printk("Mem-info:\n");
@@ -338,7 +382,6 @@ void show_mem(void)
338 nr_swap_pages << (PAGE_SHIFT-10)); 382 nr_swap_pages << (PAGE_SHIFT-10));
339 printk("%ld pages of RAM\n", num_physpages); 383 printk("%ld pages of RAM\n", num_physpages);
340 printk("%d free pages\n", nr_free_pages()); 384 printk("%d free pages\n", nr_free_pages());
341 printk("%d pages in page table cache\n",pgtable_cache_size);
342} 385}
343 386
344void mmu_info(struct seq_file *m) 387void mmu_info(struct seq_file *m)
@@ -349,6 +392,8 @@ void mmu_info(struct seq_file *m)
349 seq_printf(m, "MMU Type\t: Cheetah+\n"); 392 seq_printf(m, "MMU Type\t: Cheetah+\n");
350 else if (tlb_type == spitfire) 393 else if (tlb_type == spitfire)
351 seq_printf(m, "MMU Type\t: Spitfire\n"); 394 seq_printf(m, "MMU Type\t: Spitfire\n");
395 else if (tlb_type == hypervisor)
396 seq_printf(m, "MMU Type\t: Hypervisor (sun4v)\n");
352 else 397 else
353 seq_printf(m, "MMU Type\t: ???\n"); 398 seq_printf(m, "MMU Type\t: ???\n");
354 399
@@ -371,45 +416,13 @@ struct linux_prom_translation {
371/* Exported for kernel TLB miss handling in ktlb.S */ 416/* Exported for kernel TLB miss handling in ktlb.S */
372struct linux_prom_translation prom_trans[512] __read_mostly; 417struct linux_prom_translation prom_trans[512] __read_mostly;
373unsigned int prom_trans_ents __read_mostly; 418unsigned int prom_trans_ents __read_mostly;
374unsigned int swapper_pgd_zero __read_mostly;
375
376extern unsigned long prom_boot_page;
377extern void prom_remap(unsigned long physpage, unsigned long virtpage, int mmu_ihandle);
378extern int prom_get_mmu_ihandle(void);
379extern void register_prom_callbacks(void);
380 419
381/* Exported for SMP bootup purposes. */ 420/* Exported for SMP bootup purposes. */
382unsigned long kern_locked_tte_data; 421unsigned long kern_locked_tte_data;
383 422
384/*
385 * Translate PROM's mapping we capture at boot time into physical address.
386 * The second parameter is only set from prom_callback() invocations.
387 */
388unsigned long prom_virt_to_phys(unsigned long promva, int *error)
389{
390 int i;
391
392 for (i = 0; i < prom_trans_ents; i++) {
393 struct linux_prom_translation *p = &prom_trans[i];
394
395 if (promva >= p->virt &&
396 promva < (p->virt + p->size)) {
397 unsigned long base = p->data & _PAGE_PADDR;
398
399 if (error)
400 *error = 0;
401 return base + (promva & (8192 - 1));
402 }
403 }
404 if (error)
405 *error = 1;
406 return 0UL;
407}
408
409/* The obp translations are saved based on 8k pagesize, since obp can 423/* The obp translations are saved based on 8k pagesize, since obp can
410 * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS -> 424 * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS ->
411 * HI_OBP_ADDRESS range are handled in ktlb.S and do not use the vpte 425 * HI_OBP_ADDRESS range are handled in ktlb.S.
412 * scheme (also, see rant in inherit_locked_prom_mappings()).
413 */ 426 */
414static inline int in_obp_range(unsigned long vaddr) 427static inline int in_obp_range(unsigned long vaddr)
415{ 428{
@@ -490,6 +503,36 @@ static void __init read_obp_translations(void)
490 } 503 }
491} 504}
492 505
506static void __init hypervisor_tlb_lock(unsigned long vaddr,
507 unsigned long pte,
508 unsigned long mmu)
509{
510 register unsigned long func asm("%o5");
511 register unsigned long arg0 asm("%o0");
512 register unsigned long arg1 asm("%o1");
513 register unsigned long arg2 asm("%o2");
514 register unsigned long arg3 asm("%o3");
515
516 func = HV_FAST_MMU_MAP_PERM_ADDR;
517 arg0 = vaddr;
518 arg1 = 0;
519 arg2 = pte;
520 arg3 = mmu;
521 __asm__ __volatile__("ta 0x80"
522 : "=&r" (func), "=&r" (arg0),
523 "=&r" (arg1), "=&r" (arg2),
524 "=&r" (arg3)
525 : "0" (func), "1" (arg0), "2" (arg1),
526 "3" (arg2), "4" (arg3));
527 if (arg0 != 0) {
528 prom_printf("hypervisor_tlb_lock[%lx:%lx:%lx:%lx]: "
529 "errors with %lx\n", vaddr, 0, pte, mmu, arg0);
530 prom_halt();
531 }
532}
533
534static unsigned long kern_large_tte(unsigned long paddr);
535
493static void __init remap_kernel(void) 536static void __init remap_kernel(void)
494{ 537{
495 unsigned long phys_page, tte_vaddr, tte_data; 538 unsigned long phys_page, tte_vaddr, tte_data;
@@ -497,25 +540,34 @@ static void __init remap_kernel(void)
497 540
498 tte_vaddr = (unsigned long) KERNBASE; 541 tte_vaddr = (unsigned long) KERNBASE;
499 phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL; 542 phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
500 tte_data = (phys_page | (_PAGE_VALID | _PAGE_SZ4MB | 543 tte_data = kern_large_tte(phys_page);
501 _PAGE_CP | _PAGE_CV | _PAGE_P |
502 _PAGE_L | _PAGE_W));
503 544
504 kern_locked_tte_data = tte_data; 545 kern_locked_tte_data = tte_data;
505 546
506 /* Now lock us into the TLBs via OBP. */ 547 /* Now lock us into the TLBs via Hypervisor or OBP. */
507 prom_dtlb_load(tlb_ent, tte_data, tte_vaddr); 548 if (tlb_type == hypervisor) {
508 prom_itlb_load(tlb_ent, tte_data, tte_vaddr); 549 hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU);
509 if (bigkernel) { 550 hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU);
510 tlb_ent -= 1; 551 if (bigkernel) {
511 prom_dtlb_load(tlb_ent, 552 tte_vaddr += 0x400000;
512 tte_data + 0x400000, 553 tte_data += 0x400000;
513 tte_vaddr + 0x400000); 554 hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU);
514 prom_itlb_load(tlb_ent, 555 hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU);
515 tte_data + 0x400000, 556 }
516 tte_vaddr + 0x400000); 557 } else {
558 prom_dtlb_load(tlb_ent, tte_data, tte_vaddr);
559 prom_itlb_load(tlb_ent, tte_data, tte_vaddr);
560 if (bigkernel) {
561 tlb_ent -= 1;
562 prom_dtlb_load(tlb_ent,
563 tte_data + 0x400000,
564 tte_vaddr + 0x400000);
565 prom_itlb_load(tlb_ent,
566 tte_data + 0x400000,
567 tte_vaddr + 0x400000);
568 }
569 sparc64_highest_unlocked_tlb_ent = tlb_ent - 1;
517 } 570 }
518 sparc64_highest_unlocked_tlb_ent = tlb_ent - 1;
519 if (tlb_type == cheetah_plus) { 571 if (tlb_type == cheetah_plus) {
520 sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 | 572 sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 |
521 CTX_CHEETAH_PLUS_NUC); 573 CTX_CHEETAH_PLUS_NUC);
@@ -533,372 +585,14 @@ static void __init inherit_prom_mappings(void)
533 prom_printf("Remapping the kernel... "); 585 prom_printf("Remapping the kernel... ");
534 remap_kernel(); 586 remap_kernel();
535 prom_printf("done.\n"); 587 prom_printf("done.\n");
536
537 prom_printf("Registering callbacks... ");
538 register_prom_callbacks();
539 prom_printf("done.\n");
540}
541
542/* The OBP specifications for sun4u mark 0xfffffffc00000000 and
543 * upwards as reserved for use by the firmware (I wonder if this
544 * will be the same on Cheetah...). We use this virtual address
545 * range for the VPTE table mappings of the nucleus so we need
546 * to zap them when we enter the PROM. -DaveM
547 */
548static void __flush_nucleus_vptes(void)
549{
550 unsigned long prom_reserved_base = 0xfffffffc00000000UL;
551 int i;
552
553 /* Only DTLB must be checked for VPTE entries. */
554 if (tlb_type == spitfire) {
555 for (i = 0; i < 63; i++) {
556 unsigned long tag;
557
558 /* Spitfire Errata #32 workaround */
559 /* NOTE: Always runs on spitfire, so no cheetah+
560 * page size encodings.
561 */
562 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
563 "flush %%g6"
564 : /* No outputs */
565 : "r" (0),
566 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
567
568 tag = spitfire_get_dtlb_tag(i);
569 if (((tag & ~(PAGE_MASK)) == 0) &&
570 ((tag & (PAGE_MASK)) >= prom_reserved_base)) {
571 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
572 "membar #Sync"
573 : /* no outputs */
574 : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
575 spitfire_put_dtlb_data(i, 0x0UL);
576 }
577 }
578 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
579 for (i = 0; i < 512; i++) {
580 unsigned long tag = cheetah_get_dtlb_tag(i, 2);
581
582 if ((tag & ~PAGE_MASK) == 0 &&
583 (tag & PAGE_MASK) >= prom_reserved_base) {
584 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
585 "membar #Sync"
586 : /* no outputs */
587 : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
588 cheetah_put_dtlb_data(i, 0x0UL, 2);
589 }
590
591 if (tlb_type != cheetah_plus)
592 continue;
593
594 tag = cheetah_get_dtlb_tag(i, 3);
595
596 if ((tag & ~PAGE_MASK) == 0 &&
597 (tag & PAGE_MASK) >= prom_reserved_base) {
598 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
599 "membar #Sync"
600 : /* no outputs */
601 : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
602 cheetah_put_dtlb_data(i, 0x0UL, 3);
603 }
604 }
605 } else {
606 /* Implement me :-) */
607 BUG();
608 }
609} 588}
610 589
611static int prom_ditlb_set;
612struct prom_tlb_entry {
613 int tlb_ent;
614 unsigned long tlb_tag;
615 unsigned long tlb_data;
616};
617struct prom_tlb_entry prom_itlb[16], prom_dtlb[16];
618
619void prom_world(int enter) 590void prom_world(int enter)
620{ 591{
621 unsigned long pstate;
622 int i;
623
624 if (!enter) 592 if (!enter)
625 set_fs((mm_segment_t) { get_thread_current_ds() }); 593 set_fs((mm_segment_t) { get_thread_current_ds() });
626 594
627 if (!prom_ditlb_set) 595 __asm__ __volatile__("flushw");
628 return;
629
630 /* Make sure the following runs atomically. */
631 __asm__ __volatile__("flushw\n\t"
632 "rdpr %%pstate, %0\n\t"
633 "wrpr %0, %1, %%pstate"
634 : "=r" (pstate)
635 : "i" (PSTATE_IE));
636
637 if (enter) {
638 /* Kick out nucleus VPTEs. */
639 __flush_nucleus_vptes();
640
641 /* Install PROM world. */
642 for (i = 0; i < 16; i++) {
643 if (prom_dtlb[i].tlb_ent != -1) {
644 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
645 "membar #Sync"
646 : : "r" (prom_dtlb[i].tlb_tag), "r" (TLB_TAG_ACCESS),
647 "i" (ASI_DMMU));
648 if (tlb_type == spitfire)
649 spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent,
650 prom_dtlb[i].tlb_data);
651 else if (tlb_type == cheetah || tlb_type == cheetah_plus)
652 cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent,
653 prom_dtlb[i].tlb_data);
654 }
655 if (prom_itlb[i].tlb_ent != -1) {
656 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
657 "membar #Sync"
658 : : "r" (prom_itlb[i].tlb_tag),
659 "r" (TLB_TAG_ACCESS),
660 "i" (ASI_IMMU));
661 if (tlb_type == spitfire)
662 spitfire_put_itlb_data(prom_itlb[i].tlb_ent,
663 prom_itlb[i].tlb_data);
664 else if (tlb_type == cheetah || tlb_type == cheetah_plus)
665 cheetah_put_litlb_data(prom_itlb[i].tlb_ent,
666 prom_itlb[i].tlb_data);
667 }
668 }
669 } else {
670 for (i = 0; i < 16; i++) {
671 if (prom_dtlb[i].tlb_ent != -1) {
672 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
673 "membar #Sync"
674 : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
675 if (tlb_type == spitfire)
676 spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent, 0x0UL);
677 else
678 cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent, 0x0UL);
679 }
680 if (prom_itlb[i].tlb_ent != -1) {
681 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
682 "membar #Sync"
683 : : "r" (TLB_TAG_ACCESS),
684 "i" (ASI_IMMU));
685 if (tlb_type == spitfire)
686 spitfire_put_itlb_data(prom_itlb[i].tlb_ent, 0x0UL);
687 else
688 cheetah_put_litlb_data(prom_itlb[i].tlb_ent, 0x0UL);
689 }
690 }
691 }
692 __asm__ __volatile__("wrpr %0, 0, %%pstate"
693 : : "r" (pstate));
694}
695
696void inherit_locked_prom_mappings(int save_p)
697{
698 int i;
699 int dtlb_seen = 0;
700 int itlb_seen = 0;
701
702 /* Fucking losing PROM has more mappings in the TLB, but
703 * it (conveniently) fails to mention any of these in the
704 * translations property. The only ones that matter are
705 * the locked PROM tlb entries, so we impose the following
706 * irrecovable rule on the PROM, it is allowed 8 locked
707 * entries in the ITLB and 8 in the DTLB.
708 *
709 * Supposedly the upper 16GB of the address space is
710 * reserved for OBP, BUT I WISH THIS WAS DOCUMENTED
711 * SOMEWHERE!!!!!!!!!!!!!!!!! Furthermore the entire interface
712 * used between the client program and the firmware on sun5
713 * systems to coordinate mmu mappings is also COMPLETELY
714 * UNDOCUMENTED!!!!!! Thanks S(t)un!
715 */
716 if (save_p) {
717 for (i = 0; i < 16; i++) {
718 prom_itlb[i].tlb_ent = -1;
719 prom_dtlb[i].tlb_ent = -1;
720 }
721 }
722 if (tlb_type == spitfire) {
723 int high = sparc64_highest_unlocked_tlb_ent;
724 for (i = 0; i <= high; i++) {
725 unsigned long data;
726
727 /* Spitfire Errata #32 workaround */
728 /* NOTE: Always runs on spitfire, so no cheetah+
729 * page size encodings.
730 */
731 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
732 "flush %%g6"
733 : /* No outputs */
734 : "r" (0),
735 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
736
737 data = spitfire_get_dtlb_data(i);
738 if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
739 unsigned long tag;
740
741 /* Spitfire Errata #32 workaround */
742 /* NOTE: Always runs on spitfire, so no
743 * cheetah+ page size encodings.
744 */
745 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
746 "flush %%g6"
747 : /* No outputs */
748 : "r" (0),
749 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
750
751 tag = spitfire_get_dtlb_tag(i);
752 if (save_p) {
753 prom_dtlb[dtlb_seen].tlb_ent = i;
754 prom_dtlb[dtlb_seen].tlb_tag = tag;
755 prom_dtlb[dtlb_seen].tlb_data = data;
756 }
757 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
758 "membar #Sync"
759 : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
760 spitfire_put_dtlb_data(i, 0x0UL);
761
762 dtlb_seen++;
763 if (dtlb_seen > 15)
764 break;
765 }
766 }
767
768 for (i = 0; i < high; i++) {
769 unsigned long data;
770
771 /* Spitfire Errata #32 workaround */
772 /* NOTE: Always runs on spitfire, so no
773 * cheetah+ page size encodings.
774 */
775 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
776 "flush %%g6"
777 : /* No outputs */
778 : "r" (0),
779 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
780
781 data = spitfire_get_itlb_data(i);
782 if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
783 unsigned long tag;
784
785 /* Spitfire Errata #32 workaround */
786 /* NOTE: Always runs on spitfire, so no
787 * cheetah+ page size encodings.
788 */
789 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
790 "flush %%g6"
791 : /* No outputs */
792 : "r" (0),
793 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
794
795 tag = spitfire_get_itlb_tag(i);
796 if (save_p) {
797 prom_itlb[itlb_seen].tlb_ent = i;
798 prom_itlb[itlb_seen].tlb_tag = tag;
799 prom_itlb[itlb_seen].tlb_data = data;
800 }
801 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
802 "membar #Sync"
803 : : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
804 spitfire_put_itlb_data(i, 0x0UL);
805
806 itlb_seen++;
807 if (itlb_seen > 15)
808 break;
809 }
810 }
811 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
812 int high = sparc64_highest_unlocked_tlb_ent;
813
814 for (i = 0; i <= high; i++) {
815 unsigned long data;
816
817 data = cheetah_get_ldtlb_data(i);
818 if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
819 unsigned long tag;
820
821 tag = cheetah_get_ldtlb_tag(i);
822 if (save_p) {
823 prom_dtlb[dtlb_seen].tlb_ent = i;
824 prom_dtlb[dtlb_seen].tlb_tag = tag;
825 prom_dtlb[dtlb_seen].tlb_data = data;
826 }
827 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
828 "membar #Sync"
829 : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
830 cheetah_put_ldtlb_data(i, 0x0UL);
831
832 dtlb_seen++;
833 if (dtlb_seen > 15)
834 break;
835 }
836 }
837
838 for (i = 0; i < high; i++) {
839 unsigned long data;
840
841 data = cheetah_get_litlb_data(i);
842 if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
843 unsigned long tag;
844
845 tag = cheetah_get_litlb_tag(i);
846 if (save_p) {
847 prom_itlb[itlb_seen].tlb_ent = i;
848 prom_itlb[itlb_seen].tlb_tag = tag;
849 prom_itlb[itlb_seen].tlb_data = data;
850 }
851 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
852 "membar #Sync"
853 : : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
854 cheetah_put_litlb_data(i, 0x0UL);
855
856 itlb_seen++;
857 if (itlb_seen > 15)
858 break;
859 }
860 }
861 } else {
862 /* Implement me :-) */
863 BUG();
864 }
865 if (save_p)
866 prom_ditlb_set = 1;
867}
868
869/* Give PROM back his world, done during reboots... */
870void prom_reload_locked(void)
871{
872 int i;
873
874 for (i = 0; i < 16; i++) {
875 if (prom_dtlb[i].tlb_ent != -1) {
876 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
877 "membar #Sync"
878 : : "r" (prom_dtlb[i].tlb_tag), "r" (TLB_TAG_ACCESS),
879 "i" (ASI_DMMU));
880 if (tlb_type == spitfire)
881 spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent,
882 prom_dtlb[i].tlb_data);
883 else if (tlb_type == cheetah || tlb_type == cheetah_plus)
884 cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent,
885 prom_dtlb[i].tlb_data);
886 }
887
888 if (prom_itlb[i].tlb_ent != -1) {
889 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
890 "membar #Sync"
891 : : "r" (prom_itlb[i].tlb_tag),
892 "r" (TLB_TAG_ACCESS),
893 "i" (ASI_IMMU));
894 if (tlb_type == spitfire)
895 spitfire_put_itlb_data(prom_itlb[i].tlb_ent,
896 prom_itlb[i].tlb_data);
897 else
898 cheetah_put_litlb_data(prom_itlb[i].tlb_ent,
899 prom_itlb[i].tlb_data);
900 }
901 }
902} 596}
903 597
904#ifdef DCACHE_ALIASING_POSSIBLE 598#ifdef DCACHE_ALIASING_POSSIBLE
@@ -914,7 +608,7 @@ void __flush_dcache_range(unsigned long start, unsigned long end)
914 if (++n >= 512) 608 if (++n >= 512)
915 break; 609 break;
916 } 610 }
917 } else { 611 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
918 start = __pa(start); 612 start = __pa(start);
919 end = __pa(end); 613 end = __pa(end);
920 for (va = start; va < end; va += 32) 614 for (va = start; va < end; va += 32)
@@ -927,63 +621,6 @@ void __flush_dcache_range(unsigned long start, unsigned long end)
927} 621}
928#endif /* DCACHE_ALIASING_POSSIBLE */ 622#endif /* DCACHE_ALIASING_POSSIBLE */
929 623
930/* If not locked, zap it. */
931void __flush_tlb_all(void)
932{
933 unsigned long pstate;
934 int i;
935
936 __asm__ __volatile__("flushw\n\t"
937 "rdpr %%pstate, %0\n\t"
938 "wrpr %0, %1, %%pstate"
939 : "=r" (pstate)
940 : "i" (PSTATE_IE));
941 if (tlb_type == spitfire) {
942 for (i = 0; i < 64; i++) {
943 /* Spitfire Errata #32 workaround */
944 /* NOTE: Always runs on spitfire, so no
945 * cheetah+ page size encodings.
946 */
947 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
948 "flush %%g6"
949 : /* No outputs */
950 : "r" (0),
951 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
952
953 if (!(spitfire_get_dtlb_data(i) & _PAGE_L)) {
954 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
955 "membar #Sync"
956 : /* no outputs */
957 : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
958 spitfire_put_dtlb_data(i, 0x0UL);
959 }
960
961 /* Spitfire Errata #32 workaround */
962 /* NOTE: Always runs on spitfire, so no
963 * cheetah+ page size encodings.
964 */
965 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
966 "flush %%g6"
967 : /* No outputs */
968 : "r" (0),
969 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
970
971 if (!(spitfire_get_itlb_data(i) & _PAGE_L)) {
972 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
973 "membar #Sync"
974 : /* no outputs */
975 : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
976 spitfire_put_itlb_data(i, 0x0UL);
977 }
978 }
979 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
980 cheetah_flush_dtlb_all();
981 cheetah_flush_itlb_all();
982 }
983 __asm__ __volatile__("wrpr %0, 0, %%pstate"
984 : : "r" (pstate));
985}
986
987/* Caller does TLB context flushing on local CPU if necessary. 624/* Caller does TLB context flushing on local CPU if necessary.
988 * The caller also ensures that CTX_VALID(mm->context) is false. 625 * The caller also ensures that CTX_VALID(mm->context) is false.
989 * 626 *
@@ -991,17 +628,21 @@ void __flush_tlb_all(void)
991 * let the user have CTX 0 (nucleus) or we ever use a CTX 628 * let the user have CTX 0 (nucleus) or we ever use a CTX
992 * version of zero (and thus NO_CONTEXT would not be caught 629 * version of zero (and thus NO_CONTEXT would not be caught
993 * by version mis-match tests in mmu_context.h). 630 * by version mis-match tests in mmu_context.h).
631 *
632 * Always invoked with interrupts disabled.
994 */ 633 */
995void get_new_mmu_context(struct mm_struct *mm) 634void get_new_mmu_context(struct mm_struct *mm)
996{ 635{
997 unsigned long ctx, new_ctx; 636 unsigned long ctx, new_ctx;
998 unsigned long orig_pgsz_bits; 637 unsigned long orig_pgsz_bits;
999 638 unsigned long flags;
639 int new_version;
1000 640
1001 spin_lock(&ctx_alloc_lock); 641 spin_lock_irqsave(&ctx_alloc_lock, flags);
1002 orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); 642 orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
1003 ctx = (tlb_context_cache + 1) & CTX_NR_MASK; 643 ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
1004 new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); 644 new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
645 new_version = 0;
1005 if (new_ctx >= (1 << CTX_NR_BITS)) { 646 if (new_ctx >= (1 << CTX_NR_BITS)) {
1006 new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1); 647 new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
1007 if (new_ctx >= ctx) { 648 if (new_ctx >= ctx) {
@@ -1024,6 +665,7 @@ void get_new_mmu_context(struct mm_struct *mm)
1024 mmu_context_bmap[i + 2] = 0; 665 mmu_context_bmap[i + 2] = 0;
1025 mmu_context_bmap[i + 3] = 0; 666 mmu_context_bmap[i + 3] = 0;
1026 } 667 }
668 new_version = 1;
1027 goto out; 669 goto out;
1028 } 670 }
1029 } 671 }
@@ -1032,79 +674,10 @@ void get_new_mmu_context(struct mm_struct *mm)
1032out: 674out:
1033 tlb_context_cache = new_ctx; 675 tlb_context_cache = new_ctx;
1034 mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; 676 mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
1035 spin_unlock(&ctx_alloc_lock); 677 spin_unlock_irqrestore(&ctx_alloc_lock, flags);
1036}
1037
1038#ifndef CONFIG_SMP
1039struct pgtable_cache_struct pgt_quicklists;
1040#endif
1041
1042/* OK, we have to color these pages. The page tables are accessed
1043 * by non-Dcache enabled mapping in the VPTE area by the dtlb_backend.S
1044 * code, as well as by PAGE_OFFSET range direct-mapped addresses by
1045 * other parts of the kernel. By coloring, we make sure that the tlbmiss
1046 * fast handlers do not get data from old/garbage dcache lines that
1047 * correspond to an old/stale virtual address (user/kernel) that
1048 * previously mapped the pagetable page while accessing vpte range
1049 * addresses. The idea is that if the vpte color and PAGE_OFFSET range
1050 * color is the same, then when the kernel initializes the pagetable
1051 * using the later address range, accesses with the first address
1052 * range will see the newly initialized data rather than the garbage.
1053 */
1054#ifdef DCACHE_ALIASING_POSSIBLE
1055#define DC_ALIAS_SHIFT 1
1056#else
1057#define DC_ALIAS_SHIFT 0
1058#endif
1059pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
1060{
1061 struct page *page;
1062 unsigned long color;
1063
1064 {
1065 pte_t *ptep = pte_alloc_one_fast(mm, address);
1066
1067 if (ptep)
1068 return ptep;
1069 }
1070 678
1071 color = VPTE_COLOR(address); 679 if (unlikely(new_version))
1072 page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, DC_ALIAS_SHIFT); 680 smp_new_mmu_context_version();
1073 if (page) {
1074 unsigned long *to_free;
1075 unsigned long paddr;
1076 pte_t *pte;
1077
1078#ifdef DCACHE_ALIASING_POSSIBLE
1079 set_page_count(page, 1);
1080 ClearPageCompound(page);
1081
1082 set_page_count((page + 1), 1);
1083 ClearPageCompound(page + 1);
1084#endif
1085 paddr = (unsigned long) page_address(page);
1086 memset((char *)paddr, 0, (PAGE_SIZE << DC_ALIAS_SHIFT));
1087
1088 if (!color) {
1089 pte = (pte_t *) paddr;
1090 to_free = (unsigned long *) (paddr + PAGE_SIZE);
1091 } else {
1092 pte = (pte_t *) (paddr + PAGE_SIZE);
1093 to_free = (unsigned long *) paddr;
1094 }
1095
1096#ifdef DCACHE_ALIASING_POSSIBLE
1097 /* Now free the other one up, adjust cache size. */
1098 preempt_disable();
1099 *to_free = (unsigned long) pte_quicklist[color ^ 0x1];
1100 pte_quicklist[color ^ 0x1] = to_free;
1101 pgtable_cache_size++;
1102 preempt_enable();
1103#endif
1104
1105 return pte;
1106 }
1107 return NULL;
1108} 681}
1109 682
1110void sparc_ultra_dump_itlb(void) 683void sparc_ultra_dump_itlb(void)
@@ -1196,9 +769,78 @@ void sparc_ultra_dump_dtlb(void)
1196 769
1197extern unsigned long cmdline_memory_size; 770extern unsigned long cmdline_memory_size;
1198 771
1199unsigned long __init bootmem_init(unsigned long *pages_avail) 772/* Find a free area for the bootmem map, avoiding the kernel image
773 * and the initial ramdisk.
774 */
775static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn,
776 unsigned long end_pfn)
777{
778 unsigned long avoid_start, avoid_end, bootmap_size;
779 int i;
780
781 bootmap_size = ((end_pfn - start_pfn) + 7) / 8;
782 bootmap_size = ALIGN(bootmap_size, sizeof(long));
783
784 avoid_start = avoid_end = 0;
785#ifdef CONFIG_BLK_DEV_INITRD
786 avoid_start = initrd_start;
787 avoid_end = PAGE_ALIGN(initrd_end);
788#endif
789
790#ifdef CONFIG_DEBUG_BOOTMEM
791 prom_printf("choose_bootmap_pfn: kern[%lx:%lx] avoid[%lx:%lx]\n",
792 kern_base, PAGE_ALIGN(kern_base + kern_size),
793 avoid_start, avoid_end);
794#endif
795 for (i = 0; i < pavail_ents; i++) {
796 unsigned long start, end;
797
798 start = pavail[i].phys_addr;
799 end = start + pavail[i].reg_size;
800
801 while (start < end) {
802 if (start >= kern_base &&
803 start < PAGE_ALIGN(kern_base + kern_size)) {
804 start = PAGE_ALIGN(kern_base + kern_size);
805 continue;
806 }
807 if (start >= avoid_start && start < avoid_end) {
808 start = avoid_end;
809 continue;
810 }
811
812 if ((end - start) < bootmap_size)
813 break;
814
815 if (start < kern_base &&
816 (start + bootmap_size) > kern_base) {
817 start = PAGE_ALIGN(kern_base + kern_size);
818 continue;
819 }
820
821 if (start < avoid_start &&
822 (start + bootmap_size) > avoid_start) {
823 start = avoid_end;
824 continue;
825 }
826
827 /* OK, it doesn't overlap anything, use it. */
828#ifdef CONFIG_DEBUG_BOOTMEM
829 prom_printf("choose_bootmap_pfn: Using %lx [%lx]\n",
830 start >> PAGE_SHIFT, start);
831#endif
832 return start >> PAGE_SHIFT;
833 }
834 }
835
836 prom_printf("Cannot find free area for bootmap, aborting.\n");
837 prom_halt();
838}
839
840static unsigned long __init bootmem_init(unsigned long *pages_avail,
841 unsigned long phys_base)
1200{ 842{
1201 unsigned long bootmap_size, start_pfn, end_pfn; 843 unsigned long bootmap_size, end_pfn;
1202 unsigned long end_of_phys_memory = 0UL; 844 unsigned long end_of_phys_memory = 0UL;
1203 unsigned long bootmap_pfn, bytes_avail, size; 845 unsigned long bootmap_pfn, bytes_avail, size;
1204 int i; 846 int i;
@@ -1236,14 +878,6 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
1236 878
1237 *pages_avail = bytes_avail >> PAGE_SHIFT; 879 *pages_avail = bytes_avail >> PAGE_SHIFT;
1238 880
1239 /* Start with page aligned address of last symbol in kernel
1240 * image. The kernel is hard mapped below PAGE_OFFSET in a
1241 * 4MB locked TLB translation.
1242 */
1243 start_pfn = PAGE_ALIGN(kern_base + kern_size) >> PAGE_SHIFT;
1244
1245 bootmap_pfn = start_pfn;
1246
1247 end_pfn = end_of_phys_memory >> PAGE_SHIFT; 881 end_pfn = end_of_phys_memory >> PAGE_SHIFT;
1248 882
1249#ifdef CONFIG_BLK_DEV_INITRD 883#ifdef CONFIG_BLK_DEV_INITRD
@@ -1260,23 +894,22 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
1260 "(0x%016lx > 0x%016lx)\ndisabling initrd\n", 894 "(0x%016lx > 0x%016lx)\ndisabling initrd\n",
1261 initrd_end, end_of_phys_memory); 895 initrd_end, end_of_phys_memory);
1262 initrd_start = 0; 896 initrd_start = 0;
1263 } 897 initrd_end = 0;
1264 if (initrd_start) {
1265 if (initrd_start >= (start_pfn << PAGE_SHIFT) &&
1266 initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE)
1267 bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT;
1268 } 898 }
1269 } 899 }
1270#endif 900#endif
1271 /* Initialize the boot-time allocator. */ 901 /* Initialize the boot-time allocator. */
1272 max_pfn = max_low_pfn = end_pfn; 902 max_pfn = max_low_pfn = end_pfn;
1273 min_low_pfn = pfn_base; 903 min_low_pfn = (phys_base >> PAGE_SHIFT);
904
905 bootmap_pfn = choose_bootmap_pfn(min_low_pfn, end_pfn);
1274 906
1275#ifdef CONFIG_DEBUG_BOOTMEM 907#ifdef CONFIG_DEBUG_BOOTMEM
1276 prom_printf("init_bootmem(min[%lx], bootmap[%lx], max[%lx])\n", 908 prom_printf("init_bootmem(min[%lx], bootmap[%lx], max[%lx])\n",
1277 min_low_pfn, bootmap_pfn, max_low_pfn); 909 min_low_pfn, bootmap_pfn, max_low_pfn);
1278#endif 910#endif
1279 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, pfn_base, end_pfn); 911 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn,
912 min_low_pfn, end_pfn);
1280 913
1281 /* Now register the available physical memory with the 914 /* Now register the available physical memory with the
1282 * allocator. 915 * allocator.
@@ -1324,9 +957,26 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
1324 reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size); 957 reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size);
1325 *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT; 958 *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
1326 959
960 for (i = 0; i < pavail_ents; i++) {
961 unsigned long start_pfn, end_pfn;
962
963 start_pfn = pavail[i].phys_addr >> PAGE_SHIFT;
964 end_pfn = (start_pfn + (pavail[i].reg_size >> PAGE_SHIFT));
965#ifdef CONFIG_DEBUG_BOOTMEM
966 prom_printf("memory_present(0, %lx, %lx)\n",
967 start_pfn, end_pfn);
968#endif
969 memory_present(0, start_pfn, end_pfn);
970 }
971
972 sparse_init();
973
1327 return end_pfn; 974 return end_pfn;
1328} 975}
1329 976
977static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
978static int pall_ents __initdata;
979
1330#ifdef CONFIG_DEBUG_PAGEALLOC 980#ifdef CONFIG_DEBUG_PAGEALLOC
1331static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, pgprot_t prot) 981static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, pgprot_t prot)
1332{ 982{
@@ -1382,14 +1032,44 @@ static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend,
1382 return alloc_bytes; 1032 return alloc_bytes;
1383} 1033}
1384 1034
1385static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
1386static int pall_ents __initdata;
1387
1388extern unsigned int kvmap_linear_patch[1]; 1035extern unsigned int kvmap_linear_patch[1];
1036#endif /* CONFIG_DEBUG_PAGEALLOC */
1037
1038static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
1039{
1040 const unsigned long shift_256MB = 28;
1041 const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL);
1042 const unsigned long size_256MB = (1UL << shift_256MB);
1043
1044 while (start < end) {
1045 long remains;
1046
1047 remains = end - start;
1048 if (remains < size_256MB)
1049 break;
1050
1051 if (start & mask_256MB) {
1052 start = (start + size_256MB) & ~mask_256MB;
1053 continue;
1054 }
1055
1056 while (remains >= size_256MB) {
1057 unsigned long index = start >> shift_256MB;
1058
1059 __set_bit(index, kpte_linear_bitmap);
1060
1061 start += size_256MB;
1062 remains -= size_256MB;
1063 }
1064 }
1065}
1389 1066
1390static void __init kernel_physical_mapping_init(void) 1067static void __init kernel_physical_mapping_init(void)
1391{ 1068{
1392 unsigned long i, mem_alloced = 0UL; 1069 unsigned long i;
1070#ifdef CONFIG_DEBUG_PAGEALLOC
1071 unsigned long mem_alloced = 0UL;
1072#endif
1393 1073
1394 read_obp_memory("reg", &pall[0], &pall_ents); 1074 read_obp_memory("reg", &pall[0], &pall_ents);
1395 1075
@@ -1398,10 +1078,16 @@ static void __init kernel_physical_mapping_init(void)
1398 1078
1399 phys_start = pall[i].phys_addr; 1079 phys_start = pall[i].phys_addr;
1400 phys_end = phys_start + pall[i].reg_size; 1080 phys_end = phys_start + pall[i].reg_size;
1081
1082 mark_kpte_bitmap(phys_start, phys_end);
1083
1084#ifdef CONFIG_DEBUG_PAGEALLOC
1401 mem_alloced += kernel_map_range(phys_start, phys_end, 1085 mem_alloced += kernel_map_range(phys_start, phys_end,
1402 PAGE_KERNEL); 1086 PAGE_KERNEL);
1087#endif
1403 } 1088 }
1404 1089
1090#ifdef CONFIG_DEBUG_PAGEALLOC
1405 printk("Allocated %ld bytes for kernel page tables.\n", 1091 printk("Allocated %ld bytes for kernel page tables.\n",
1406 mem_alloced); 1092 mem_alloced);
1407 1093
@@ -1409,8 +1095,10 @@ static void __init kernel_physical_mapping_init(void)
1409 flushi(&kvmap_linear_patch[0]); 1095 flushi(&kvmap_linear_patch[0]);
1410 1096
1411 __flush_tlb_all(); 1097 __flush_tlb_all();
1098#endif
1412} 1099}
1413 1100
1101#ifdef CONFIG_DEBUG_PAGEALLOC
1414void kernel_map_pages(struct page *page, int numpages, int enable) 1102void kernel_map_pages(struct page *page, int numpages, int enable)
1415{ 1103{
1416 unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT; 1104 unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT;
@@ -1419,6 +1107,9 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
1419 kernel_map_range(phys_start, phys_end, 1107 kernel_map_range(phys_start, phys_end,
1420 (enable ? PAGE_KERNEL : __pgprot(0))); 1108 (enable ? PAGE_KERNEL : __pgprot(0)));
1421 1109
1110 flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
1111 PAGE_OFFSET + phys_end);
1112
1422 /* we should perform an IPI and flush all tlbs, 1113 /* we should perform an IPI and flush all tlbs,
1423 * but that can deadlock->flush only current cpu. 1114 * but that can deadlock->flush only current cpu.
1424 */ 1115 */
@@ -1439,18 +1130,150 @@ unsigned long __init find_ecache_flush_span(unsigned long size)
1439 return ~0UL; 1130 return ~0UL;
1440} 1131}
1441 1132
1133static void __init tsb_phys_patch(void)
1134{
1135 struct tsb_ldquad_phys_patch_entry *pquad;
1136 struct tsb_phys_patch_entry *p;
1137
1138 pquad = &__tsb_ldquad_phys_patch;
1139 while (pquad < &__tsb_ldquad_phys_patch_end) {
1140 unsigned long addr = pquad->addr;
1141
1142 if (tlb_type == hypervisor)
1143 *(unsigned int *) addr = pquad->sun4v_insn;
1144 else
1145 *(unsigned int *) addr = pquad->sun4u_insn;
1146 wmb();
1147 __asm__ __volatile__("flush %0"
1148 : /* no outputs */
1149 : "r" (addr));
1150
1151 pquad++;
1152 }
1153
1154 p = &__tsb_phys_patch;
1155 while (p < &__tsb_phys_patch_end) {
1156 unsigned long addr = p->addr;
1157
1158 *(unsigned int *) addr = p->insn;
1159 wmb();
1160 __asm__ __volatile__("flush %0"
1161 : /* no outputs */
1162 : "r" (addr));
1163
1164 p++;
1165 }
1166}
1167
1168/* Don't mark as init, we give this to the Hypervisor. */
1169static struct hv_tsb_descr ktsb_descr[2];
1170extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
1171
1172static void __init sun4v_ktsb_init(void)
1173{
1174 unsigned long ktsb_pa;
1175
1176 /* First KTSB for PAGE_SIZE mappings. */
1177 ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE);
1178
1179 switch (PAGE_SIZE) {
1180 case 8 * 1024:
1181 default:
1182 ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_8K;
1183 ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_8K;
1184 break;
1185
1186 case 64 * 1024:
1187 ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_64K;
1188 ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_64K;
1189 break;
1190
1191 case 512 * 1024:
1192 ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_512K;
1193 ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_512K;
1194 break;
1195
1196 case 4 * 1024 * 1024:
1197 ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_4MB;
1198 ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_4MB;
1199 break;
1200 };
1201
1202 ktsb_descr[0].assoc = 1;
1203 ktsb_descr[0].num_ttes = KERNEL_TSB_NENTRIES;
1204 ktsb_descr[0].ctx_idx = 0;
1205 ktsb_descr[0].tsb_base = ktsb_pa;
1206 ktsb_descr[0].resv = 0;
1207
1208 /* Second KTSB for 4MB/256MB mappings. */
1209 ktsb_pa = (kern_base +
1210 ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
1211
1212 ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB;
1213 ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB |
1214 HV_PGSZ_MASK_256MB);
1215 ktsb_descr[1].assoc = 1;
1216 ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES;
1217 ktsb_descr[1].ctx_idx = 0;
1218 ktsb_descr[1].tsb_base = ktsb_pa;
1219 ktsb_descr[1].resv = 0;
1220}
1221
1222void __cpuinit sun4v_ktsb_register(void)
1223{
1224 register unsigned long func asm("%o5");
1225 register unsigned long arg0 asm("%o0");
1226 register unsigned long arg1 asm("%o1");
1227 unsigned long pa;
1228
1229 pa = kern_base + ((unsigned long)&ktsb_descr[0] - KERNBASE);
1230
1231 func = HV_FAST_MMU_TSB_CTX0;
1232 arg0 = 2;
1233 arg1 = pa;
1234 __asm__ __volatile__("ta %6"
1235 : "=&r" (func), "=&r" (arg0), "=&r" (arg1)
1236 : "0" (func), "1" (arg0), "2" (arg1),
1237 "i" (HV_FAST_TRAP));
1238}
1239
1442/* paging_init() sets up the page tables */ 1240/* paging_init() sets up the page tables */
1443 1241
1444extern void cheetah_ecache_flush_init(void); 1242extern void cheetah_ecache_flush_init(void);
1243extern void sun4v_patch_tlb_handlers(void);
1445 1244
1446static unsigned long last_valid_pfn; 1245static unsigned long last_valid_pfn;
1447pgd_t swapper_pg_dir[2048]; 1246pgd_t swapper_pg_dir[2048];
1448 1247
1248static void sun4u_pgprot_init(void);
1249static void sun4v_pgprot_init(void);
1250
1449void __init paging_init(void) 1251void __init paging_init(void)
1450{ 1252{
1451 unsigned long end_pfn, pages_avail, shift; 1253 unsigned long end_pfn, pages_avail, shift, phys_base;
1452 unsigned long real_end, i; 1254 unsigned long real_end, i;
1453 1255
1256 kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
1257 kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
1258
1259 /* Invalidate both kernel TSBs. */
1260 memset(swapper_tsb, 0x40, sizeof(swapper_tsb));
1261 memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
1262
1263 if (tlb_type == hypervisor)
1264 sun4v_pgprot_init();
1265 else
1266 sun4u_pgprot_init();
1267
1268 if (tlb_type == cheetah_plus ||
1269 tlb_type == hypervisor)
1270 tsb_phys_patch();
1271
1272 if (tlb_type == hypervisor) {
1273 sun4v_patch_tlb_handlers();
1274 sun4v_ktsb_init();
1275 }
1276
1454 /* Find available physical memory... */ 1277 /* Find available physical memory... */
1455 read_obp_memory("available", &pavail[0], &pavail_ents); 1278 read_obp_memory("available", &pavail[0], &pavail_ents);
1456 1279
@@ -1458,11 +1281,6 @@ void __init paging_init(void)
1458 for (i = 0; i < pavail_ents; i++) 1281 for (i = 0; i < pavail_ents; i++)
1459 phys_base = min(phys_base, pavail[i].phys_addr); 1282 phys_base = min(phys_base, pavail[i].phys_addr);
1460 1283
1461 pfn_base = phys_base >> PAGE_SHIFT;
1462
1463 kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
1464 kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
1465
1466 set_bit(0, mmu_context_bmap); 1284 set_bit(0, mmu_context_bmap);
1467 1285
1468 shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE); 1286 shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE);
@@ -1486,47 +1304,38 @@ void __init paging_init(void)
1486 pud_set(pud_offset(&swapper_pg_dir[0], 0), 1304 pud_set(pud_offset(&swapper_pg_dir[0], 0),
1487 swapper_low_pmd_dir + (shift / sizeof(pgd_t))); 1305 swapper_low_pmd_dir + (shift / sizeof(pgd_t)));
1488 1306
1489 swapper_pgd_zero = pgd_val(swapper_pg_dir[0]);
1490
1491 inherit_prom_mappings(); 1307 inherit_prom_mappings();
1492 1308
1493 /* Ok, we can use our TLB miss and window trap handlers safely. 1309 /* Ok, we can use our TLB miss and window trap handlers safely. */
1494 * We need to do a quick peek here to see if we are on StarFire 1310 setup_tba();
1495 * or not, so setup_tba can setup the IRQ globals correctly (it
1496 * needs to get the hard smp processor id correctly).
1497 */
1498 {
1499 extern void setup_tba(int);
1500 setup_tba(this_is_starfire);
1501 }
1502
1503 inherit_locked_prom_mappings(1);
1504 1311
1505 __flush_tlb_all(); 1312 __flush_tlb_all();
1506 1313
1314 if (tlb_type == hypervisor)
1315 sun4v_ktsb_register();
1316
1507 /* Setup bootmem... */ 1317 /* Setup bootmem... */
1508 pages_avail = 0; 1318 pages_avail = 0;
1509 last_valid_pfn = end_pfn = bootmem_init(&pages_avail); 1319 last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base);
1320
1321 max_mapnr = last_valid_pfn;
1510 1322
1511#ifdef CONFIG_DEBUG_PAGEALLOC
1512 kernel_physical_mapping_init(); 1323 kernel_physical_mapping_init();
1513#endif
1514 1324
1515 { 1325 {
1516 unsigned long zones_size[MAX_NR_ZONES]; 1326 unsigned long zones_size[MAX_NR_ZONES];
1517 unsigned long zholes_size[MAX_NR_ZONES]; 1327 unsigned long zholes_size[MAX_NR_ZONES];
1518 unsigned long npages;
1519 int znum; 1328 int znum;
1520 1329
1521 for (znum = 0; znum < MAX_NR_ZONES; znum++) 1330 for (znum = 0; znum < MAX_NR_ZONES; znum++)
1522 zones_size[znum] = zholes_size[znum] = 0; 1331 zones_size[znum] = zholes_size[znum] = 0;
1523 1332
1524 npages = end_pfn - pfn_base; 1333 zones_size[ZONE_DMA] = end_pfn;
1525 zones_size[ZONE_DMA] = npages; 1334 zholes_size[ZONE_DMA] = end_pfn - pages_avail;
1526 zholes_size[ZONE_DMA] = npages - pages_avail;
1527 1335
1528 free_area_init_node(0, &contig_page_data, zones_size, 1336 free_area_init_node(0, &contig_page_data, zones_size,
1529 phys_base >> PAGE_SHIFT, zholes_size); 1337 __pa(PAGE_OFFSET) >> PAGE_SHIFT,
1338 zholes_size);
1530 } 1339 }
1531 1340
1532 device_scan(); 1341 device_scan();
@@ -1596,7 +1405,6 @@ void __init mem_init(void)
1596 1405
1597 taint_real_pages(); 1406 taint_real_pages();
1598 1407
1599 max_mapnr = last_valid_pfn - pfn_base;
1600 high_memory = __va(last_valid_pfn << PAGE_SHIFT); 1408 high_memory = __va(last_valid_pfn << PAGE_SHIFT);
1601 1409
1602#ifdef CONFIG_DEBUG_BOOTMEM 1410#ifdef CONFIG_DEBUG_BOOTMEM
@@ -1676,3 +1484,342 @@ void free_initrd_mem(unsigned long start, unsigned long end)
1676 } 1484 }
1677} 1485}
1678#endif 1486#endif
1487
1488#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
1489#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
1490#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
1491#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
1492#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
1493#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)
1494
1495pgprot_t PAGE_KERNEL __read_mostly;
1496EXPORT_SYMBOL(PAGE_KERNEL);
1497
1498pgprot_t PAGE_KERNEL_LOCKED __read_mostly;
1499pgprot_t PAGE_COPY __read_mostly;
1500
1501pgprot_t PAGE_SHARED __read_mostly;
1502EXPORT_SYMBOL(PAGE_SHARED);
1503
1504pgprot_t PAGE_EXEC __read_mostly;
1505unsigned long pg_iobits __read_mostly;
1506
1507unsigned long _PAGE_IE __read_mostly;
1508
1509unsigned long _PAGE_E __read_mostly;
1510EXPORT_SYMBOL(_PAGE_E);
1511
1512unsigned long _PAGE_CACHE __read_mostly;
1513EXPORT_SYMBOL(_PAGE_CACHE);
1514
1515static void prot_init_common(unsigned long page_none,
1516 unsigned long page_shared,
1517 unsigned long page_copy,
1518 unsigned long page_readonly,
1519 unsigned long page_exec_bit)
1520{
1521 PAGE_COPY = __pgprot(page_copy);
1522 PAGE_SHARED = __pgprot(page_shared);
1523
1524 protection_map[0x0] = __pgprot(page_none);
1525 protection_map[0x1] = __pgprot(page_readonly & ~page_exec_bit);
1526 protection_map[0x2] = __pgprot(page_copy & ~page_exec_bit);
1527 protection_map[0x3] = __pgprot(page_copy & ~page_exec_bit);
1528 protection_map[0x4] = __pgprot(page_readonly);
1529 protection_map[0x5] = __pgprot(page_readonly);
1530 protection_map[0x6] = __pgprot(page_copy);
1531 protection_map[0x7] = __pgprot(page_copy);
1532 protection_map[0x8] = __pgprot(page_none);
1533 protection_map[0x9] = __pgprot(page_readonly & ~page_exec_bit);
1534 protection_map[0xa] = __pgprot(page_shared & ~page_exec_bit);
1535 protection_map[0xb] = __pgprot(page_shared & ~page_exec_bit);
1536 protection_map[0xc] = __pgprot(page_readonly);
1537 protection_map[0xd] = __pgprot(page_readonly);
1538 protection_map[0xe] = __pgprot(page_shared);
1539 protection_map[0xf] = __pgprot(page_shared);
1540}
1541
1542static void __init sun4u_pgprot_init(void)
1543{
1544 unsigned long page_none, page_shared, page_copy, page_readonly;
1545 unsigned long page_exec_bit;
1546
1547 PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
1548 _PAGE_CACHE_4U | _PAGE_P_4U |
1549 __ACCESS_BITS_4U | __DIRTY_BITS_4U |
1550 _PAGE_EXEC_4U);
1551 PAGE_KERNEL_LOCKED = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
1552 _PAGE_CACHE_4U | _PAGE_P_4U |
1553 __ACCESS_BITS_4U | __DIRTY_BITS_4U |
1554 _PAGE_EXEC_4U | _PAGE_L_4U);
1555 PAGE_EXEC = __pgprot(_PAGE_EXEC_4U);
1556
1557 _PAGE_IE = _PAGE_IE_4U;
1558 _PAGE_E = _PAGE_E_4U;
1559 _PAGE_CACHE = _PAGE_CACHE_4U;
1560
1561 pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4U | __DIRTY_BITS_4U |
1562 __ACCESS_BITS_4U | _PAGE_E_4U);
1563
1564 kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
1565 0xfffff80000000000;
1566 kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U |
1567 _PAGE_P_4U | _PAGE_W_4U);
1568
1569 /* XXX Should use 256MB on Panther. XXX */
1570 kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
1571
1572 _PAGE_SZBITS = _PAGE_SZBITS_4U;
1573 _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
1574 _PAGE_SZ64K_4U | _PAGE_SZ8K_4U |
1575 _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U);
1576
1577
1578 page_none = _PAGE_PRESENT_4U | _PAGE_ACCESSED_4U | _PAGE_CACHE_4U;
1579 page_shared = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
1580 __ACCESS_BITS_4U | _PAGE_WRITE_4U | _PAGE_EXEC_4U);
1581 page_copy = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
1582 __ACCESS_BITS_4U | _PAGE_EXEC_4U);
1583 page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
1584 __ACCESS_BITS_4U | _PAGE_EXEC_4U);
1585
1586 page_exec_bit = _PAGE_EXEC_4U;
1587
1588 prot_init_common(page_none, page_shared, page_copy, page_readonly,
1589 page_exec_bit);
1590}
1591
1592static void __init sun4v_pgprot_init(void)
1593{
1594 unsigned long page_none, page_shared, page_copy, page_readonly;
1595 unsigned long page_exec_bit;
1596
1597 PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
1598 _PAGE_CACHE_4V | _PAGE_P_4V |
1599 __ACCESS_BITS_4V | __DIRTY_BITS_4V |
1600 _PAGE_EXEC_4V);
1601 PAGE_KERNEL_LOCKED = PAGE_KERNEL;
1602 PAGE_EXEC = __pgprot(_PAGE_EXEC_4V);
1603
1604 _PAGE_IE = _PAGE_IE_4V;
1605 _PAGE_E = _PAGE_E_4V;
1606 _PAGE_CACHE = _PAGE_CACHE_4V;
1607
1608 kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
1609 0xfffff80000000000;
1610 kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
1611 _PAGE_P_4V | _PAGE_W_4V);
1612
1613 kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
1614 0xfffff80000000000;
1615 kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
1616 _PAGE_P_4V | _PAGE_W_4V);
1617
1618 pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
1619 __ACCESS_BITS_4V | _PAGE_E_4V);
1620
1621 _PAGE_SZBITS = _PAGE_SZBITS_4V;
1622 _PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V |
1623 _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V |
1624 _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
1625 _PAGE_SZ64K_4V | _PAGE_SZ8K_4V);
1626
1627 page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V;
1628 page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
1629 __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V);
1630 page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
1631 __ACCESS_BITS_4V | _PAGE_EXEC_4V);
1632 page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
1633 __ACCESS_BITS_4V | _PAGE_EXEC_4V);
1634
1635 page_exec_bit = _PAGE_EXEC_4V;
1636
1637 prot_init_common(page_none, page_shared, page_copy, page_readonly,
1638 page_exec_bit);
1639}
1640
1641unsigned long pte_sz_bits(unsigned long sz)
1642{
1643 if (tlb_type == hypervisor) {
1644 switch (sz) {
1645 case 8 * 1024:
1646 default:
1647 return _PAGE_SZ8K_4V;
1648 case 64 * 1024:
1649 return _PAGE_SZ64K_4V;
1650 case 512 * 1024:
1651 return _PAGE_SZ512K_4V;
1652 case 4 * 1024 * 1024:
1653 return _PAGE_SZ4MB_4V;
1654 };
1655 } else {
1656 switch (sz) {
1657 case 8 * 1024:
1658 default:
1659 return _PAGE_SZ8K_4U;
1660 case 64 * 1024:
1661 return _PAGE_SZ64K_4U;
1662 case 512 * 1024:
1663 return _PAGE_SZ512K_4U;
1664 case 4 * 1024 * 1024:
1665 return _PAGE_SZ4MB_4U;
1666 };
1667 }
1668}
1669
1670pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space, unsigned long page_size)
1671{
1672 pte_t pte;
1673
1674 pte_val(pte) = page | pgprot_val(pgprot_noncached(prot));
1675 pte_val(pte) |= (((unsigned long)space) << 32);
1676 pte_val(pte) |= pte_sz_bits(page_size);
1677
1678 return pte;
1679}
1680
1681static unsigned long kern_large_tte(unsigned long paddr)
1682{
1683 unsigned long val;
1684
1685 val = (_PAGE_VALID | _PAGE_SZ4MB_4U |
1686 _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U |
1687 _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U);
1688 if (tlb_type == hypervisor)
1689 val = (_PAGE_VALID | _PAGE_SZ4MB_4V |
1690 _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V |
1691 _PAGE_EXEC_4V | _PAGE_W_4V);
1692
1693 return val | paddr;
1694}
1695
1696/*
1697 * Translate PROM's mapping we capture at boot time into physical address.
1698 * The second parameter is only set from prom_callback() invocations.
1699 */
1700unsigned long prom_virt_to_phys(unsigned long promva, int *error)
1701{
1702 unsigned long mask;
1703 int i;
1704
1705 mask = _PAGE_PADDR_4U;
1706 if (tlb_type == hypervisor)
1707 mask = _PAGE_PADDR_4V;
1708
1709 for (i = 0; i < prom_trans_ents; i++) {
1710 struct linux_prom_translation *p = &prom_trans[i];
1711
1712 if (promva >= p->virt &&
1713 promva < (p->virt + p->size)) {
1714 unsigned long base = p->data & mask;
1715
1716 if (error)
1717 *error = 0;
1718 return base + (promva & (8192 - 1));
1719 }
1720 }
1721 if (error)
1722 *error = 1;
1723 return 0UL;
1724}
1725
1726/* XXX We should kill off this ugly thing at some point. XXX */
1727unsigned long sun4u_get_pte(unsigned long addr)
1728{
1729 pgd_t *pgdp;
1730 pud_t *pudp;
1731 pmd_t *pmdp;
1732 pte_t *ptep;
1733 unsigned long mask = _PAGE_PADDR_4U;
1734
1735 if (tlb_type == hypervisor)
1736 mask = _PAGE_PADDR_4V;
1737
1738 if (addr >= PAGE_OFFSET)
1739 return addr & mask;
1740
1741 if ((addr >= LOW_OBP_ADDRESS) && (addr < HI_OBP_ADDRESS))
1742 return prom_virt_to_phys(addr, NULL);
1743
1744 pgdp = pgd_offset_k(addr);
1745 pudp = pud_offset(pgdp, addr);
1746 pmdp = pmd_offset(pudp, addr);
1747 ptep = pte_offset_kernel(pmdp, addr);
1748
1749 return pte_val(*ptep) & mask;
1750}
1751
1752/* If not locked, zap it. */
1753void __flush_tlb_all(void)
1754{
1755 unsigned long pstate;
1756 int i;
1757
1758 __asm__ __volatile__("flushw\n\t"
1759 "rdpr %%pstate, %0\n\t"
1760 "wrpr %0, %1, %%pstate"
1761 : "=r" (pstate)
1762 : "i" (PSTATE_IE));
1763 if (tlb_type == spitfire) {
1764 for (i = 0; i < 64; i++) {
1765 /* Spitfire Errata #32 workaround */
1766 /* NOTE: Always runs on spitfire, so no
1767 * cheetah+ page size encodings.
1768 */
1769 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
1770 "flush %%g6"
1771 : /* No outputs */
1772 : "r" (0),
1773 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
1774
1775 if (!(spitfire_get_dtlb_data(i) & _PAGE_L_4U)) {
1776 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
1777 "membar #Sync"
1778 : /* no outputs */
1779 : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
1780 spitfire_put_dtlb_data(i, 0x0UL);
1781 }
1782
1783 /* Spitfire Errata #32 workaround */
1784 /* NOTE: Always runs on spitfire, so no
1785 * cheetah+ page size encodings.
1786 */
1787 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
1788 "flush %%g6"
1789 : /* No outputs */
1790 : "r" (0),
1791 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
1792
1793 if (!(spitfire_get_itlb_data(i) & _PAGE_L_4U)) {
1794 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
1795 "membar #Sync"
1796 : /* no outputs */
1797 : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
1798 spitfire_put_itlb_data(i, 0x0UL);
1799 }
1800 }
1801 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
1802 cheetah_flush_dtlb_all();
1803 cheetah_flush_itlb_all();
1804 }
1805 __asm__ __volatile__("wrpr %0, 0, %%pstate"
1806 : : "r" (pstate));
1807}
1808
1809#ifdef CONFIG_MEMORY_HOTPLUG
1810
1811void online_page(struct page *page)
1812{
1813 ClearPageReserved(page);
1814 set_page_count(page, 0);
1815 free_cold_page(page);
1816 totalram_pages++;
1817 num_physpages++;
1818}
1819
1820int remove_memory(u64 start, u64 size)
1821{
1822 return -EINVAL;
1823}
1824
1825#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
index 8b104be4662b..a079cf42505e 100644
--- a/arch/sparc64/mm/tlb.c
+++ b/arch/sparc64/mm/tlb.c
@@ -25,6 +25,8 @@ void flush_tlb_pending(void)
25 struct mmu_gather *mp = &__get_cpu_var(mmu_gathers); 25 struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
26 26
27 if (mp->tlb_nr) { 27 if (mp->tlb_nr) {
28 flush_tsb_user(mp);
29
28 if (CTX_VALID(mp->mm->context)) { 30 if (CTX_VALID(mp->mm->context)) {
29#ifdef CONFIG_SMP 31#ifdef CONFIG_SMP
30 smp_flush_tlb_pending(mp->mm, mp->tlb_nr, 32 smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
@@ -47,7 +49,8 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t
47 if (pte_exec(orig)) 49 if (pte_exec(orig))
48 vaddr |= 0x1UL; 50 vaddr |= 0x1UL;
49 51
50 if (pte_dirty(orig)) { 52 if (tlb_type != hypervisor &&
53 pte_dirty(orig)) {
51 unsigned long paddr, pfn = pte_pfn(orig); 54 unsigned long paddr, pfn = pte_pfn(orig);
52 struct address_space *mapping; 55 struct address_space *mapping;
53 struct page *page; 56 struct page *page;
@@ -89,62 +92,3 @@ no_cache_flush:
89 if (nr >= TLB_BATCH_NR) 92 if (nr >= TLB_BATCH_NR)
90 flush_tlb_pending(); 93 flush_tlb_pending();
91} 94}
92
93void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
94{
95 struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
96 unsigned long nr = mp->tlb_nr;
97 long s = start, e = end, vpte_base;
98
99 if (mp->fullmm)
100 return;
101
102 /* If start is greater than end, that is a real problem. */
103 BUG_ON(start > end);
104
105 /* However, straddling the VA space hole is quite normal. */
106 s &= PMD_MASK;
107 e = (e + PMD_SIZE - 1) & PMD_MASK;
108
109 vpte_base = (tlb_type == spitfire ?
110 VPTE_BASE_SPITFIRE :
111 VPTE_BASE_CHEETAH);
112
113 if (unlikely(nr != 0 && mm != mp->mm)) {
114 flush_tlb_pending();
115 nr = 0;
116 }
117
118 if (nr == 0)
119 mp->mm = mm;
120
121 start = vpte_base + (s >> (PAGE_SHIFT - 3));
122 end = vpte_base + (e >> (PAGE_SHIFT - 3));
123
124 /* If the request straddles the VA space hole, we
125 * need to swap start and end. The reason this
126 * occurs is that "vpte_base" is the center of
127 * the linear page table mapping area. Thus,
128 * high addresses with the sign bit set map to
129 * addresses below vpte_base and non-sign bit
130 * addresses map to addresses above vpte_base.
131 */
132 if (end < start) {
133 unsigned long tmp = start;
134
135 start = end;
136 end = tmp;
137 }
138
139 while (start < end) {
140 mp->vaddrs[nr] = start;
141 mp->tlb_nr = ++nr;
142 if (nr >= TLB_BATCH_NR) {
143 flush_tlb_pending();
144 nr = 0;
145 }
146 start += PAGE_SIZE;
147 }
148 if (nr)
149 flush_tlb_pending();
150}
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
new file mode 100644
index 000000000000..b2064e2a44d6
--- /dev/null
+++ b/arch/sparc64/mm/tsb.c
@@ -0,0 +1,440 @@
1/* arch/sparc64/mm/tsb.c
2 *
3 * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <asm/system.h>
8#include <asm/page.h>
9#include <asm/tlbflush.h>
10#include <asm/tlb.h>
11#include <asm/mmu_context.h>
12#include <asm/pgtable.h>
13#include <asm/tsb.h>
14#include <asm/oplib.h>
15
16extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
17
18static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long nentries)
19{
20 vaddr >>= PAGE_SHIFT;
21 return vaddr & (nentries - 1);
22}
23
24static inline int tag_compare(unsigned long tag, unsigned long vaddr)
25{
26 return (tag == (vaddr >> 22));
27}
28
29/* TSB flushes need only occur on the processor initiating the address
30 * space modification, not on each cpu the address space has run on.
31 * Only the TLB flush needs that treatment.
32 */
33
34void flush_tsb_kernel_range(unsigned long start, unsigned long end)
35{
36 unsigned long v;
37
38 for (v = start; v < end; v += PAGE_SIZE) {
39 unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES);
40 struct tsb *ent = &swapper_tsb[hash];
41
42 if (tag_compare(ent->tag, v)) {
43 ent->tag = (1UL << TSB_TAG_INVALID_BIT);
44 membar_storeload_storestore();
45 }
46 }
47}
48
49void flush_tsb_user(struct mmu_gather *mp)
50{
51 struct mm_struct *mm = mp->mm;
52 unsigned long nentries, base, flags;
53 struct tsb *tsb;
54 int i;
55
56 spin_lock_irqsave(&mm->context.lock, flags);
57
58 tsb = mm->context.tsb;
59 nentries = mm->context.tsb_nentries;
60
61 if (tlb_type == cheetah_plus || tlb_type == hypervisor)
62 base = __pa(tsb);
63 else
64 base = (unsigned long) tsb;
65
66 for (i = 0; i < mp->tlb_nr; i++) {
67 unsigned long v = mp->vaddrs[i];
68 unsigned long tag, ent, hash;
69
70 v &= ~0x1UL;
71
72 hash = tsb_hash(v, nentries);
73 ent = base + (hash * sizeof(struct tsb));
74 tag = (v >> 22UL);
75
76 tsb_flush(ent, tag);
77 }
78
79 spin_unlock_irqrestore(&mm->context.lock, flags);
80}
81
82static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
83{
84 unsigned long tsb_reg, base, tsb_paddr;
85 unsigned long page_sz, tte;
86
87 mm->context.tsb_nentries = tsb_bytes / sizeof(struct tsb);
88
89 base = TSBMAP_BASE;
90 tte = pgprot_val(PAGE_KERNEL_LOCKED);
91 tsb_paddr = __pa(mm->context.tsb);
92 BUG_ON(tsb_paddr & (tsb_bytes - 1UL));
93
94 /* Use the smallest page size that can map the whole TSB
95 * in one TLB entry.
96 */
97 switch (tsb_bytes) {
98 case 8192 << 0:
99 tsb_reg = 0x0UL;
100#ifdef DCACHE_ALIASING_POSSIBLE
101 base += (tsb_paddr & 8192);
102#endif
103 page_sz = 8192;
104 break;
105
106 case 8192 << 1:
107 tsb_reg = 0x1UL;
108 page_sz = 64 * 1024;
109 break;
110
111 case 8192 << 2:
112 tsb_reg = 0x2UL;
113 page_sz = 64 * 1024;
114 break;
115
116 case 8192 << 3:
117 tsb_reg = 0x3UL;
118 page_sz = 64 * 1024;
119 break;
120
121 case 8192 << 4:
122 tsb_reg = 0x4UL;
123 page_sz = 512 * 1024;
124 break;
125
126 case 8192 << 5:
127 tsb_reg = 0x5UL;
128 page_sz = 512 * 1024;
129 break;
130
131 case 8192 << 6:
132 tsb_reg = 0x6UL;
133 page_sz = 512 * 1024;
134 break;
135
136 case 8192 << 7:
137 tsb_reg = 0x7UL;
138 page_sz = 4 * 1024 * 1024;
139 break;
140
141 default:
142 BUG();
143 };
144 tte |= pte_sz_bits(page_sz);
145
146 if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
147 /* Physical mapping, no locked TLB entry for TSB. */
148 tsb_reg |= tsb_paddr;
149
150 mm->context.tsb_reg_val = tsb_reg;
151 mm->context.tsb_map_vaddr = 0;
152 mm->context.tsb_map_pte = 0;
153 } else {
154 tsb_reg |= base;
155 tsb_reg |= (tsb_paddr & (page_sz - 1UL));
156 tte |= (tsb_paddr & ~(page_sz - 1UL));
157
158 mm->context.tsb_reg_val = tsb_reg;
159 mm->context.tsb_map_vaddr = base;
160 mm->context.tsb_map_pte = tte;
161 }
162
163 /* Setup the Hypervisor TSB descriptor. */
164 if (tlb_type == hypervisor) {
165 struct hv_tsb_descr *hp = &mm->context.tsb_descr;
166
167 switch (PAGE_SIZE) {
168 case 8192:
169 default:
170 hp->pgsz_idx = HV_PGSZ_IDX_8K;
171 break;
172
173 case 64 * 1024:
174 hp->pgsz_idx = HV_PGSZ_IDX_64K;
175 break;
176
177 case 512 * 1024:
178 hp->pgsz_idx = HV_PGSZ_IDX_512K;
179 break;
180
181 case 4 * 1024 * 1024:
182 hp->pgsz_idx = HV_PGSZ_IDX_4MB;
183 break;
184 };
185 hp->assoc = 1;
186 hp->num_ttes = tsb_bytes / 16;
187 hp->ctx_idx = 0;
188 switch (PAGE_SIZE) {
189 case 8192:
190 default:
191 hp->pgsz_mask = HV_PGSZ_MASK_8K;
192 break;
193
194 case 64 * 1024:
195 hp->pgsz_mask = HV_PGSZ_MASK_64K;
196 break;
197
198 case 512 * 1024:
199 hp->pgsz_mask = HV_PGSZ_MASK_512K;
200 break;
201
202 case 4 * 1024 * 1024:
203 hp->pgsz_mask = HV_PGSZ_MASK_4MB;
204 break;
205 };
206 hp->tsb_base = tsb_paddr;
207 hp->resv = 0;
208 }
209}
210
211static kmem_cache_t *tsb_caches[8] __read_mostly;
212
213static const char *tsb_cache_names[8] = {
214 "tsb_8KB",
215 "tsb_16KB",
216 "tsb_32KB",
217 "tsb_64KB",
218 "tsb_128KB",
219 "tsb_256KB",
220 "tsb_512KB",
221 "tsb_1MB",
222};
223
224void __init tsb_cache_init(void)
225{
226 unsigned long i;
227
228 for (i = 0; i < 8; i++) {
229 unsigned long size = 8192 << i;
230 const char *name = tsb_cache_names[i];
231
232 tsb_caches[i] = kmem_cache_create(name,
233 size, size,
234 SLAB_HWCACHE_ALIGN |
235 SLAB_MUST_HWCACHE_ALIGN,
236 NULL, NULL);
237 if (!tsb_caches[i]) {
238 prom_printf("Could not create %s cache\n", name);
239 prom_halt();
240 }
241 }
242}
243
244/* When the RSS of an address space exceeds mm->context.tsb_rss_limit,
245 * do_sparc64_fault() invokes this routine to try and grow the TSB.
246 *
247 * When we reach the maximum TSB size supported, we stick ~0UL into
248 * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache()
249 * will not trigger any longer.
250 *
251 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
252 * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
253 * must be 512K aligned. It also must be physically contiguous, so we
254 * cannot use vmalloc().
255 *
256 * The idea here is to grow the TSB when the RSS of the process approaches
257 * the number of entries that the current TSB can hold at once. Currently,
258 * we trigger when the RSS hits 3/4 of the TSB capacity.
259 */
260void tsb_grow(struct mm_struct *mm, unsigned long rss)
261{
262 unsigned long max_tsb_size = 1 * 1024 * 1024;
263 unsigned long new_size, old_size, flags;
264 struct tsb *old_tsb, *new_tsb;
265 unsigned long new_cache_index, old_cache_index;
266 unsigned long new_rss_limit;
267 gfp_t gfp_flags;
268
269 if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
270 max_tsb_size = (PAGE_SIZE << MAX_ORDER);
271
272 new_cache_index = 0;
273 for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
274 unsigned long n_entries = new_size / sizeof(struct tsb);
275
276 n_entries = (n_entries * 3) / 4;
277 if (n_entries > rss)
278 break;
279
280 new_cache_index++;
281 }
282
283 if (new_size == max_tsb_size)
284 new_rss_limit = ~0UL;
285 else
286 new_rss_limit = ((new_size / sizeof(struct tsb)) * 3) / 4;
287
288retry_tsb_alloc:
289 gfp_flags = GFP_KERNEL;
290 if (new_size > (PAGE_SIZE * 2))
291 gfp_flags = __GFP_NOWARN | __GFP_NORETRY;
292
293 new_tsb = kmem_cache_alloc(tsb_caches[new_cache_index], gfp_flags);
294 if (unlikely(!new_tsb)) {
295 /* Not being able to fork due to a high-order TSB
296 * allocation failure is very bad behavior. Just back
297 * down to a 0-order allocation and force no TSB
298 * growing for this address space.
299 */
300 if (mm->context.tsb == NULL && new_cache_index > 0) {
301 new_cache_index = 0;
302 new_size = 8192;
303 new_rss_limit = ~0UL;
304 goto retry_tsb_alloc;
305 }
306
307 /* If we failed on a TSB grow, we are under serious
308 * memory pressure so don't try to grow any more.
309 */
310 if (mm->context.tsb != NULL)
311 mm->context.tsb_rss_limit = ~0UL;
312 return;
313 }
314
315 /* Mark all tags as invalid. */
316 tsb_init(new_tsb, new_size);
317
318 /* Ok, we are about to commit the changes. If we are
319 * growing an existing TSB the locking is very tricky,
320 * so WATCH OUT!
321 *
322 * We have to hold mm->context.lock while committing to the
323 * new TSB, this synchronizes us with processors in
324 * flush_tsb_user() and switch_mm() for this address space.
325 *
326 * But even with that lock held, processors run asynchronously
327 * accessing the old TSB via TLB miss handling. This is OK
328 * because those actions are just propagating state from the
329 * Linux page tables into the TSB, page table mappings are not
330 * being changed. If a real fault occurs, the processor will
331 * synchronize with us when it hits flush_tsb_user(), this is
332 * also true for the case where vmscan is modifying the page
333 * tables. The only thing we need to be careful with is to
334 * skip any locked TSB entries during copy_tsb().
335 *
336 * When we finish committing to the new TSB, we have to drop
337 * the lock and ask all other cpus running this address space
338 * to run tsb_context_switch() to see the new TSB table.
339 */
340 spin_lock_irqsave(&mm->context.lock, flags);
341
342 old_tsb = mm->context.tsb;
343 old_cache_index = (mm->context.tsb_reg_val & 0x7UL);
344 old_size = mm->context.tsb_nentries * sizeof(struct tsb);
345
346
347 /* Handle multiple threads trying to grow the TSB at the same time.
348 * One will get in here first, and bump the size and the RSS limit.
349 * The others will get in here next and hit this check.
350 */
351 if (unlikely(old_tsb && (rss < mm->context.tsb_rss_limit))) {
352 spin_unlock_irqrestore(&mm->context.lock, flags);
353
354 kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
355 return;
356 }
357
358 mm->context.tsb_rss_limit = new_rss_limit;
359
360 if (old_tsb) {
361 extern void copy_tsb(unsigned long old_tsb_base,
362 unsigned long old_tsb_size,
363 unsigned long new_tsb_base,
364 unsigned long new_tsb_size);
365 unsigned long old_tsb_base = (unsigned long) old_tsb;
366 unsigned long new_tsb_base = (unsigned long) new_tsb;
367
368 if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
369 old_tsb_base = __pa(old_tsb_base);
370 new_tsb_base = __pa(new_tsb_base);
371 }
372 copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
373 }
374
375 mm->context.tsb = new_tsb;
376 setup_tsb_params(mm, new_size);
377
378 spin_unlock_irqrestore(&mm->context.lock, flags);
379
380 /* If old_tsb is NULL, we're being invoked for the first time
381 * from init_new_context().
382 */
383 if (old_tsb) {
384 /* Reload it on the local cpu. */
385 tsb_context_switch(mm);
386
387 /* Now force other processors to do the same. */
388 smp_tsb_sync(mm);
389
390 /* Now it is safe to free the old tsb. */
391 kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
392 }
393}
394
395int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
396{
397 spin_lock_init(&mm->context.lock);
398
399 mm->context.sparc64_ctx_val = 0UL;
400
401 /* copy_mm() copies over the parent's mm_struct before calling
402 * us, so we need to zero out the TSB pointer or else tsb_grow()
403 * will be confused and think there is an older TSB to free up.
404 */
405 mm->context.tsb = NULL;
406
407 /* If this is fork, inherit the parent's TSB size. We would
408 * grow it to that size on the first page fault anyways.
409 */
410 tsb_grow(mm, get_mm_rss(mm));
411
412 if (unlikely(!mm->context.tsb))
413 return -ENOMEM;
414
415 return 0;
416}
417
418void destroy_context(struct mm_struct *mm)
419{
420 unsigned long flags, cache_index;
421
422 cache_index = (mm->context.tsb_reg_val & 0x7UL);
423 kmem_cache_free(tsb_caches[cache_index], mm->context.tsb);
424
425 /* We can remove these later, but for now it's useful
426 * to catch any bogus post-destroy_context() references
427 * to the TSB.
428 */
429 mm->context.tsb = NULL;
430 mm->context.tsb_reg_val = 0UL;
431
432 spin_lock_irqsave(&ctx_alloc_lock, flags);
433
434 if (CTX_VALID(mm->context)) {
435 unsigned long nr = CTX_NRBITS(mm->context);
436 mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
437 }
438
439 spin_unlock_irqrestore(&ctx_alloc_lock, flags);
440}
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index e4c9151fa116..f8479fad4047 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -15,6 +15,7 @@
15#include <asm/head.h> 15#include <asm/head.h>
16#include <asm/thread_info.h> 16#include <asm/thread_info.h>
17#include <asm/cacheflush.h> 17#include <asm/cacheflush.h>
18#include <asm/hypervisor.h>
18 19
19 /* Basically, most of the Spitfire vs. Cheetah madness 20 /* Basically, most of the Spitfire vs. Cheetah madness
20 * has to do with the fact that Cheetah does not support 21 * has to do with the fact that Cheetah does not support
@@ -29,16 +30,18 @@
29 .text 30 .text
30 .align 32 31 .align 32
31 .globl __flush_tlb_mm 32 .globl __flush_tlb_mm
32__flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ 33__flush_tlb_mm: /* 18 insns */
34 /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
33 ldxa [%o1] ASI_DMMU, %g2 35 ldxa [%o1] ASI_DMMU, %g2
34 cmp %g2, %o0 36 cmp %g2, %o0
35 bne,pn %icc, __spitfire_flush_tlb_mm_slow 37 bne,pn %icc, __spitfire_flush_tlb_mm_slow
36 mov 0x50, %g3 38 mov 0x50, %g3
37 stxa %g0, [%g3] ASI_DMMU_DEMAP 39 stxa %g0, [%g3] ASI_DMMU_DEMAP
38 stxa %g0, [%g3] ASI_IMMU_DEMAP 40 stxa %g0, [%g3] ASI_IMMU_DEMAP
41 sethi %hi(KERNBASE), %g3
42 flush %g3
39 retl 43 retl
40 flush %g6 44 nop
41 nop
42 nop 45 nop
43 nop 46 nop
44 nop 47 nop
@@ -51,7 +54,7 @@ __flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
51 54
52 .align 32 55 .align 32
53 .globl __flush_tlb_pending 56 .globl __flush_tlb_pending
54__flush_tlb_pending: 57__flush_tlb_pending: /* 26 insns */
55 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ 58 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
56 rdpr %pstate, %g7 59 rdpr %pstate, %g7
57 sllx %o1, 3, %o1 60 sllx %o1, 3, %o1
@@ -72,7 +75,8 @@ __flush_tlb_pending:
72 brnz,pt %o1, 1b 75 brnz,pt %o1, 1b
73 nop 76 nop
74 stxa %g2, [%o4] ASI_DMMU 77 stxa %g2, [%o4] ASI_DMMU
75 flush %g6 78 sethi %hi(KERNBASE), %o4
79 flush %o4
76 retl 80 retl
77 wrpr %g7, 0x0, %pstate 81 wrpr %g7, 0x0, %pstate
78 nop 82 nop
@@ -82,7 +86,8 @@ __flush_tlb_pending:
82 86
83 .align 32 87 .align 32
84 .globl __flush_tlb_kernel_range 88 .globl __flush_tlb_kernel_range
85__flush_tlb_kernel_range: /* %o0=start, %o1=end */ 89__flush_tlb_kernel_range: /* 16 insns */
90 /* %o0=start, %o1=end */
86 cmp %o0, %o1 91 cmp %o0, %o1
87 be,pn %xcc, 2f 92 be,pn %xcc, 2f
88 sethi %hi(PAGE_SIZE), %o4 93 sethi %hi(PAGE_SIZE), %o4
@@ -94,8 +99,11 @@ __flush_tlb_kernel_range: /* %o0=start, %o1=end */
94 membar #Sync 99 membar #Sync
95 brnz,pt %o3, 1b 100 brnz,pt %o3, 1b
96 sub %o3, %o4, %o3 101 sub %o3, %o4, %o3
972: retl 1022: sethi %hi(KERNBASE), %o3
98 flush %g6 103 flush %o3
104 retl
105 nop
106 nop
99 107
100__spitfire_flush_tlb_mm_slow: 108__spitfire_flush_tlb_mm_slow:
101 rdpr %pstate, %g1 109 rdpr %pstate, %g1
@@ -105,7 +113,8 @@ __spitfire_flush_tlb_mm_slow:
105 stxa %g0, [%g3] ASI_IMMU_DEMAP 113 stxa %g0, [%g3] ASI_IMMU_DEMAP
106 flush %g6 114 flush %g6
107 stxa %g2, [%o1] ASI_DMMU 115 stxa %g2, [%o1] ASI_DMMU
108 flush %g6 116 sethi %hi(KERNBASE), %o1
117 flush %o1
109 retl 118 retl
110 wrpr %g1, 0, %pstate 119 wrpr %g1, 0, %pstate
111 120
@@ -181,7 +190,7 @@ __flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */
181 .previous 190 .previous
182 191
183 /* Cheetah specific versions, patched at boot time. */ 192 /* Cheetah specific versions, patched at boot time. */
184__cheetah_flush_tlb_mm: /* 18 insns */ 193__cheetah_flush_tlb_mm: /* 19 insns */
185 rdpr %pstate, %g7 194 rdpr %pstate, %g7
186 andn %g7, PSTATE_IE, %g2 195 andn %g7, PSTATE_IE, %g2
187 wrpr %g2, 0x0, %pstate 196 wrpr %g2, 0x0, %pstate
@@ -196,12 +205,13 @@ __cheetah_flush_tlb_mm: /* 18 insns */
196 stxa %g0, [%g3] ASI_DMMU_DEMAP 205 stxa %g0, [%g3] ASI_DMMU_DEMAP
197 stxa %g0, [%g3] ASI_IMMU_DEMAP 206 stxa %g0, [%g3] ASI_IMMU_DEMAP
198 stxa %g2, [%o2] ASI_DMMU 207 stxa %g2, [%o2] ASI_DMMU
199 flush %g6 208 sethi %hi(KERNBASE), %o2
209 flush %o2
200 wrpr %g0, 0, %tl 210 wrpr %g0, 0, %tl
201 retl 211 retl
202 wrpr %g7, 0x0, %pstate 212 wrpr %g7, 0x0, %pstate
203 213
204__cheetah_flush_tlb_pending: /* 26 insns */ 214__cheetah_flush_tlb_pending: /* 27 insns */
205 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ 215 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
206 rdpr %pstate, %g7 216 rdpr %pstate, %g7
207 sllx %o1, 3, %o1 217 sllx %o1, 3, %o1
@@ -225,7 +235,8 @@ __cheetah_flush_tlb_pending: /* 26 insns */
225 brnz,pt %o1, 1b 235 brnz,pt %o1, 1b
226 nop 236 nop
227 stxa %g2, [%o4] ASI_DMMU 237 stxa %g2, [%o4] ASI_DMMU
228 flush %g6 238 sethi %hi(KERNBASE), %o4
239 flush %o4
229 wrpr %g0, 0, %tl 240 wrpr %g0, 0, %tl
230 retl 241 retl
231 wrpr %g7, 0x0, %pstate 242 wrpr %g7, 0x0, %pstate
@@ -245,7 +256,76 @@ __cheetah_flush_dcache_page: /* 11 insns */
245 nop 256 nop
246#endif /* DCACHE_ALIASING_POSSIBLE */ 257#endif /* DCACHE_ALIASING_POSSIBLE */
247 258
248cheetah_patch_one: 259 /* Hypervisor specific versions, patched at boot time. */
260__hypervisor_tlb_tl0_error:
261 save %sp, -192, %sp
262 mov %i0, %o0
263 call hypervisor_tlbop_error
264 mov %i1, %o1
265 ret
266 restore
267
268__hypervisor_flush_tlb_mm: /* 10 insns */
269 mov %o0, %o2 /* ARG2: mmu context */
270 mov 0, %o0 /* ARG0: CPU lists unimplemented */
271 mov 0, %o1 /* ARG1: CPU lists unimplemented */
272 mov HV_MMU_ALL, %o3 /* ARG3: flags */
273 mov HV_FAST_MMU_DEMAP_CTX, %o5
274 ta HV_FAST_TRAP
275 brnz,pn %o0, __hypervisor_tlb_tl0_error
276 mov HV_FAST_MMU_DEMAP_CTX, %o1
277 retl
278 nop
279
280__hypervisor_flush_tlb_pending: /* 16 insns */
281 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
282 sllx %o1, 3, %g1
283 mov %o2, %g2
284 mov %o0, %g3
2851: sub %g1, (1 << 3), %g1
286 ldx [%g2 + %g1], %o0 /* ARG0: vaddr + IMMU-bit */
287 mov %g3, %o1 /* ARG1: mmu context */
288 mov HV_MMU_ALL, %o2 /* ARG2: flags */
289 srlx %o0, PAGE_SHIFT, %o0
290 sllx %o0, PAGE_SHIFT, %o0
291 ta HV_MMU_UNMAP_ADDR_TRAP
292 brnz,pn %o0, __hypervisor_tlb_tl0_error
293 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
294 brnz,pt %g1, 1b
295 nop
296 retl
297 nop
298
299__hypervisor_flush_tlb_kernel_range: /* 16 insns */
300 /* %o0=start, %o1=end */
301 cmp %o0, %o1
302 be,pn %xcc, 2f
303 sethi %hi(PAGE_SIZE), %g3
304 mov %o0, %g1
305 sub %o1, %g1, %g2
306 sub %g2, %g3, %g2
3071: add %g1, %g2, %o0 /* ARG0: virtual address */
308 mov 0, %o1 /* ARG1: mmu context */
309 mov HV_MMU_ALL, %o2 /* ARG2: flags */
310 ta HV_MMU_UNMAP_ADDR_TRAP
311 brnz,pn %o0, __hypervisor_tlb_tl0_error
312 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
313 brnz,pt %g2, 1b
314 sub %g2, %g3, %g2
3152: retl
316 nop
317
318#ifdef DCACHE_ALIASING_POSSIBLE
319 /* XXX Niagara and friends have an 8K cache, so no aliasing is
320 * XXX possible, but nothing explicit in the Hypervisor API
321 * XXX guarantees this.
322 */
323__hypervisor_flush_dcache_page: /* 2 insns */
324 retl
325 nop
326#endif
327
328tlb_patch_one:
2491: lduw [%o1], %g1 3291: lduw [%o1], %g1
250 stw %g1, [%o0] 330 stw %g1, [%o0]
251 flush %o0 331 flush %o0
@@ -264,22 +344,22 @@ cheetah_patch_cachetlbops:
264 or %o0, %lo(__flush_tlb_mm), %o0 344 or %o0, %lo(__flush_tlb_mm), %o0
265 sethi %hi(__cheetah_flush_tlb_mm), %o1 345 sethi %hi(__cheetah_flush_tlb_mm), %o1
266 or %o1, %lo(__cheetah_flush_tlb_mm), %o1 346 or %o1, %lo(__cheetah_flush_tlb_mm), %o1
267 call cheetah_patch_one 347 call tlb_patch_one
268 mov 18, %o2 348 mov 19, %o2
269 349
270 sethi %hi(__flush_tlb_pending), %o0 350 sethi %hi(__flush_tlb_pending), %o0
271 or %o0, %lo(__flush_tlb_pending), %o0 351 or %o0, %lo(__flush_tlb_pending), %o0
272 sethi %hi(__cheetah_flush_tlb_pending), %o1 352 sethi %hi(__cheetah_flush_tlb_pending), %o1
273 or %o1, %lo(__cheetah_flush_tlb_pending), %o1 353 or %o1, %lo(__cheetah_flush_tlb_pending), %o1
274 call cheetah_patch_one 354 call tlb_patch_one
275 mov 26, %o2 355 mov 27, %o2
276 356
277#ifdef DCACHE_ALIASING_POSSIBLE 357#ifdef DCACHE_ALIASING_POSSIBLE
278 sethi %hi(__flush_dcache_page), %o0 358 sethi %hi(__flush_dcache_page), %o0
279 or %o0, %lo(__flush_dcache_page), %o0 359 or %o0, %lo(__flush_dcache_page), %o0
280 sethi %hi(__cheetah_flush_dcache_page), %o1 360 sethi %hi(__cheetah_flush_dcache_page), %o1
281 or %o1, %lo(__cheetah_flush_dcache_page), %o1 361 or %o1, %lo(__cheetah_flush_dcache_page), %o1
282 call cheetah_patch_one 362 call tlb_patch_one
283 mov 11, %o2 363 mov 11, %o2
284#endif /* DCACHE_ALIASING_POSSIBLE */ 364#endif /* DCACHE_ALIASING_POSSIBLE */
285 365
@@ -295,16 +375,14 @@ cheetah_patch_cachetlbops:
295 * %g1 address arg 1 (tlb page and range flushes) 375 * %g1 address arg 1 (tlb page and range flushes)
296 * %g7 address arg 2 (tlb range flush only) 376 * %g7 address arg 2 (tlb range flush only)
297 * 377 *
298 * %g6 ivector table, don't touch 378 * %g6 scratch 1
299 * %g2 scratch 1 379 * %g2 scratch 2
300 * %g3 scratch 2 380 * %g3 scratch 3
301 * %g4 scratch 3 381 * %g4 scratch 4
302 *
303 * TODO: Make xcall TLB range flushes use the tricks above... -DaveM
304 */ 382 */
305 .align 32 383 .align 32
306 .globl xcall_flush_tlb_mm 384 .globl xcall_flush_tlb_mm
307xcall_flush_tlb_mm: 385xcall_flush_tlb_mm: /* 21 insns */
308 mov PRIMARY_CONTEXT, %g2 386 mov PRIMARY_CONTEXT, %g2
309 ldxa [%g2] ASI_DMMU, %g3 387 ldxa [%g2] ASI_DMMU, %g3
310 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 388 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4
@@ -316,9 +394,19 @@ xcall_flush_tlb_mm:
316 stxa %g0, [%g4] ASI_IMMU_DEMAP 394 stxa %g0, [%g4] ASI_IMMU_DEMAP
317 stxa %g3, [%g2] ASI_DMMU 395 stxa %g3, [%g2] ASI_DMMU
318 retry 396 retry
397 nop
398 nop
399 nop
400 nop
401 nop
402 nop
403 nop
404 nop
405 nop
406 nop
319 407
320 .globl xcall_flush_tlb_pending 408 .globl xcall_flush_tlb_pending
321xcall_flush_tlb_pending: 409xcall_flush_tlb_pending: /* 21 insns */
322 /* %g5=context, %g1=nr, %g7=vaddrs[] */ 410 /* %g5=context, %g1=nr, %g7=vaddrs[] */
323 sllx %g1, 3, %g1 411 sllx %g1, 3, %g1
324 mov PRIMARY_CONTEXT, %g4 412 mov PRIMARY_CONTEXT, %g4
@@ -341,9 +429,10 @@ xcall_flush_tlb_pending:
341 nop 429 nop
342 stxa %g2, [%g4] ASI_DMMU 430 stxa %g2, [%g4] ASI_DMMU
343 retry 431 retry
432 nop
344 433
345 .globl xcall_flush_tlb_kernel_range 434 .globl xcall_flush_tlb_kernel_range
346xcall_flush_tlb_kernel_range: 435xcall_flush_tlb_kernel_range: /* 25 insns */
347 sethi %hi(PAGE_SIZE - 1), %g2 436 sethi %hi(PAGE_SIZE - 1), %g2
348 or %g2, %lo(PAGE_SIZE - 1), %g2 437 or %g2, %lo(PAGE_SIZE - 1), %g2
349 andn %g1, %g2, %g1 438 andn %g1, %g2, %g1
@@ -360,14 +449,30 @@ xcall_flush_tlb_kernel_range:
360 retry 449 retry
361 nop 450 nop
362 nop 451 nop
452 nop
453 nop
454 nop
455 nop
456 nop
457 nop
458 nop
459 nop
460 nop
363 461
364 /* This runs in a very controlled environment, so we do 462 /* This runs in a very controlled environment, so we do
365 * not need to worry about BH races etc. 463 * not need to worry about BH races etc.
366 */ 464 */
367 .globl xcall_sync_tick 465 .globl xcall_sync_tick
368xcall_sync_tick: 466xcall_sync_tick:
369 rdpr %pstate, %g2 467
468661: rdpr %pstate, %g2
370 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate 469 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate
470 .section .sun4v_2insn_patch, "ax"
471 .word 661b
472 nop
473 nop
474 .previous
475
371 rdpr %pil, %g2 476 rdpr %pil, %g2
372 wrpr %g0, 15, %pil 477 wrpr %g0, 15, %pil
373 sethi %hi(109f), %g7 478 sethi %hi(109f), %g7
@@ -390,8 +495,15 @@ xcall_sync_tick:
390 */ 495 */
391 .globl xcall_report_regs 496 .globl xcall_report_regs
392xcall_report_regs: 497xcall_report_regs:
393 rdpr %pstate, %g2 498
499661: rdpr %pstate, %g2
394 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate 500 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate
501 .section .sun4v_2insn_patch, "ax"
502 .word 661b
503 nop
504 nop
505 .previous
506
395 rdpr %pil, %g2 507 rdpr %pil, %g2
396 wrpr %g0, 15, %pil 508 wrpr %g0, 15, %pil
397 sethi %hi(109f), %g7 509 sethi %hi(109f), %g7
@@ -453,62 +565,96 @@ xcall_flush_dcache_page_spitfire: /* %g1 == physical page address
453 nop 565 nop
454 nop 566 nop
455 567
456 .data 568 /* %g5: error
457 569 * %g6: tlb op
458errata32_hwbug: 570 */
459 .xword 0 571__hypervisor_tlb_xcall_error:
460 572 mov %g5, %g4
461 .text 573 mov %g6, %g5
462 574 ba,pt %xcc, etrap
463 /* These two are not performance critical... */ 575 rd %pc, %g7
464 .globl xcall_flush_tlb_all_spitfire 576 mov %l4, %o0
465xcall_flush_tlb_all_spitfire: 577 call hypervisor_tlbop_error_xcall
466 /* Spitfire Errata #32 workaround. */ 578 mov %l5, %o1
467 sethi %hi(errata32_hwbug), %g4 579 ba,a,pt %xcc, rtrap_clr_l6
468 stx %g0, [%g4 + %lo(errata32_hwbug)] 580
469 581 .globl __hypervisor_xcall_flush_tlb_mm
470 clr %g2 582__hypervisor_xcall_flush_tlb_mm: /* 21 insns */
471 clr %g3 583 /* %g5=ctx, %g1,%g2,%g3,%g4,%g7=scratch, %g6=unusable */
4721: ldxa [%g3] ASI_DTLB_DATA_ACCESS, %g4 584 mov %o0, %g2
473 and %g4, _PAGE_L, %g5 585 mov %o1, %g3
474 brnz,pn %g5, 2f 586 mov %o2, %g4
475 mov TLB_TAG_ACCESS, %g7 587 mov %o3, %g1
476 588 mov %o5, %g7
477 stxa %g0, [%g7] ASI_DMMU 589 clr %o0 /* ARG0: CPU lists unimplemented */
478 membar #Sync 590 clr %o1 /* ARG1: CPU lists unimplemented */
479 stxa %g0, [%g3] ASI_DTLB_DATA_ACCESS 591 mov %g5, %o2 /* ARG2: mmu context */
592 mov HV_MMU_ALL, %o3 /* ARG3: flags */
593 mov HV_FAST_MMU_DEMAP_CTX, %o5
594 ta HV_FAST_TRAP
595 mov HV_FAST_MMU_DEMAP_CTX, %g6
596 brnz,pn %o0, __hypervisor_tlb_xcall_error
597 mov %o0, %g5
598 mov %g2, %o0
599 mov %g3, %o1
600 mov %g4, %o2
601 mov %g1, %o3
602 mov %g7, %o5
480 membar #Sync 603 membar #Sync
604 retry
481 605
482 /* Spitfire Errata #32 workaround. */ 606 .globl __hypervisor_xcall_flush_tlb_pending
483 sethi %hi(errata32_hwbug), %g4 607__hypervisor_xcall_flush_tlb_pending: /* 21 insns */
484 stx %g0, [%g4 + %lo(errata32_hwbug)] 608 /* %g5=ctx, %g1=nr, %g7=vaddrs[], %g2,%g3,%g4,%g6=scratch */
485 609 sllx %g1, 3, %g1
4862: ldxa [%g3] ASI_ITLB_DATA_ACCESS, %g4 610 mov %o0, %g2
487 and %g4, _PAGE_L, %g5 611 mov %o1, %g3
488 brnz,pn %g5, 2f 612 mov %o2, %g4
489 mov TLB_TAG_ACCESS, %g7 6131: sub %g1, (1 << 3), %g1
490 614 ldx [%g7 + %g1], %o0 /* ARG0: virtual address */
491 stxa %g0, [%g7] ASI_IMMU 615 mov %g5, %o1 /* ARG1: mmu context */
492 membar #Sync 616 mov HV_MMU_ALL, %o2 /* ARG2: flags */
493 stxa %g0, [%g3] ASI_ITLB_DATA_ACCESS 617 srlx %o0, PAGE_SHIFT, %o0
618 sllx %o0, PAGE_SHIFT, %o0
619 ta HV_MMU_UNMAP_ADDR_TRAP
620 mov HV_MMU_UNMAP_ADDR_TRAP, %g6
621 brnz,a,pn %o0, __hypervisor_tlb_xcall_error
622 mov %o0, %g5
623 brnz,pt %g1, 1b
624 nop
625 mov %g2, %o0
626 mov %g3, %o1
627 mov %g4, %o2
494 membar #Sync 628 membar #Sync
495
496 /* Spitfire Errata #32 workaround. */
497 sethi %hi(errata32_hwbug), %g4
498 stx %g0, [%g4 + %lo(errata32_hwbug)]
499
5002: add %g2, 1, %g2
501 cmp %g2, SPITFIRE_HIGHEST_LOCKED_TLBENT
502 ble,pt %icc, 1b
503 sll %g2, 3, %g3
504 flush %g6
505 retry 629 retry
506 630
507 .globl xcall_flush_tlb_all_cheetah 631 .globl __hypervisor_xcall_flush_tlb_kernel_range
508xcall_flush_tlb_all_cheetah: 632__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */
509 mov 0x80, %g2 633 /* %g1=start, %g7=end, %g2,%g3,%g4,%g5,%g6=scratch */
510 stxa %g0, [%g2] ASI_DMMU_DEMAP 634 sethi %hi(PAGE_SIZE - 1), %g2
511 stxa %g0, [%g2] ASI_IMMU_DEMAP 635 or %g2, %lo(PAGE_SIZE - 1), %g2
636 andn %g1, %g2, %g1
637 andn %g7, %g2, %g7
638 sub %g7, %g1, %g3
639 add %g2, 1, %g2
640 sub %g3, %g2, %g3
641 mov %o0, %g2
642 mov %o1, %g4
643 mov %o2, %g7
6441: add %g1, %g3, %o0 /* ARG0: virtual address */
645 mov 0, %o1 /* ARG1: mmu context */
646 mov HV_MMU_ALL, %o2 /* ARG2: flags */
647 ta HV_MMU_UNMAP_ADDR_TRAP
648 mov HV_MMU_UNMAP_ADDR_TRAP, %g6
649 brnz,pn %o0, __hypervisor_tlb_xcall_error
650 mov %o0, %g5
651 sethi %hi(PAGE_SIZE), %o2
652 brnz,pt %g3, 1b
653 sub %g3, %o2, %g3
654 mov %g2, %o0
655 mov %g4, %o1
656 mov %g7, %o2
657 membar #Sync
512 retry 658 retry
513 659
514 /* These just get rescheduled to PIL vectors. */ 660 /* These just get rescheduled to PIL vectors. */
@@ -527,4 +673,70 @@ xcall_capture:
527 wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint 673 wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint
528 retry 674 retry
529 675
676 .globl xcall_new_mmu_context_version
677xcall_new_mmu_context_version:
678 wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint
679 retry
680
530#endif /* CONFIG_SMP */ 681#endif /* CONFIG_SMP */
682
683
684 .globl hypervisor_patch_cachetlbops
685hypervisor_patch_cachetlbops:
686 save %sp, -128, %sp
687
688 sethi %hi(__flush_tlb_mm), %o0
689 or %o0, %lo(__flush_tlb_mm), %o0
690 sethi %hi(__hypervisor_flush_tlb_mm), %o1
691 or %o1, %lo(__hypervisor_flush_tlb_mm), %o1
692 call tlb_patch_one
693 mov 10, %o2
694
695 sethi %hi(__flush_tlb_pending), %o0
696 or %o0, %lo(__flush_tlb_pending), %o0
697 sethi %hi(__hypervisor_flush_tlb_pending), %o1
698 or %o1, %lo(__hypervisor_flush_tlb_pending), %o1
699 call tlb_patch_one
700 mov 16, %o2
701
702 sethi %hi(__flush_tlb_kernel_range), %o0
703 or %o0, %lo(__flush_tlb_kernel_range), %o0
704 sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1
705 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
706 call tlb_patch_one
707 mov 16, %o2
708
709#ifdef DCACHE_ALIASING_POSSIBLE
710 sethi %hi(__flush_dcache_page), %o0
711 or %o0, %lo(__flush_dcache_page), %o0
712 sethi %hi(__hypervisor_flush_dcache_page), %o1
713 or %o1, %lo(__hypervisor_flush_dcache_page), %o1
714 call tlb_patch_one
715 mov 2, %o2
716#endif /* DCACHE_ALIASING_POSSIBLE */
717
718#ifdef CONFIG_SMP
719 sethi %hi(xcall_flush_tlb_mm), %o0
720 or %o0, %lo(xcall_flush_tlb_mm), %o0
721 sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1
722 or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
723 call tlb_patch_one
724 mov 21, %o2
725
726 sethi %hi(xcall_flush_tlb_pending), %o0
727 or %o0, %lo(xcall_flush_tlb_pending), %o0
728 sethi %hi(__hypervisor_xcall_flush_tlb_pending), %o1
729 or %o1, %lo(__hypervisor_xcall_flush_tlb_pending), %o1
730 call tlb_patch_one
731 mov 21, %o2
732
733 sethi %hi(xcall_flush_tlb_kernel_range), %o0
734 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
735 sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
736 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
737 call tlb_patch_one
738 mov 25, %o2
739#endif /* CONFIG_SMP */
740
741 ret
742 restore