author    Dmitry Torokhov <dtor_core@ameritech.net>	2006-04-02 00:08:05 -0500
committer Dmitry Torokhov <dtor_core@ameritech.net>	2006-04-02 00:08:05 -0500
commit    95d465fd750897ab32462a6702fbfe1b122cbbc0
tree      65c38b2f11c51bb6932e44dd6c92f15b0091abfe /arch/sparc64/mm
parent    642fde17dceceb56c7ba2762733ac688666ae657
parent    683aa4012f53b2ada0f430487e05d37b0d94e90a

Manual merge with Linus.

Conflicts:
	arch/powerpc/kernel/setup-common.c
	drivers/input/keyboard/hil_kbd.c
	drivers/input/mouse/hil_ptr.c
Diffstat (limited to 'arch/sparc64/mm')
-rw-r--r--  arch/sparc64/mm/Makefile      |    2
-rw-r--r--  arch/sparc64/mm/fault.c       |   24
-rw-r--r--  arch/sparc64/mm/generic.c     |   41
-rw-r--r--  arch/sparc64/mm/hugetlbpage.c |  219
-rw-r--r--  arch/sparc64/mm/init.c        | 1452
-rw-r--r--  arch/sparc64/mm/tlb.c         |   64
-rw-r--r--  arch/sparc64/mm/tsb.c         |  500
-rw-r--r--  arch/sparc64/mm/ultra.S       |  374
8 files changed, 1838 insertions(+), 838 deletions(-)
diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile
index 9d0960e69f48..e415bf942bcd 100644
--- a/arch/sparc64/mm/Makefile
+++ b/arch/sparc64/mm/Makefile
@@ -5,6 +5,6 @@
 EXTRA_AFLAGS := -ansi
 EXTRA_CFLAGS := -Werror
 
-obj-y := ultra.o tlb.o fault.o init.o generic.o
+obj-y := ultra.o tlb.o tsb.o fault.o init.o generic.o
 
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
index 6f0539aa44d0..0db2f7d9fab5 100644
--- a/arch/sparc64/mm/fault.c
+++ b/arch/sparc64/mm/fault.c
@@ -29,6 +29,7 @@
 #include <asm/lsu.h>
 #include <asm/sections.h>
 #include <asm/kdebug.h>
+#include <asm/mmu_context.h>
 
 /*
  * To debug kernel to catch accesses to certain virtual/physical addresses.
@@ -91,12 +92,13 @@ static void __kprobes unhandled_fault(unsigned long address,
 	die_if_kernel("Oops", regs);
 }
 
-static void bad_kernel_pc(struct pt_regs *regs)
+static void bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr)
 {
 	unsigned long *ksp;
 
 	printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n",
 	       regs->tpc);
+	printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr);
 	__asm__("mov %%sp, %0" : "=r" (ksp));
 	show_stack(current, ksp);
 	unhandled_fault(regs->tpc, current, regs);
@@ -137,7 +139,7 @@ static unsigned int get_user_insn(unsigned long tpc)
 	if (!pte_present(pte))
 		goto out;
 
-	pa = (pte_val(pte) & _PAGE_PADDR);
+	pa = (pte_pfn(pte) << PAGE_SHIFT);
 	pa += (tpc & ~PAGE_MASK);
 
 	/* Use phys bypass so we don't pollute dtlb/dcache. */
@@ -257,7 +259,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 	struct vm_area_struct *vma;
 	unsigned int insn = 0;
 	int si_code, fault_code;
-	unsigned long address;
+	unsigned long address, mm_rss;
 
 	fault_code = get_thread_fault_code();
 
@@ -280,7 +282,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 		    (tpc >= MODULES_VADDR && tpc < MODULES_END)) {
 			/* Valid, no problems... */
 		} else {
-			bad_kernel_pc(regs);
+			bad_kernel_pc(regs, address);
 			return;
 		}
 	}
@@ -406,6 +408,20 @@ good_area:
 	}
 
 	up_read(&mm->mmap_sem);
+
+	mm_rss = get_mm_rss(mm);
+#ifdef CONFIG_HUGETLB_PAGE
+	mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE));
+#endif
+	if (unlikely(mm_rss >
+		     mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
+		tsb_grow(mm, MM_TSB_BASE, mm_rss);
+#ifdef CONFIG_HUGETLB_PAGE
+	mm_rss = mm->context.huge_pte_count;
+	if (unlikely(mm_rss >
+		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit))
+		tsb_grow(mm, MM_TSB_HUGE, mm_rss);
+#endif
 	return;
 
 	/*
diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c
index 580b63da836b..8cb06205d265 100644
--- a/arch/sparc64/mm/generic.c
+++ b/arch/sparc64/mm/generic.c
@@ -15,15 +15,6 @@
 #include <asm/page.h>
 #include <asm/tlbflush.h>
 
-static inline pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space)
-{
-	pte_t pte;
-	pte_val(pte) = (((page) | pgprot_val(prot) | _PAGE_E) &
-			~(unsigned long)_PAGE_CACHE);
-	pte_val(pte) |= (((unsigned long)space) << 32);
-	return pte;
-}
-
 /* Remap IO memory, the same way as remap_pfn_range(), but use
  * the obio memory space.
  *
@@ -48,24 +39,29 @@ static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte,
 		pte_t entry;
 		unsigned long curend = address + PAGE_SIZE;
 
-		entry = mk_pte_io(offset, prot, space);
+		entry = mk_pte_io(offset, prot, space, PAGE_SIZE);
 		if (!(address & 0xffff)) {
-			if (!(address & 0x3fffff) && !(offset & 0x3ffffe) && end >= address + 0x400000) {
-				entry = mk_pte_io(offset,
-						  __pgprot(pgprot_val (prot) | _PAGE_SZ4MB),
-						  space);
+			if (PAGE_SIZE < (4 * 1024 * 1024) &&
+			    !(address & 0x3fffff) &&
+			    !(offset & 0x3ffffe) &&
+			    end >= address + 0x400000) {
+				entry = mk_pte_io(offset, prot, space,
+						  4 * 1024 * 1024);
 				curend = address + 0x400000;
 				offset += 0x400000;
-			} else if (!(address & 0x7ffff) && !(offset & 0x7fffe) && end >= address + 0x80000) {
-				entry = mk_pte_io(offset,
-						  __pgprot(pgprot_val (prot) | _PAGE_SZ512K),
-						  space);
+			} else if (PAGE_SIZE < (512 * 1024) &&
+				   !(address & 0x7ffff) &&
+				   !(offset & 0x7fffe) &&
+				   end >= address + 0x80000) {
+				entry = mk_pte_io(offset, prot, space,
+						  512 * 1024 * 1024);
 				curend = address + 0x80000;
 				offset += 0x80000;
-			} else if (!(offset & 0xfffe) && end >= address + 0x10000) {
-				entry = mk_pte_io(offset,
-						  __pgprot(pgprot_val (prot) | _PAGE_SZ64K),
-						  space);
+			} else if (PAGE_SIZE < (64 * 1024) &&
+				   !(offset & 0xfffe) &&
+				   end >= address + 0x10000) {
+				entry = mk_pte_io(offset, prot, space,
+						  64 * 1024);
 				curend = address + 0x10000;
 				offset += 0x10000;
 			} else
@@ -144,7 +140,6 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
 	vma->vm_pgoff = phys_base >> PAGE_SHIFT;
 
-	prot = __pgprot(pg_iobits);
 	offset -= from;
 	dir = pgd_offset(mm, from);
 	flush_cache_range(vma, beg, end);
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index 625cbb336a23..074620d413d4 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * SPARC64 Huge TLB page support. 2 * SPARC64 Huge TLB page support.
3 * 3 *
4 * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com) 4 * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
5 */ 5 */
6 6
7#include <linux/config.h> 7#include <linux/config.h>
@@ -22,6 +22,175 @@
22#include <asm/cacheflush.h> 22#include <asm/cacheflush.h>
23#include <asm/mmu_context.h> 23#include <asm/mmu_context.h>
24 24
25/* Slightly simplified from the non-hugepage variant because by
26 * definition we don't have to worry about any page coloring stuff
27 */
28#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL))
29#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL))
30
31static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
32 unsigned long addr,
33 unsigned long len,
34 unsigned long pgoff,
35 unsigned long flags)
36{
37 struct mm_struct *mm = current->mm;
38 struct vm_area_struct * vma;
39 unsigned long task_size = TASK_SIZE;
40 unsigned long start_addr;
41
42 if (test_thread_flag(TIF_32BIT))
43 task_size = STACK_TOP32;
44 if (unlikely(len >= VA_EXCLUDE_START))
45 return -ENOMEM;
46
47 if (len > mm->cached_hole_size) {
48 start_addr = addr = mm->free_area_cache;
49 } else {
50 start_addr = addr = TASK_UNMAPPED_BASE;
51 mm->cached_hole_size = 0;
52 }
53
54 task_size -= len;
55
56full_search:
57 addr = ALIGN(addr, HPAGE_SIZE);
58
59 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
60 /* At this point: (!vma || addr < vma->vm_end). */
61 if (addr < VA_EXCLUDE_START &&
62 (addr + len) >= VA_EXCLUDE_START) {
63 addr = VA_EXCLUDE_END;
64 vma = find_vma(mm, VA_EXCLUDE_END);
65 }
66 if (unlikely(task_size < addr)) {
67 if (start_addr != TASK_UNMAPPED_BASE) {
68 start_addr = addr = TASK_UNMAPPED_BASE;
69 mm->cached_hole_size = 0;
70 goto full_search;
71 }
72 return -ENOMEM;
73 }
74 if (likely(!vma || addr + len <= vma->vm_start)) {
75 /*
76 * Remember the place where we stopped the search:
77 */
78 mm->free_area_cache = addr + len;
79 return addr;
80 }
81 if (addr + mm->cached_hole_size < vma->vm_start)
82 mm->cached_hole_size = vma->vm_start - addr;
83
84 addr = ALIGN(vma->vm_end, HPAGE_SIZE);
85 }
86}
87
88static unsigned long
89hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
90 const unsigned long len,
91 const unsigned long pgoff,
92 const unsigned long flags)
93{
94 struct vm_area_struct *vma;
95 struct mm_struct *mm = current->mm;
96 unsigned long addr = addr0;
97
98 /* This should only ever run for 32-bit processes. */
99 BUG_ON(!test_thread_flag(TIF_32BIT));
100
101 /* check if free_area_cache is useful for us */
102 if (len <= mm->cached_hole_size) {
103 mm->cached_hole_size = 0;
104 mm->free_area_cache = mm->mmap_base;
105 }
106
107 /* either no address requested or can't fit in requested address hole */
108 addr = mm->free_area_cache & HPAGE_MASK;
109
110 /* make sure it can fit in the remaining address space */
111 if (likely(addr > len)) {
112 vma = find_vma(mm, addr-len);
113 if (!vma || addr <= vma->vm_start) {
114 /* remember the address as a hint for next time */
115 return (mm->free_area_cache = addr-len);
116 }
117 }
118
119 if (unlikely(mm->mmap_base < len))
120 goto bottomup;
121
122 addr = (mm->mmap_base-len) & HPAGE_MASK;
123
124 do {
125 /*
126 * Lookup failure means no vma is above this address,
127 * else if new region fits below vma->vm_start,
128 * return with success:
129 */
130 vma = find_vma(mm, addr);
131 if (likely(!vma || addr+len <= vma->vm_start)) {
132 /* remember the address as a hint for next time */
133 return (mm->free_area_cache = addr);
134 }
135
136 /* remember the largest hole we saw so far */
137 if (addr + mm->cached_hole_size < vma->vm_start)
138 mm->cached_hole_size = vma->vm_start - addr;
139
140 /* try just below the current vma->vm_start */
141 addr = (vma->vm_start-len) & HPAGE_MASK;
142 } while (likely(len < vma->vm_start));
143
144bottomup:
145 /*
146 * A failed mmap() very likely causes application failure,
147 * so fall back to the bottom-up function here. This scenario
148 * can happen with large stack limits and large mmap()
149 * allocations.
150 */
151 mm->cached_hole_size = ~0UL;
152 mm->free_area_cache = TASK_UNMAPPED_BASE;
153 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
154 /*
155 * Restore the topdown base:
156 */
157 mm->free_area_cache = mm->mmap_base;
158 mm->cached_hole_size = ~0UL;
159
160 return addr;
161}
162
163unsigned long
164hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
165 unsigned long len, unsigned long pgoff, unsigned long flags)
166{
167 struct mm_struct *mm = current->mm;
168 struct vm_area_struct *vma;
169 unsigned long task_size = TASK_SIZE;
170
171 if (test_thread_flag(TIF_32BIT))
172 task_size = STACK_TOP32;
173
174 if (len & ~HPAGE_MASK)
175 return -EINVAL;
176 if (len > task_size)
177 return -ENOMEM;
178
179 if (addr) {
180 addr = ALIGN(addr, HPAGE_SIZE);
181 vma = find_vma(mm, addr);
182 if (task_size - len >= addr &&
183 (!vma || addr + len <= vma->vm_start))
184 return addr;
185 }
186 if (mm->get_unmapped_area == arch_get_unmapped_area)
187 return hugetlb_get_unmapped_area_bottomup(file, addr, len,
188 pgoff, flags);
189 else
190 return hugetlb_get_unmapped_area_topdown(file, addr, len,
191 pgoff, flags);
192}
193
25pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 194pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
26{ 195{
27 pgd_t *pgd; 196 pgd_t *pgd;
@@ -30,13 +199,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
30 pte_t *pte = NULL; 199 pte_t *pte = NULL;
31 200
32 pgd = pgd_offset(mm, addr); 201 pgd = pgd_offset(mm, addr);
33 if (pgd) { 202 pud = pud_alloc(mm, pgd, addr);
34 pud = pud_offset(pgd, addr); 203 if (pud) {
35 if (pud) { 204 pmd = pmd_alloc(mm, pud, addr);
36 pmd = pmd_alloc(mm, pud, addr); 205 if (pmd)
37 if (pmd) 206 pte = pte_alloc_map(mm, pmd, addr);
38 pte = pte_alloc_map(mm, pmd, addr);
39 }
40 } 207 }
41 return pte; 208 return pte;
42} 209}
@@ -48,25 +215,28 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
48 pmd_t *pmd; 215 pmd_t *pmd;
49 pte_t *pte = NULL; 216 pte_t *pte = NULL;
50 217
218 addr &= HPAGE_MASK;
219
51 pgd = pgd_offset(mm, addr); 220 pgd = pgd_offset(mm, addr);
52 if (pgd) { 221 if (!pgd_none(*pgd)) {
53 pud = pud_offset(pgd, addr); 222 pud = pud_offset(pgd, addr);
54 if (pud) { 223 if (!pud_none(*pud)) {
55 pmd = pmd_offset(pud, addr); 224 pmd = pmd_offset(pud, addr);
56 if (pmd) 225 if (!pmd_none(*pmd))
57 pte = pte_offset_map(pmd, addr); 226 pte = pte_offset_map(pmd, addr);
58 } 227 }
59 } 228 }
60 return pte; 229 return pte;
61} 230}
62 231
63#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
64
65void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 232void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
66 pte_t *ptep, pte_t entry) 233 pte_t *ptep, pte_t entry)
67{ 234{
68 int i; 235 int i;
69 236
237 if (!pte_present(*ptep) && pte_present(entry))
238 mm->context.huge_pte_count++;
239
70 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { 240 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
71 set_pte_at(mm, addr, ptep, entry); 241 set_pte_at(mm, addr, ptep, entry);
72 ptep++; 242 ptep++;
@@ -82,6 +252,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
82 int i; 252 int i;
83 253
84 entry = *ptep; 254 entry = *ptep;
255 if (pte_present(entry))
256 mm->context.huge_pte_count--;
85 257
86 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { 258 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
87 pte_clear(mm, addr, ptep); 259 pte_clear(mm, addr, ptep);
@@ -92,18 +264,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
92 return entry; 264 return entry;
93} 265}
94 266
95/*
96 * This function checks for proper alignment of input addr and len parameters.
97 */
98int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
99{
100 if (len & ~HPAGE_MASK)
101 return -EINVAL;
102 if (addr & ~HPAGE_MASK)
103 return -EINVAL;
104 return 0;
105}
106
107struct page *follow_huge_addr(struct mm_struct *mm, 267struct page *follow_huge_addr(struct mm_struct *mm,
108 unsigned long address, int write) 268 unsigned long address, int write)
109{ 269{
@@ -131,6 +291,15 @@ static void context_reload(void *__data)
131 291
132void hugetlb_prefault_arch_hook(struct mm_struct *mm) 292void hugetlb_prefault_arch_hook(struct mm_struct *mm)
133{ 293{
294 struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
295
296 if (likely(tp->tsb != NULL))
297 return;
298
299 tsb_grow(mm, MM_TSB_HUGE, 0);
300 tsb_context_switch(mm);
301 smp_tsb_sync(mm);
302
134 /* On UltraSPARC-III+ and later, configure the second half of 303 /* On UltraSPARC-III+ and later, configure the second half of
135 * the Data-TLB for huge pages. 304 * the Data-TLB for huge pages.
136 */ 305 */
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 1e44ee26cee8..1539a8362b6f 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -6,6 +6,7 @@
6 */ 6 */
7 7
8#include <linux/config.h> 8#include <linux/config.h>
9#include <linux/module.h>
9#include <linux/kernel.h> 10#include <linux/kernel.h>
10#include <linux/sched.h> 11#include <linux/sched.h>
11#include <linux/string.h> 12#include <linux/string.h>
@@ -39,9 +40,27 @@
39#include <asm/tlb.h> 40#include <asm/tlb.h>
40#include <asm/spitfire.h> 41#include <asm/spitfire.h>
41#include <asm/sections.h> 42#include <asm/sections.h>
43#include <asm/tsb.h>
44#include <asm/hypervisor.h>
42 45
43extern void device_scan(void); 46extern void device_scan(void);
44 47
48#define MAX_PHYS_ADDRESS (1UL << 42UL)
49#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL)
50#define KPTE_BITMAP_BYTES \
51 ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8)
52
53unsigned long kern_linear_pte_xor[2] __read_mostly;
54
55/* A bitmap, one bit for every 256MB of physical memory. If the bit
56 * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else
57 * if set we should use a 256MB page (via kern_linear_pte_xor[1]).
58 */
59unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
60
61/* A special kernel TSB for 4MB and 256MB linear mappings. */
62struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
63
45#define MAX_BANKS 32 64#define MAX_BANKS 32
46 65
47static struct linux_prom64_registers pavail[MAX_BANKS] __initdata; 66static struct linux_prom64_registers pavail[MAX_BANKS] __initdata;
@@ -111,11 +130,9 @@ static void __init read_obp_memory(const char *property,
111 130
112unsigned long *sparc64_valid_addr_bitmap __read_mostly; 131unsigned long *sparc64_valid_addr_bitmap __read_mostly;
113 132
114/* Ugly, but necessary... -DaveM */ 133/* Kernel physical address base and size in bytes. */
115unsigned long phys_base __read_mostly;
116unsigned long kern_base __read_mostly; 134unsigned long kern_base __read_mostly;
117unsigned long kern_size __read_mostly; 135unsigned long kern_size __read_mostly;
118unsigned long pfn_base __read_mostly;
119 136
120/* get_new_mmu_context() uses "cache + 1". */ 137/* get_new_mmu_context() uses "cache + 1". */
121DEFINE_SPINLOCK(ctx_alloc_lock); 138DEFINE_SPINLOCK(ctx_alloc_lock);
@@ -141,24 +158,28 @@ unsigned long sparc64_kern_sec_context __read_mostly;
141 158
142int bigkernel = 0; 159int bigkernel = 0;
143 160
144/* XXX Tune this... */ 161kmem_cache_t *pgtable_cache __read_mostly;
145#define PGT_CACHE_LOW 25 162
146#define PGT_CACHE_HIGH 50 163static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
164{
165 clear_page(addr);
166}
167
168extern void tsb_cache_init(void);
147 169
148void check_pgt_cache(void) 170void pgtable_cache_init(void)
149{ 171{
150 preempt_disable(); 172 pgtable_cache = kmem_cache_create("pgtable_cache",
151 if (pgtable_cache_size > PGT_CACHE_HIGH) { 173 PAGE_SIZE, PAGE_SIZE,
152 do { 174 SLAB_HWCACHE_ALIGN |
153 if (pgd_quicklist) 175 SLAB_MUST_HWCACHE_ALIGN,
154 free_pgd_slow(get_pgd_fast()); 176 zero_ctor,
155 if (pte_quicklist[0]) 177 NULL);
156 free_pte_slow(pte_alloc_one_fast(NULL, 0)); 178 if (!pgtable_cache) {
157 if (pte_quicklist[1]) 179 prom_printf("Could not create pgtable_cache\n");
158 free_pte_slow(pte_alloc_one_fast(NULL, 1 << (PAGE_SHIFT + 10))); 180 prom_halt();
159 } while (pgtable_cache_size > PGT_CACHE_LOW);
160 } 181 }
161 preempt_enable(); 182 tsb_cache_init();
162} 183}
163 184
164#ifdef CONFIG_DEBUG_DCFLUSH 185#ifdef CONFIG_DEBUG_DCFLUSH
@@ -168,8 +189,9 @@ atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0);
168#endif 189#endif
169#endif 190#endif
170 191
171__inline__ void flush_dcache_page_impl(struct page *page) 192inline void flush_dcache_page_impl(struct page *page)
172{ 193{
194 BUG_ON(tlb_type == hypervisor);
173#ifdef CONFIG_DEBUG_DCFLUSH 195#ifdef CONFIG_DEBUG_DCFLUSH
174 atomic_inc(&dcpage_flushes); 196 atomic_inc(&dcpage_flushes);
175#endif 197#endif
@@ -186,8 +208,8 @@ __inline__ void flush_dcache_page_impl(struct page *page)
186} 208}
187 209
188#define PG_dcache_dirty PG_arch_1 210#define PG_dcache_dirty PG_arch_1
189#define PG_dcache_cpu_shift 24 211#define PG_dcache_cpu_shift 24UL
190#define PG_dcache_cpu_mask (256 - 1) 212#define PG_dcache_cpu_mask (256UL - 1UL)
191 213
192#if NR_CPUS > 256 214#if NR_CPUS > 256
193#error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus 215#error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus
@@ -243,32 +265,78 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
243 : "g1", "g7"); 265 : "g1", "g7");
244} 266}
245 267
268static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte)
269{
270 unsigned long tsb_addr = (unsigned long) ent;
271
272 if (tlb_type == cheetah_plus || tlb_type == hypervisor)
273 tsb_addr = __pa(tsb_addr);
274
275 __tsb_insert(tsb_addr, tag, pte);
276}
277
278unsigned long _PAGE_ALL_SZ_BITS __read_mostly;
279unsigned long _PAGE_SZBITS __read_mostly;
280
246void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) 281void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
247{ 282{
248 struct page *page; 283 struct mm_struct *mm;
249 unsigned long pfn; 284 struct tsb *tsb;
250 unsigned long pg_flags; 285 unsigned long tag, flags;
251 286 unsigned long tsb_index, tsb_hash_shift;
252 pfn = pte_pfn(pte); 287
253 if (pfn_valid(pfn) && 288 if (tlb_type != hypervisor) {
254 (page = pfn_to_page(pfn), page_mapping(page)) && 289 unsigned long pfn = pte_pfn(pte);
255 ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) { 290 unsigned long pg_flags;
256 int cpu = ((pg_flags >> PG_dcache_cpu_shift) & 291 struct page *page;
257 PG_dcache_cpu_mask); 292
258 int this_cpu = get_cpu(); 293 if (pfn_valid(pfn) &&
259 294 (page = pfn_to_page(pfn), page_mapping(page)) &&
260 /* This is just to optimize away some function calls 295 ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) {
261 * in the SMP case. 296 int cpu = ((pg_flags >> PG_dcache_cpu_shift) &
262 */ 297 PG_dcache_cpu_mask);
263 if (cpu == this_cpu) 298 int this_cpu = get_cpu();
264 flush_dcache_page_impl(page); 299
265 else 300 /* This is just to optimize away some function calls
266 smp_flush_dcache_page_impl(page, cpu); 301 * in the SMP case.
302 */
303 if (cpu == this_cpu)
304 flush_dcache_page_impl(page);
305 else
306 smp_flush_dcache_page_impl(page, cpu);
307
308 clear_dcache_dirty_cpu(page, cpu);
267 309
268 clear_dcache_dirty_cpu(page, cpu); 310 put_cpu();
311 }
312 }
269 313
270 put_cpu(); 314 mm = vma->vm_mm;
315
316 tsb_index = MM_TSB_BASE;
317 tsb_hash_shift = PAGE_SHIFT;
318
319 spin_lock_irqsave(&mm->context.lock, flags);
320
321#ifdef CONFIG_HUGETLB_PAGE
322 if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) {
323 if ((tlb_type == hypervisor &&
324 (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
325 (tlb_type != hypervisor &&
326 (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) {
327 tsb_index = MM_TSB_HUGE;
328 tsb_hash_shift = HPAGE_SHIFT;
329 }
271 } 330 }
331#endif
332
333 tsb = mm->context.tsb_block[tsb_index].tsb;
334 tsb += ((address >> tsb_hash_shift) &
335 (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
336 tag = (address >> 22UL);
337 tsb_insert(tsb, tag, pte_val(pte));
338
339 spin_unlock_irqrestore(&mm->context.lock, flags);
272} 340}
273 341
274void flush_dcache_page(struct page *page) 342void flush_dcache_page(struct page *page)
@@ -276,6 +344,9 @@ void flush_dcache_page(struct page *page)
276 struct address_space *mapping; 344 struct address_space *mapping;
277 int this_cpu; 345 int this_cpu;
278 346
347 if (tlb_type == hypervisor)
348 return;
349
279 /* Do not bother with the expensive D-cache flush if it 350 /* Do not bother with the expensive D-cache flush if it
280 * is merely the zero page. The 'bigcore' testcase in GDB 351 * is merely the zero page. The 'bigcore' testcase in GDB
281 * causes this case to run millions of times. 352 * causes this case to run millions of times.
@@ -311,7 +382,7 @@ out:
311 382
312void __kprobes flush_icache_range(unsigned long start, unsigned long end) 383void __kprobes flush_icache_range(unsigned long start, unsigned long end)
313{ 384{
314 /* Cheetah has coherent I-cache. */ 385 /* Cheetah and Hypervisor platform cpus have coherent I-cache. */
315 if (tlb_type == spitfire) { 386 if (tlb_type == spitfire) {
316 unsigned long kaddr; 387 unsigned long kaddr;
317 388
@@ -320,16 +391,6 @@ void __kprobes flush_icache_range(unsigned long start, unsigned long end)
320 } 391 }
321} 392}
322 393
323unsigned long page_to_pfn(struct page *page)
324{
325 return (unsigned long) ((page - mem_map) + pfn_base);
326}
327
328struct page *pfn_to_page(unsigned long pfn)
329{
330 return (mem_map + (pfn - pfn_base));
331}
332
333void show_mem(void) 394void show_mem(void)
334{ 395{
335 printk("Mem-info:\n"); 396 printk("Mem-info:\n");
@@ -338,7 +399,6 @@ void show_mem(void)
338 nr_swap_pages << (PAGE_SHIFT-10)); 399 nr_swap_pages << (PAGE_SHIFT-10));
339 printk("%ld pages of RAM\n", num_physpages); 400 printk("%ld pages of RAM\n", num_physpages);
340 printk("%d free pages\n", nr_free_pages()); 401 printk("%d free pages\n", nr_free_pages());
341 printk("%d pages in page table cache\n",pgtable_cache_size);
342} 402}
343 403
344void mmu_info(struct seq_file *m) 404void mmu_info(struct seq_file *m)
@@ -349,6 +409,8 @@ void mmu_info(struct seq_file *m)
349 seq_printf(m, "MMU Type\t: Cheetah+\n"); 409 seq_printf(m, "MMU Type\t: Cheetah+\n");
350 else if (tlb_type == spitfire) 410 else if (tlb_type == spitfire)
351 seq_printf(m, "MMU Type\t: Spitfire\n"); 411 seq_printf(m, "MMU Type\t: Spitfire\n");
412 else if (tlb_type == hypervisor)
413 seq_printf(m, "MMU Type\t: Hypervisor (sun4v)\n");
352 else 414 else
353 seq_printf(m, "MMU Type\t: ???\n"); 415 seq_printf(m, "MMU Type\t: ???\n");
354 416
@@ -371,45 +433,13 @@ struct linux_prom_translation {
371/* Exported for kernel TLB miss handling in ktlb.S */ 433/* Exported for kernel TLB miss handling in ktlb.S */
372struct linux_prom_translation prom_trans[512] __read_mostly; 434struct linux_prom_translation prom_trans[512] __read_mostly;
373unsigned int prom_trans_ents __read_mostly; 435unsigned int prom_trans_ents __read_mostly;
374unsigned int swapper_pgd_zero __read_mostly;
375
376extern unsigned long prom_boot_page;
377extern void prom_remap(unsigned long physpage, unsigned long virtpage, int mmu_ihandle);
378extern int prom_get_mmu_ihandle(void);
379extern void register_prom_callbacks(void);
380 436
381/* Exported for SMP bootup purposes. */ 437/* Exported for SMP bootup purposes. */
382unsigned long kern_locked_tte_data; 438unsigned long kern_locked_tte_data;
383 439
384/*
385 * Translate PROM's mapping we capture at boot time into physical address.
386 * The second parameter is only set from prom_callback() invocations.
387 */
388unsigned long prom_virt_to_phys(unsigned long promva, int *error)
389{
390 int i;
391
392 for (i = 0; i < prom_trans_ents; i++) {
393 struct linux_prom_translation *p = &prom_trans[i];
394
395 if (promva >= p->virt &&
396 promva < (p->virt + p->size)) {
397 unsigned long base = p->data & _PAGE_PADDR;
398
399 if (error)
400 *error = 0;
401 return base + (promva & (8192 - 1));
402 }
403 }
404 if (error)
405 *error = 1;
406 return 0UL;
407}
408
409/* The obp translations are saved based on 8k pagesize, since obp can 440/* The obp translations are saved based on 8k pagesize, since obp can
410 * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS -> 441 * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS ->
411 * HI_OBP_ADDRESS range are handled in ktlb.S and do not use the vpte 442 * HI_OBP_ADDRESS range are handled in ktlb.S.
412 * scheme (also, see rant in inherit_locked_prom_mappings()).
413 */ 443 */
414static inline int in_obp_range(unsigned long vaddr) 444static inline int in_obp_range(unsigned long vaddr)
415{ 445{
@@ -490,6 +520,36 @@ static void __init read_obp_translations(void)
490 } 520 }
491} 521}
492 522
523static void __init hypervisor_tlb_lock(unsigned long vaddr,
524 unsigned long pte,
525 unsigned long mmu)
526{
527 register unsigned long func asm("%o5");
528 register unsigned long arg0 asm("%o0");
529 register unsigned long arg1 asm("%o1");
530 register unsigned long arg2 asm("%o2");
531 register unsigned long arg3 asm("%o3");
532
533 func = HV_FAST_MMU_MAP_PERM_ADDR;
534 arg0 = vaddr;
535 arg1 = 0;
536 arg2 = pte;
537 arg3 = mmu;
538 __asm__ __volatile__("ta 0x80"
539 : "=&r" (func), "=&r" (arg0),
540 "=&r" (arg1), "=&r" (arg2),
541 "=&r" (arg3)
542 : "0" (func), "1" (arg0), "2" (arg1),
543 "3" (arg2), "4" (arg3));
544 if (arg0 != 0) {
545 prom_printf("hypervisor_tlb_lock[%lx:%lx:%lx:%lx]: "
546 "errors with %lx\n", vaddr, 0, pte, mmu, arg0);
547 prom_halt();
548 }
549}
550
551static unsigned long kern_large_tte(unsigned long paddr);
552
493static void __init remap_kernel(void) 553static void __init remap_kernel(void)
494{ 554{
495 unsigned long phys_page, tte_vaddr, tte_data; 555 unsigned long phys_page, tte_vaddr, tte_data;
@@ -497,25 +557,34 @@ static void __init remap_kernel(void)
497 557
498 tte_vaddr = (unsigned long) KERNBASE; 558 tte_vaddr = (unsigned long) KERNBASE;
499 phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL; 559 phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
500 tte_data = (phys_page | (_PAGE_VALID | _PAGE_SZ4MB | 560 tte_data = kern_large_tte(phys_page);
501 _PAGE_CP | _PAGE_CV | _PAGE_P |
502 _PAGE_L | _PAGE_W));
503 561
504 kern_locked_tte_data = tte_data; 562 kern_locked_tte_data = tte_data;
505 563
506 /* Now lock us into the TLBs via OBP. */ 564 /* Now lock us into the TLBs via Hypervisor or OBP. */
507 prom_dtlb_load(tlb_ent, tte_data, tte_vaddr); 565 if (tlb_type == hypervisor) {
508 prom_itlb_load(tlb_ent, tte_data, tte_vaddr); 566 hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU);
509 if (bigkernel) { 567 hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU);
510 tlb_ent -= 1; 568 if (bigkernel) {
511 prom_dtlb_load(tlb_ent, 569 tte_vaddr += 0x400000;
512 tte_data + 0x400000, 570 tte_data += 0x400000;
513 tte_vaddr + 0x400000); 571 hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU);
514 prom_itlb_load(tlb_ent, 572 hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU);
515 tte_data + 0x400000, 573 }
516 tte_vaddr + 0x400000); 574 } else {
575 prom_dtlb_load(tlb_ent, tte_data, tte_vaddr);
576 prom_itlb_load(tlb_ent, tte_data, tte_vaddr);
577 if (bigkernel) {
578 tlb_ent -= 1;
579 prom_dtlb_load(tlb_ent,
580 tte_data + 0x400000,
581 tte_vaddr + 0x400000);
582 prom_itlb_load(tlb_ent,
583 tte_data + 0x400000,
584 tte_vaddr + 0x400000);
585 }
586 sparc64_highest_unlocked_tlb_ent = tlb_ent - 1;
517 } 587 }
518 sparc64_highest_unlocked_tlb_ent = tlb_ent - 1;
519 if (tlb_type == cheetah_plus) { 588 if (tlb_type == cheetah_plus) {
520 sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 | 589 sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 |
521 CTX_CHEETAH_PLUS_NUC); 590 CTX_CHEETAH_PLUS_NUC);
@@ -533,372 +602,14 @@ static void __init inherit_prom_mappings(void)
533 prom_printf("Remapping the kernel... "); 602 prom_printf("Remapping the kernel... ");
534 remap_kernel(); 603 remap_kernel();
535 prom_printf("done.\n"); 604 prom_printf("done.\n");
536
537 prom_printf("Registering callbacks... ");
538 register_prom_callbacks();
539 prom_printf("done.\n");
540} 605}
541 606
542/* The OBP specifications for sun4u mark 0xfffffffc00000000 and
543 * upwards as reserved for use by the firmware (I wonder if this
544 * will be the same on Cheetah...). We use this virtual address
545 * range for the VPTE table mappings of the nucleus so we need
546 * to zap them when we enter the PROM. -DaveM
547 */
548static void __flush_nucleus_vptes(void)
549{
550 unsigned long prom_reserved_base = 0xfffffffc00000000UL;
551 int i;
552
553 /* Only DTLB must be checked for VPTE entries. */
554 if (tlb_type == spitfire) {
555 for (i = 0; i < 63; i++) {
556 unsigned long tag;
557
558 /* Spitfire Errata #32 workaround */
559 /* NOTE: Always runs on spitfire, so no cheetah+
560 * page size encodings.
561 */
562 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
563 "flush %%g6"
564 : /* No outputs */
565 : "r" (0),
566 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
567
568 tag = spitfire_get_dtlb_tag(i);
569 if (((tag & ~(PAGE_MASK)) == 0) &&
570 ((tag & (PAGE_MASK)) >= prom_reserved_base)) {
571 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
572 "membar #Sync"
573 : /* no outputs */
574 : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
575 spitfire_put_dtlb_data(i, 0x0UL);
576 }
577 }
578 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
579 for (i = 0; i < 512; i++) {
580 unsigned long tag = cheetah_get_dtlb_tag(i, 2);
581
582 if ((tag & ~PAGE_MASK) == 0 &&
583 (tag & PAGE_MASK) >= prom_reserved_base) {
584 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
585 "membar #Sync"
586 : /* no outputs */
587 : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
588 cheetah_put_dtlb_data(i, 0x0UL, 2);
589 }
590
591 if (tlb_type != cheetah_plus)
592 continue;
593
594 tag = cheetah_get_dtlb_tag(i, 3);
595
596 if ((tag & ~PAGE_MASK) == 0 &&
597 (tag & PAGE_MASK) >= prom_reserved_base) {
598 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
599 "membar #Sync"
600 : /* no outputs */
601 : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
602 cheetah_put_dtlb_data(i, 0x0UL, 3);
603 }
604 }
605 } else {
606 /* Implement me :-) */
607 BUG();
608 }
609}
610
611static int prom_ditlb_set;
612struct prom_tlb_entry {
613 int tlb_ent;
614 unsigned long tlb_tag;
615 unsigned long tlb_data;
616};
617struct prom_tlb_entry prom_itlb[16], prom_dtlb[16];
618
619void prom_world(int enter) 607void prom_world(int enter)
620{ 608{
621 unsigned long pstate;
622 int i;
623
624 if (!enter) 609 if (!enter)
625 set_fs((mm_segment_t) { get_thread_current_ds() }); 610 set_fs((mm_segment_t) { get_thread_current_ds() });
626 611
627 if (!prom_ditlb_set) 612 __asm__ __volatile__("flushw");
628 return;
629
630 /* Make sure the following runs atomically. */
631 __asm__ __volatile__("flushw\n\t"
632 "rdpr %%pstate, %0\n\t"
633 "wrpr %0, %1, %%pstate"
634 : "=r" (pstate)
635 : "i" (PSTATE_IE));
636
637 if (enter) {
638 /* Kick out nucleus VPTEs. */
639 __flush_nucleus_vptes();
640
641 /* Install PROM world. */
642 for (i = 0; i < 16; i++) {
643 if (prom_dtlb[i].tlb_ent != -1) {
644 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
645 "membar #Sync"
646 : : "r" (prom_dtlb[i].tlb_tag), "r" (TLB_TAG_ACCESS),
647 "i" (ASI_DMMU));
648 if (tlb_type == spitfire)
649 spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent,
650 prom_dtlb[i].tlb_data);
651 else if (tlb_type == cheetah || tlb_type == cheetah_plus)
652 cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent,
653 prom_dtlb[i].tlb_data);
654 }
655 if (prom_itlb[i].tlb_ent != -1) {
656 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
657 "membar #Sync"
658 : : "r" (prom_itlb[i].tlb_tag),
659 "r" (TLB_TAG_ACCESS),
660 "i" (ASI_IMMU));
661 if (tlb_type == spitfire)
662 spitfire_put_itlb_data(prom_itlb[i].tlb_ent,
663 prom_itlb[i].tlb_data);
664 else if (tlb_type == cheetah || tlb_type == cheetah_plus)
665 cheetah_put_litlb_data(prom_itlb[i].tlb_ent,
666 prom_itlb[i].tlb_data);
667 }
668 }
669 } else {
670 for (i = 0; i < 16; i++) {
671 if (prom_dtlb[i].tlb_ent != -1) {
672 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
673 "membar #Sync"
674 : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
675 if (tlb_type == spitfire)
676 spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent, 0x0UL);
677 else
678 cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent, 0x0UL);
679 }
680 if (prom_itlb[i].tlb_ent != -1) {
681 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
682 "membar #Sync"
683 : : "r" (TLB_TAG_ACCESS),
684 "i" (ASI_IMMU));
685 if (tlb_type == spitfire)
686 spitfire_put_itlb_data(prom_itlb[i].tlb_ent, 0x0UL);
687 else
688 cheetah_put_litlb_data(prom_itlb[i].tlb_ent, 0x0UL);
689 }
690 }
691 }
692 __asm__ __volatile__("wrpr %0, 0, %%pstate"
693 : : "r" (pstate));
694}
695
696void inherit_locked_prom_mappings(int save_p)
697{
698 int i;
699 int dtlb_seen = 0;
700 int itlb_seen = 0;
701
702 /* Fucking losing PROM has more mappings in the TLB, but
703 * it (conveniently) fails to mention any of these in the
704 * translations property. The only ones that matter are
705 * the locked PROM tlb entries, so we impose the following
706 * irrecovable rule on the PROM, it is allowed 8 locked
707 * entries in the ITLB and 8 in the DTLB.
708 *
709 * Supposedly the upper 16GB of the address space is
710 * reserved for OBP, BUT I WISH THIS WAS DOCUMENTED
711 * SOMEWHERE!!!!!!!!!!!!!!!!! Furthermore the entire interface
712 * used between the client program and the firmware on sun5
713 * systems to coordinate mmu mappings is also COMPLETELY
714 * UNDOCUMENTED!!!!!! Thanks S(t)un!
715 */
716 if (save_p) {
717 for (i = 0; i < 16; i++) {
718 prom_itlb[i].tlb_ent = -1;
719 prom_dtlb[i].tlb_ent = -1;
720 }
721 }
722 if (tlb_type == spitfire) {
723 int high = sparc64_highest_unlocked_tlb_ent;
724 for (i = 0; i <= high; i++) {
725 unsigned long data;
726
727 /* Spitfire Errata #32 workaround */
728 /* NOTE: Always runs on spitfire, so no cheetah+
729 * page size encodings.
730 */
731 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
732 "flush %%g6"
733 : /* No outputs */
734 : "r" (0),
735 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
736
737 data = spitfire_get_dtlb_data(i);
738 if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
739 unsigned long tag;
740
741 /* Spitfire Errata #32 workaround */
742 /* NOTE: Always runs on spitfire, so no
743 * cheetah+ page size encodings.
744 */
745 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
746 "flush %%g6"
747 : /* No outputs */
748 : "r" (0),
749 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
750
751 tag = spitfire_get_dtlb_tag(i);
752 if (save_p) {
753 prom_dtlb[dtlb_seen].tlb_ent = i;
754 prom_dtlb[dtlb_seen].tlb_tag = tag;
755 prom_dtlb[dtlb_seen].tlb_data = data;
756 }
757 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
758 "membar #Sync"
759 : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
760 spitfire_put_dtlb_data(i, 0x0UL);
761
762 dtlb_seen++;
763 if (dtlb_seen > 15)
764 break;
765 }
766 }
767
768 for (i = 0; i < high; i++) {
769 unsigned long data;
770
771 /* Spitfire Errata #32 workaround */
772 /* NOTE: Always runs on spitfire, so no
773 * cheetah+ page size encodings.
774 */
775 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
776 "flush %%g6"
777 : /* No outputs */
778 : "r" (0),
779 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
780
781 data = spitfire_get_itlb_data(i);
782 if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
783 unsigned long tag;
784
785 /* Spitfire Errata #32 workaround */
786 /* NOTE: Always runs on spitfire, so no
787 * cheetah+ page size encodings.
788 */
789 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
790 "flush %%g6"
791 : /* No outputs */
792 : "r" (0),
793 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
794
795 tag = spitfire_get_itlb_tag(i);
796 if (save_p) {
797 prom_itlb[itlb_seen].tlb_ent = i;
798 prom_itlb[itlb_seen].tlb_tag = tag;
799 prom_itlb[itlb_seen].tlb_data = data;
800 }
801 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
802 "membar #Sync"
803 : : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
804 spitfire_put_itlb_data(i, 0x0UL);
805
806 itlb_seen++;
807 if (itlb_seen > 15)
808 break;
809 }
810 }
811 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
812 int high = sparc64_highest_unlocked_tlb_ent;
813
814 for (i = 0; i <= high; i++) {
815 unsigned long data;
816
817 data = cheetah_get_ldtlb_data(i);
818 if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
819 unsigned long tag;
820
821 tag = cheetah_get_ldtlb_tag(i);
822 if (save_p) {
823 prom_dtlb[dtlb_seen].tlb_ent = i;
824 prom_dtlb[dtlb_seen].tlb_tag = tag;
825 prom_dtlb[dtlb_seen].tlb_data = data;
826 }
827 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
828 "membar #Sync"
829 : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
830 cheetah_put_ldtlb_data(i, 0x0UL);
831
832 dtlb_seen++;
833 if (dtlb_seen > 15)
834 break;
835 }
836 }
837
838 for (i = 0; i < high; i++) {
839 unsigned long data;
840
841 data = cheetah_get_litlb_data(i);
842 if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
843 unsigned long tag;
844
845 tag = cheetah_get_litlb_tag(i);
846 if (save_p) {
847 prom_itlb[itlb_seen].tlb_ent = i;
848 prom_itlb[itlb_seen].tlb_tag = tag;
849 prom_itlb[itlb_seen].tlb_data = data;
850 }
851 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
852 "membar #Sync"
853 : : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
854 cheetah_put_litlb_data(i, 0x0UL);
855
856 itlb_seen++;
857 if (itlb_seen > 15)
858 break;
859 }
860 }
861 } else {
862 /* Implement me :-) */
863 BUG();
864 }
865 if (save_p)
866 prom_ditlb_set = 1;
867}
868
869/* Give PROM back his world, done during reboots... */
870void prom_reload_locked(void)
871{
872 int i;
873
874 for (i = 0; i < 16; i++) {
875 if (prom_dtlb[i].tlb_ent != -1) {
876 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
877 "membar #Sync"
878 : : "r" (prom_dtlb[i].tlb_tag), "r" (TLB_TAG_ACCESS),
879 "i" (ASI_DMMU));
880 if (tlb_type == spitfire)
881 spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent,
882 prom_dtlb[i].tlb_data);
883 else if (tlb_type == cheetah || tlb_type == cheetah_plus)
884 cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent,
885 prom_dtlb[i].tlb_data);
886 }
887
888 if (prom_itlb[i].tlb_ent != -1) {
889 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
890 "membar #Sync"
891 : : "r" (prom_itlb[i].tlb_tag),
892 "r" (TLB_TAG_ACCESS),
893 "i" (ASI_IMMU));
894 if (tlb_type == spitfire)
895 spitfire_put_itlb_data(prom_itlb[i].tlb_ent,
896 prom_itlb[i].tlb_data);
897 else
898 cheetah_put_litlb_data(prom_itlb[i].tlb_ent,
899 prom_itlb[i].tlb_data);
900 }
901 }
902} 613}
903 614
904#ifdef DCACHE_ALIASING_POSSIBLE 615#ifdef DCACHE_ALIASING_POSSIBLE
@@ -914,7 +625,7 @@ void __flush_dcache_range(unsigned long start, unsigned long end)
914 if (++n >= 512) 625 if (++n >= 512)
915 break; 626 break;
916 } 627 }
917 } else { 628 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
918 start = __pa(start); 629 start = __pa(start);
919 end = __pa(end); 630 end = __pa(end);
920 for (va = start; va < end; va += 32) 631 for (va = start; va < end; va += 32)
@@ -927,63 +638,6 @@ void __flush_dcache_range(unsigned long start, unsigned long end)
927} 638}
928#endif /* DCACHE_ALIASING_POSSIBLE */ 639#endif /* DCACHE_ALIASING_POSSIBLE */
929 640
930/* If not locked, zap it. */
931void __flush_tlb_all(void)
932{
933 unsigned long pstate;
934 int i;
935
936 __asm__ __volatile__("flushw\n\t"
937 "rdpr %%pstate, %0\n\t"
938 "wrpr %0, %1, %%pstate"
939 : "=r" (pstate)
940 : "i" (PSTATE_IE));
941 if (tlb_type == spitfire) {
942 for (i = 0; i < 64; i++) {
943 /* Spitfire Errata #32 workaround */
944 /* NOTE: Always runs on spitfire, so no
945 * cheetah+ page size encodings.
946 */
947 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
948 "flush %%g6"
949 : /* No outputs */
950 : "r" (0),
951 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
952
953 if (!(spitfire_get_dtlb_data(i) & _PAGE_L)) {
954 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
955 "membar #Sync"
956 : /* no outputs */
957 : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
958 spitfire_put_dtlb_data(i, 0x0UL);
959 }
960
961 /* Spitfire Errata #32 workaround */
962 /* NOTE: Always runs on spitfire, so no
963 * cheetah+ page size encodings.
964 */
965 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
966 "flush %%g6"
967 : /* No outputs */
968 : "r" (0),
969 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
970
971 if (!(spitfire_get_itlb_data(i) & _PAGE_L)) {
972 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
973 "membar #Sync"
974 : /* no outputs */
975 : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
976 spitfire_put_itlb_data(i, 0x0UL);
977 }
978 }
979 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
980 cheetah_flush_dtlb_all();
981 cheetah_flush_itlb_all();
982 }
983 __asm__ __volatile__("wrpr %0, 0, %%pstate"
984 : : "r" (pstate));
985}
986
987/* Caller does TLB context flushing on local CPU if necessary. 641/* Caller does TLB context flushing on local CPU if necessary.
988 * The caller also ensures that CTX_VALID(mm->context) is false. 642 * The caller also ensures that CTX_VALID(mm->context) is false.
989 * 643 *
@@ -991,17 +645,21 @@ void __flush_tlb_all(void)
991 * let the user have CTX 0 (nucleus) or we ever use a CTX 645 * let the user have CTX 0 (nucleus) or we ever use a CTX
992 * version of zero (and thus NO_CONTEXT would not be caught 646 * version of zero (and thus NO_CONTEXT would not be caught
993 * by version mis-match tests in mmu_context.h). 647 * by version mis-match tests in mmu_context.h).
648 *
649 * Always invoked with interrupts disabled.
994 */ 650 */
995void get_new_mmu_context(struct mm_struct *mm) 651void get_new_mmu_context(struct mm_struct *mm)
996{ 652{
997 unsigned long ctx, new_ctx; 653 unsigned long ctx, new_ctx;
998 unsigned long orig_pgsz_bits; 654 unsigned long orig_pgsz_bits;
999 655 unsigned long flags;
656 int new_version;
1000 657
1001 spin_lock(&ctx_alloc_lock); 658 spin_lock_irqsave(&ctx_alloc_lock, flags);
1002 orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); 659 orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
1003 ctx = (tlb_context_cache + 1) & CTX_NR_MASK; 660 ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
1004 new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); 661 new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
662 new_version = 0;
1005 if (new_ctx >= (1 << CTX_NR_BITS)) { 663 if (new_ctx >= (1 << CTX_NR_BITS)) {
1006 new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1); 664 new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
1007 if (new_ctx >= ctx) { 665 if (new_ctx >= ctx) {
@@ -1024,6 +682,7 @@ void get_new_mmu_context(struct mm_struct *mm)
1024 mmu_context_bmap[i + 2] = 0; 682 mmu_context_bmap[i + 2] = 0;
1025 mmu_context_bmap[i + 3] = 0; 683 mmu_context_bmap[i + 3] = 0;
1026 } 684 }
685 new_version = 1;
1027 goto out; 686 goto out;
1028 } 687 }
1029 } 688 }
@@ -1032,79 +691,10 @@ void get_new_mmu_context(struct mm_struct *mm)
1032out: 691out:
1033 tlb_context_cache = new_ctx; 692 tlb_context_cache = new_ctx;
1034 mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; 693 mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
1035 spin_unlock(&ctx_alloc_lock); 694 spin_unlock_irqrestore(&ctx_alloc_lock, flags);
1036}
1037
1038#ifndef CONFIG_SMP
1039struct pgtable_cache_struct pgt_quicklists;
1040#endif
1041 695
1042/* OK, we have to color these pages. The page tables are accessed 696 if (unlikely(new_version))
1043 * by non-Dcache enabled mapping in the VPTE area by the dtlb_backend.S 697 smp_new_mmu_context_version();
1044 * code, as well as by PAGE_OFFSET range direct-mapped addresses by
1045 * other parts of the kernel. By coloring, we make sure that the tlbmiss
1046 * fast handlers do not get data from old/garbage dcache lines that
1047 * correspond to an old/stale virtual address (user/kernel) that
1048 * previously mapped the pagetable page while accessing vpte range
1049 * addresses. The idea is that if the vpte color and PAGE_OFFSET range
1050 * color is the same, then when the kernel initializes the pagetable
1051 * using the later address range, accesses with the first address
1052 * range will see the newly initialized data rather than the garbage.
1053 */
1054#ifdef DCACHE_ALIASING_POSSIBLE
1055#define DC_ALIAS_SHIFT 1
1056#else
1057#define DC_ALIAS_SHIFT 0
1058#endif
1059pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
1060{
1061 struct page *page;
1062 unsigned long color;
1063
1064 {
1065 pte_t *ptep = pte_alloc_one_fast(mm, address);
1066
1067 if (ptep)
1068 return ptep;
1069 }
1070
1071 color = VPTE_COLOR(address);
1072 page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, DC_ALIAS_SHIFT);
1073 if (page) {
1074 unsigned long *to_free;
1075 unsigned long paddr;
1076 pte_t *pte;
1077
1078#ifdef DCACHE_ALIASING_POSSIBLE
1079 set_page_count(page, 1);
1080 ClearPageCompound(page);
1081
1082 set_page_count((page + 1), 1);
1083 ClearPageCompound(page + 1);
1084#endif
1085 paddr = (unsigned long) page_address(page);
1086 memset((char *)paddr, 0, (PAGE_SIZE << DC_ALIAS_SHIFT));
1087
1088 if (!color) {
1089 pte = (pte_t *) paddr;
1090 to_free = (unsigned long *) (paddr + PAGE_SIZE);
1091 } else {
1092 pte = (pte_t *) (paddr + PAGE_SIZE);
1093 to_free = (unsigned long *) paddr;
1094 }
1095
1096#ifdef DCACHE_ALIASING_POSSIBLE
1097 /* Now free the other one up, adjust cache size. */
1098 preempt_disable();
1099 *to_free = (unsigned long) pte_quicklist[color ^ 0x1];
1100 pte_quicklist[color ^ 0x1] = to_free;
1101 pgtable_cache_size++;
1102 preempt_enable();
1103#endif
1104
1105 return pte;
1106 }
1107 return NULL;
1108} 698}
1109 699
1110void sparc_ultra_dump_itlb(void) 700void sparc_ultra_dump_itlb(void)
@@ -1196,9 +786,78 @@ void sparc_ultra_dump_dtlb(void)
1196 786
1197extern unsigned long cmdline_memory_size; 787extern unsigned long cmdline_memory_size;
1198 788
1199unsigned long __init bootmem_init(unsigned long *pages_avail) 789/* Find a free area for the bootmem map, avoiding the kernel image
790 * and the initial ramdisk.
791 */
792static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn,
793 unsigned long end_pfn)
1200{ 794{
1201 unsigned long bootmap_size, start_pfn, end_pfn; 795 unsigned long avoid_start, avoid_end, bootmap_size;
796 int i;
797
798 bootmap_size = ((end_pfn - start_pfn) + 7) / 8;
799 bootmap_size = ALIGN(bootmap_size, sizeof(long));
800
801 avoid_start = avoid_end = 0;
802#ifdef CONFIG_BLK_DEV_INITRD
803 avoid_start = initrd_start;
804 avoid_end = PAGE_ALIGN(initrd_end);
805#endif
806
807#ifdef CONFIG_DEBUG_BOOTMEM
808 prom_printf("choose_bootmap_pfn: kern[%lx:%lx] avoid[%lx:%lx]\n",
809 kern_base, PAGE_ALIGN(kern_base + kern_size),
810 avoid_start, avoid_end);
811#endif
812 for (i = 0; i < pavail_ents; i++) {
813 unsigned long start, end;
814
815 start = pavail[i].phys_addr;
816 end = start + pavail[i].reg_size;
817
818 while (start < end) {
819 if (start >= kern_base &&
820 start < PAGE_ALIGN(kern_base + kern_size)) {
821 start = PAGE_ALIGN(kern_base + kern_size);
822 continue;
823 }
824 if (start >= avoid_start && start < avoid_end) {
825 start = avoid_end;
826 continue;
827 }
828
829 if ((end - start) < bootmap_size)
830 break;
831
832 if (start < kern_base &&
833 (start + bootmap_size) > kern_base) {
834 start = PAGE_ALIGN(kern_base + kern_size);
835 continue;
836 }
837
838 if (start < avoid_start &&
839 (start + bootmap_size) > avoid_start) {
840 start = avoid_end;
841 continue;
842 }
843
844 /* OK, it doesn't overlap anything, use it. */
845#ifdef CONFIG_DEBUG_BOOTMEM
846 prom_printf("choose_bootmap_pfn: Using %lx [%lx]\n",
847 start >> PAGE_SHIFT, start);
848#endif
849 return start >> PAGE_SHIFT;
850 }
851 }
852
853 prom_printf("Cannot find free area for bootmap, aborting.\n");
854 prom_halt();
855}
856
857static unsigned long __init bootmem_init(unsigned long *pages_avail,
858 unsigned long phys_base)
859{
860 unsigned long bootmap_size, end_pfn;
1202 unsigned long end_of_phys_memory = 0UL; 861 unsigned long end_of_phys_memory = 0UL;
1203 unsigned long bootmap_pfn, bytes_avail, size; 862 unsigned long bootmap_pfn, bytes_avail, size;
1204 int i; 863 int i;
@@ -1236,14 +895,6 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
1236 895
1237 *pages_avail = bytes_avail >> PAGE_SHIFT; 896 *pages_avail = bytes_avail >> PAGE_SHIFT;
1238 897
1239 /* Start with page aligned address of last symbol in kernel
1240 * image. The kernel is hard mapped below PAGE_OFFSET in a
1241 * 4MB locked TLB translation.
1242 */
1243 start_pfn = PAGE_ALIGN(kern_base + kern_size) >> PAGE_SHIFT;
1244
1245 bootmap_pfn = start_pfn;
1246
1247 end_pfn = end_of_phys_memory >> PAGE_SHIFT; 898 end_pfn = end_of_phys_memory >> PAGE_SHIFT;
1248 899
1249#ifdef CONFIG_BLK_DEV_INITRD 900#ifdef CONFIG_BLK_DEV_INITRD
@@ -1260,23 +911,22 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
1260 "(0x%016lx > 0x%016lx)\ndisabling initrd\n", 911 "(0x%016lx > 0x%016lx)\ndisabling initrd\n",
1261 initrd_end, end_of_phys_memory); 912 initrd_end, end_of_phys_memory);
1262 initrd_start = 0; 913 initrd_start = 0;
1263 } 914 initrd_end = 0;
1264 if (initrd_start) {
1265 if (initrd_start >= (start_pfn << PAGE_SHIFT) &&
1266 initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE)
1267 bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT;
1268 } 915 }
1269 } 916 }
1270#endif 917#endif
1271 /* Initialize the boot-time allocator. */ 918 /* Initialize the boot-time allocator. */
1272 max_pfn = max_low_pfn = end_pfn; 919 max_pfn = max_low_pfn = end_pfn;
1273 min_low_pfn = pfn_base; 920 min_low_pfn = (phys_base >> PAGE_SHIFT);
921
922 bootmap_pfn = choose_bootmap_pfn(min_low_pfn, end_pfn);
1274 923
1275#ifdef CONFIG_DEBUG_BOOTMEM 924#ifdef CONFIG_DEBUG_BOOTMEM
1276 prom_printf("init_bootmem(min[%lx], bootmap[%lx], max[%lx])\n", 925 prom_printf("init_bootmem(min[%lx], bootmap[%lx], max[%lx])\n",
1277 min_low_pfn, bootmap_pfn, max_low_pfn); 926 min_low_pfn, bootmap_pfn, max_low_pfn);
1278#endif 927#endif
1279 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, pfn_base, end_pfn); 928 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn,
929 min_low_pfn, end_pfn);
1280 930
1281 /* Now register the available physical memory with the 931 /* Now register the available physical memory with the
1282 * allocator. 932 * allocator.
@@ -1324,9 +974,26 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
1324 reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size); 974 reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size);
1325 *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT; 975 *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
1326 976
977 for (i = 0; i < pavail_ents; i++) {
978 unsigned long start_pfn, end_pfn;
979
980 start_pfn = pavail[i].phys_addr >> PAGE_SHIFT;
981 end_pfn = (start_pfn + (pavail[i].reg_size >> PAGE_SHIFT));
982#ifdef CONFIG_DEBUG_BOOTMEM
983 prom_printf("memory_present(0, %lx, %lx)\n",
984 start_pfn, end_pfn);
985#endif
986 memory_present(0, start_pfn, end_pfn);
987 }
988
989 sparse_init();
990
1327 return end_pfn; 991 return end_pfn;
1328} 992}
1329 993
994static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
995static int pall_ents __initdata;
996
1330#ifdef CONFIG_DEBUG_PAGEALLOC 997#ifdef CONFIG_DEBUG_PAGEALLOC
1331static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, pgprot_t prot) 998static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, pgprot_t prot)
1332{ 999{
@@ -1382,14 +1049,44 @@ static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend,
1382 return alloc_bytes; 1049 return alloc_bytes;
1383} 1050}
1384 1051
1385static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
1386static int pall_ents __initdata;
1387
1388extern unsigned int kvmap_linear_patch[1]; 1052extern unsigned int kvmap_linear_patch[1];
1053#endif /* CONFIG_DEBUG_PAGEALLOC */
1054
1055static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
1056{
1057 const unsigned long shift_256MB = 28;
1058 const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL);
1059 const unsigned long size_256MB = (1UL << shift_256MB);
1060
1061 while (start < end) {
1062 long remains;
1063
1064 remains = end - start;
1065 if (remains < size_256MB)
1066 break;
1067
1068 if (start & mask_256MB) {
1069 start = (start + size_256MB) & ~mask_256MB;
1070 continue;
1071 }
1072
1073 while (remains >= size_256MB) {
1074 unsigned long index = start >> shift_256MB;
1075
1076 __set_bit(index, kpte_linear_bitmap);
1077
1078 start += size_256MB;
1079 remains -= size_256MB;
1080 }
1081 }
1082}
1389 1083
1390static void __init kernel_physical_mapping_init(void) 1084static void __init kernel_physical_mapping_init(void)
1391{ 1085{
1392 unsigned long i, mem_alloced = 0UL; 1086 unsigned long i;
1087#ifdef CONFIG_DEBUG_PAGEALLOC
1088 unsigned long mem_alloced = 0UL;
1089#endif
1393 1090
1394 read_obp_memory("reg", &pall[0], &pall_ents); 1091 read_obp_memory("reg", &pall[0], &pall_ents);
1395 1092
@@ -1398,10 +1095,16 @@ static void __init kernel_physical_mapping_init(void)
1398 1095
1399 phys_start = pall[i].phys_addr; 1096 phys_start = pall[i].phys_addr;
1400 phys_end = phys_start + pall[i].reg_size; 1097 phys_end = phys_start + pall[i].reg_size;
1098
1099 mark_kpte_bitmap(phys_start, phys_end);
1100
1101#ifdef CONFIG_DEBUG_PAGEALLOC
1401 mem_alloced += kernel_map_range(phys_start, phys_end, 1102 mem_alloced += kernel_map_range(phys_start, phys_end,
1402 PAGE_KERNEL); 1103 PAGE_KERNEL);
1104#endif
1403 } 1105 }
1404 1106
1107#ifdef CONFIG_DEBUG_PAGEALLOC
1405 printk("Allocated %ld bytes for kernel page tables.\n", 1108 printk("Allocated %ld bytes for kernel page tables.\n",
1406 mem_alloced); 1109 mem_alloced);
1407 1110
@@ -1409,8 +1112,10 @@ static void __init kernel_physical_mapping_init(void)
1409 flushi(&kvmap_linear_patch[0]); 1112 flushi(&kvmap_linear_patch[0]);
1410 1113
1411 __flush_tlb_all(); 1114 __flush_tlb_all();
1115#endif
1412} 1116}
1413 1117
1118#ifdef CONFIG_DEBUG_PAGEALLOC
1414void kernel_map_pages(struct page *page, int numpages, int enable) 1119void kernel_map_pages(struct page *page, int numpages, int enable)
1415{ 1120{
1416 unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT; 1121 unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT;
@@ -1419,6 +1124,9 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
1419 kernel_map_range(phys_start, phys_end, 1124 kernel_map_range(phys_start, phys_end,
1420 (enable ? PAGE_KERNEL : __pgprot(0))); 1125 (enable ? PAGE_KERNEL : __pgprot(0)));
1421 1126
1127 flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
1128 PAGE_OFFSET + phys_end);
1129
1422 /* we should perform an IPI and flush all tlbs, 1130 /* we should perform an IPI and flush all tlbs,
1423 * but that can deadlock->flush only current cpu. 1131 * but that can deadlock->flush only current cpu.
1424 */ 1132 */
@@ -1439,18 +1147,150 @@ unsigned long __init find_ecache_flush_span(unsigned long size)
1439 return ~0UL; 1147 return ~0UL;
1440} 1148}
1441 1149
1150static void __init tsb_phys_patch(void)
1151{
1152 struct tsb_ldquad_phys_patch_entry *pquad;
1153 struct tsb_phys_patch_entry *p;
1154
1155 pquad = &__tsb_ldquad_phys_patch;
1156 while (pquad < &__tsb_ldquad_phys_patch_end) {
1157 unsigned long addr = pquad->addr;
1158
1159 if (tlb_type == hypervisor)
1160 *(unsigned int *) addr = pquad->sun4v_insn;
1161 else
1162 *(unsigned int *) addr = pquad->sun4u_insn;
1163 wmb();
1164 __asm__ __volatile__("flush %0"
1165 : /* no outputs */
1166 : "r" (addr));
1167
1168 pquad++;
1169 }
1170
1171 p = &__tsb_phys_patch;
1172 while (p < &__tsb_phys_patch_end) {
1173 unsigned long addr = p->addr;
1174
1175 *(unsigned int *) addr = p->insn;
1176 wmb();
1177 __asm__ __volatile__("flush %0"
1178 : /* no outputs */
1179 : "r" (addr));
1180
1181 p++;
1182 }
1183}
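
tsb_phys_patch() rewrites pre-recorded instruction slots at boot, choosing the sun4v or sun4u encoding, then flushes each patched address so the I-cache picks up the new word. A compilable sketch of the general pattern; the struct layout here is inferred from the loop above rather than taken from the real header, and the wmb()/flush step is omitted:

/* Sketch only: layout inferred from the usage above, not from the
 * linker-generated patch tables; cache maintenance is left out.
 */
struct ldquad_patch_entry {
	unsigned int *addr;		/* instruction word to rewrite */
	unsigned int sun4u_insn;	/* replacement on sun4u */
	unsigned int sun4v_insn;	/* replacement under the hypervisor */
};

static void apply_ldquad_patches(struct ldquad_patch_entry *p,
				 struct ldquad_patch_entry *end,
				 int on_hypervisor)
{
	for (; p < end; p++)
		*p->addr = on_hypervisor ? p->sun4v_insn : p->sun4u_insn;
}
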
1184
1185/* Don't mark as init; we give this to the Hypervisor. */

1186static struct hv_tsb_descr ktsb_descr[2];
1187extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
1188
1189static void __init sun4v_ktsb_init(void)
1190{
1191 unsigned long ktsb_pa;
1192
1193 /* First KTSB for PAGE_SIZE mappings. */
1194 ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE);
1195
1196 switch (PAGE_SIZE) {
1197 case 8 * 1024:
1198 default:
1199 ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_8K;
1200 ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_8K;
1201 break;
1202
1203 case 64 * 1024:
1204 ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_64K;
1205 ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_64K;
1206 break;
1207
1208 case 512 * 1024:
1209 ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_512K;
1210 ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_512K;
1211 break;
1212
1213 case 4 * 1024 * 1024:
1214 ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_4MB;
1215 ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_4MB;
1216 break;
1217 };
1218
1219 ktsb_descr[0].assoc = 1;
1220 ktsb_descr[0].num_ttes = KERNEL_TSB_NENTRIES;
1221 ktsb_descr[0].ctx_idx = 0;
1222 ktsb_descr[0].tsb_base = ktsb_pa;
1223 ktsb_descr[0].resv = 0;
1224
1225 /* Second KTSB for 4MB/256MB mappings. */
1226 ktsb_pa = (kern_base +
1227 ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
1228
1229 ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB;
1230 ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB |
1231 HV_PGSZ_MASK_256MB);
1232 ktsb_descr[1].assoc = 1;
1233 ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES;
1234 ktsb_descr[1].ctx_idx = 0;
1235 ktsb_descr[1].tsb_base = ktsb_pa;
1236 ktsb_descr[1].resv = 0;
1237}
1238
1239void __cpuinit sun4v_ktsb_register(void)
1240{
1241 register unsigned long func asm("%o5");
1242 register unsigned long arg0 asm("%o0");
1243 register unsigned long arg1 asm("%o1");
1244 unsigned long pa;
1245
1246 pa = kern_base + ((unsigned long)&ktsb_descr[0] - KERNBASE);
1247
1248 func = HV_FAST_MMU_TSB_CTX0;
1249 arg0 = 2;
1250 arg1 = pa;
1251 __asm__ __volatile__("ta %6"
1252 : "=&r" (func), "=&r" (arg0), "=&r" (arg1)
1253 : "0" (func), "1" (arg0), "2" (arg1),
1254 "i" (HV_FAST_TRAP));
1255}
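
sun4v_ktsb_register() uses the sun4v fast-trap convention that also appears in the ultra.S hunks below: the API function number goes in %o5, the arguments in %o0/%o1, the trap is "ta HV_FAST_TRAP", and a nonzero %o0 on return is an error status (ignored here, checked in the assembly versions). A hedged, generic sketch of that convention; the wrapper name and two-argument shape are assumptions, not an existing kernel API:

/* Sketch of the calling convention above; assumes <asm/hypervisor.h>
 * for HV_FAST_TRAP. Returns the status word from %o0, where nonzero
 * means failure, as the error branches in ultra.S below expect.
 */
static unsigned long sun4v_fast_trap2(unsigned long func,
				      unsigned long arg0,
				      unsigned long arg1)
{
	register unsigned long o5 asm("%o5") = func;
	register unsigned long o0 asm("%o0") = arg0;
	register unsigned long o1 asm("%o1") = arg1;

	__asm__ __volatile__("ta %3"
			     : "=&r" (o5), "=&r" (o0), "=&r" (o1)
			     : "i" (HV_FAST_TRAP),
			       "0" (o5), "1" (o0), "2" (o1));
	return o0;
}
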
1256
1442/* paging_init() sets up the page tables */ 1257/* paging_init() sets up the page tables */
1443 1258
1444extern void cheetah_ecache_flush_init(void); 1259extern void cheetah_ecache_flush_init(void);
1260extern void sun4v_patch_tlb_handlers(void);
1445 1261
1446static unsigned long last_valid_pfn; 1262static unsigned long last_valid_pfn;
1447pgd_t swapper_pg_dir[2048]; 1263pgd_t swapper_pg_dir[2048];
1448 1264
1265static void sun4u_pgprot_init(void);
1266static void sun4v_pgprot_init(void);
1267
1449void __init paging_init(void) 1268void __init paging_init(void)
1450{ 1269{
1451 unsigned long end_pfn, pages_avail, shift; 1270 unsigned long end_pfn, pages_avail, shift, phys_base;
1452 unsigned long real_end, i; 1271 unsigned long real_end, i;
1453 1272
1273 kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
1274 kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
1275
1276 /* Invalidate both kernel TSBs. */
1277 memset(swapper_tsb, 0x40, sizeof(swapper_tsb));
1278 memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
1279
1280 if (tlb_type == hypervisor)
1281 sun4v_pgprot_init();
1282 else
1283 sun4u_pgprot_init();
1284
1285 if (tlb_type == cheetah_plus ||
1286 tlb_type == hypervisor)
1287 tsb_phys_patch();
1288
1289 if (tlb_type == hypervisor) {
1290 sun4v_patch_tlb_handlers();
1291 sun4v_ktsb_init();
1292 }
1293
1454 /* Find available physical memory... */ 1294 /* Find available physical memory... */
1455 read_obp_memory("available", &pavail[0], &pavail_ents); 1295 read_obp_memory("available", &pavail[0], &pavail_ents);
1456 1296
@@ -1458,11 +1298,6 @@ void __init paging_init(void)
1458 for (i = 0; i < pavail_ents; i++) 1298 for (i = 0; i < pavail_ents; i++)
1459 phys_base = min(phys_base, pavail[i].phys_addr); 1299 phys_base = min(phys_base, pavail[i].phys_addr);
1460 1300
1461 pfn_base = phys_base >> PAGE_SHIFT;
1462
1463 kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
1464 kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
1465
1466 set_bit(0, mmu_context_bmap); 1301 set_bit(0, mmu_context_bmap);
1467 1302
1468 shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE); 1303 shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE);
@@ -1486,47 +1321,38 @@ void __init paging_init(void)
1486 pud_set(pud_offset(&swapper_pg_dir[0], 0), 1321 pud_set(pud_offset(&swapper_pg_dir[0], 0),
1487 swapper_low_pmd_dir + (shift / sizeof(pgd_t))); 1322 swapper_low_pmd_dir + (shift / sizeof(pgd_t)));
1488 1323
1489 swapper_pgd_zero = pgd_val(swapper_pg_dir[0]);
1490
1491 inherit_prom_mappings(); 1324 inherit_prom_mappings();
1492 1325
1493 /* Ok, we can use our TLB miss and window trap handlers safely. 1326 /* Ok, we can use our TLB miss and window trap handlers safely. */
1494 * We need to do a quick peek here to see if we are on StarFire 1327 setup_tba();
1495 * or not, so setup_tba can setup the IRQ globals correctly (it
1496 * needs to get the hard smp processor id correctly).
1497 */
1498 {
1499 extern void setup_tba(int);
1500 setup_tba(this_is_starfire);
1501 }
1502
1503 inherit_locked_prom_mappings(1);
1504 1328
1505 __flush_tlb_all(); 1329 __flush_tlb_all();
1506 1330
1331 if (tlb_type == hypervisor)
1332 sun4v_ktsb_register();
1333
1507 /* Setup bootmem... */ 1334 /* Setup bootmem... */
1508 pages_avail = 0; 1335 pages_avail = 0;
1509 last_valid_pfn = end_pfn = bootmem_init(&pages_avail); 1336 last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base);
1337
1338 max_mapnr = last_valid_pfn;
1510 1339
1511#ifdef CONFIG_DEBUG_PAGEALLOC
1512 kernel_physical_mapping_init(); 1340 kernel_physical_mapping_init();
1513#endif
1514 1341
1515 { 1342 {
1516 unsigned long zones_size[MAX_NR_ZONES]; 1343 unsigned long zones_size[MAX_NR_ZONES];
1517 unsigned long zholes_size[MAX_NR_ZONES]; 1344 unsigned long zholes_size[MAX_NR_ZONES];
1518 unsigned long npages;
1519 int znum; 1345 int znum;
1520 1346
1521 for (znum = 0; znum < MAX_NR_ZONES; znum++) 1347 for (znum = 0; znum < MAX_NR_ZONES; znum++)
1522 zones_size[znum] = zholes_size[znum] = 0; 1348 zones_size[znum] = zholes_size[znum] = 0;
1523 1349
1524 npages = end_pfn - pfn_base; 1350 zones_size[ZONE_DMA] = end_pfn;
1525 zones_size[ZONE_DMA] = npages; 1351 zholes_size[ZONE_DMA] = end_pfn - pages_avail;
1526 zholes_size[ZONE_DMA] = npages - pages_avail;
1527 1352
1528 free_area_init_node(0, &contig_page_data, zones_size, 1353 free_area_init_node(0, &contig_page_data, zones_size,
1529 phys_base >> PAGE_SHIFT, zholes_size); 1354 __pa(PAGE_OFFSET) >> PAGE_SHIFT,
1355 zholes_size);
1530 } 1356 }
1531 1357
1532 device_scan(); 1358 device_scan();
@@ -1596,7 +1422,6 @@ void __init mem_init(void)
1596 1422
1597 taint_real_pages(); 1423 taint_real_pages();
1598 1424
1599 max_mapnr = last_valid_pfn - pfn_base;
1600 high_memory = __va(last_valid_pfn << PAGE_SHIFT); 1425 high_memory = __va(last_valid_pfn << PAGE_SHIFT);
1601 1426
1602#ifdef CONFIG_DEBUG_BOOTMEM 1427#ifdef CONFIG_DEBUG_BOOTMEM
@@ -1653,7 +1478,7 @@ void free_initmem(void)
1653 p = virt_to_page(page); 1478 p = virt_to_page(page);
1654 1479
1655 ClearPageReserved(p); 1480 ClearPageReserved(p);
1656 set_page_count(p, 1); 1481 init_page_count(p);
1657 __free_page(p); 1482 __free_page(p);
1658 num_physpages++; 1483 num_physpages++;
1659 totalram_pages++; 1484 totalram_pages++;
@@ -1669,10 +1494,349 @@ void free_initrd_mem(unsigned long start, unsigned long end)
1669 struct page *p = virt_to_page(start); 1494 struct page *p = virt_to_page(start);
1670 1495
1671 ClearPageReserved(p); 1496 ClearPageReserved(p);
1672 set_page_count(p, 1); 1497 init_page_count(p);
1673 __free_page(p); 1498 __free_page(p);
1674 num_physpages++; 1499 num_physpages++;
1675 totalram_pages++; 1500 totalram_pages++;
1676 } 1501 }
1677} 1502}
1678#endif 1503#endif
1504
1505#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
1506#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
1507#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
1508#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
1509#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
1510#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)
1511
1512pgprot_t PAGE_KERNEL __read_mostly;
1513EXPORT_SYMBOL(PAGE_KERNEL);
1514
1515pgprot_t PAGE_KERNEL_LOCKED __read_mostly;
1516pgprot_t PAGE_COPY __read_mostly;
1517
1518pgprot_t PAGE_SHARED __read_mostly;
1519EXPORT_SYMBOL(PAGE_SHARED);
1520
1521pgprot_t PAGE_EXEC __read_mostly;
1522unsigned long pg_iobits __read_mostly;
1523
1524unsigned long _PAGE_IE __read_mostly;
1525
1526unsigned long _PAGE_E __read_mostly;
1527EXPORT_SYMBOL(_PAGE_E);
1528
1529unsigned long _PAGE_CACHE __read_mostly;
1530EXPORT_SYMBOL(_PAGE_CACHE);
1531
1532static void prot_init_common(unsigned long page_none,
1533 unsigned long page_shared,
1534 unsigned long page_copy,
1535 unsigned long page_readonly,
1536 unsigned long page_exec_bit)
1537{
1538 PAGE_COPY = __pgprot(page_copy);
1539 PAGE_SHARED = __pgprot(page_shared);
1540
1541 protection_map[0x0] = __pgprot(page_none);
1542 protection_map[0x1] = __pgprot(page_readonly & ~page_exec_bit);
1543 protection_map[0x2] = __pgprot(page_copy & ~page_exec_bit);
1544 protection_map[0x3] = __pgprot(page_copy & ~page_exec_bit);
1545 protection_map[0x4] = __pgprot(page_readonly);
1546 protection_map[0x5] = __pgprot(page_readonly);
1547 protection_map[0x6] = __pgprot(page_copy);
1548 protection_map[0x7] = __pgprot(page_copy);
1549 protection_map[0x8] = __pgprot(page_none);
1550 protection_map[0x9] = __pgprot(page_readonly & ~page_exec_bit);
1551 protection_map[0xa] = __pgprot(page_shared & ~page_exec_bit);
1552 protection_map[0xb] = __pgprot(page_shared & ~page_exec_bit);
1553 protection_map[0xc] = __pgprot(page_readonly);
1554 protection_map[0xd] = __pgprot(page_readonly);
1555 protection_map[0xe] = __pgprot(page_shared);
1556 protection_map[0xf] = __pgprot(page_shared);
1557}
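
prot_init_common() fills protection_map[], which the generic mm code indexes with the low four VM flag bits (conventionally READ=0x1, WRITE=0x2, EXEC=0x4, SHARED=0x8); a private read+write mapping therefore lands on slot 0x3 and gets the copy-on-write entry with the execute bit cleared. A minimal sketch of that lookup, assuming the standard flag encoding:

/* Illustration only: how the table above is consumed. Assumes the
 * conventional VM flag encoding; slot 0x3 == private read+write.
 */
static pgprot_t prot_for(unsigned long vm_flags)
{
	return protection_map[vm_flags & 0xf];
}
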
1558
1559static void __init sun4u_pgprot_init(void)
1560{
1561 unsigned long page_none, page_shared, page_copy, page_readonly;
1562 unsigned long page_exec_bit;
1563
1564 PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
1565 _PAGE_CACHE_4U | _PAGE_P_4U |
1566 __ACCESS_BITS_4U | __DIRTY_BITS_4U |
1567 _PAGE_EXEC_4U);
1568 PAGE_KERNEL_LOCKED = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
1569 _PAGE_CACHE_4U | _PAGE_P_4U |
1570 __ACCESS_BITS_4U | __DIRTY_BITS_4U |
1571 _PAGE_EXEC_4U | _PAGE_L_4U);
1572 PAGE_EXEC = __pgprot(_PAGE_EXEC_4U);
1573
1574 _PAGE_IE = _PAGE_IE_4U;
1575 _PAGE_E = _PAGE_E_4U;
1576 _PAGE_CACHE = _PAGE_CACHE_4U;
1577
1578 pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4U | __DIRTY_BITS_4U |
1579 __ACCESS_BITS_4U | _PAGE_E_4U);
1580
1581 kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
1582 0xfffff80000000000;
1583 kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U |
1584 _PAGE_P_4U | _PAGE_W_4U);
1585
1586 /* XXX Should use 256MB on Panther. XXX */
1587 kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
1588
1589 _PAGE_SZBITS = _PAGE_SZBITS_4U;
1590 _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
1591 _PAGE_SZ64K_4U | _PAGE_SZ8K_4U |
1592 _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U);
1593
1594
1595 page_none = _PAGE_PRESENT_4U | _PAGE_ACCESSED_4U | _PAGE_CACHE_4U;
1596 page_shared = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
1597 __ACCESS_BITS_4U | _PAGE_WRITE_4U | _PAGE_EXEC_4U);
1598 page_copy = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
1599 __ACCESS_BITS_4U | _PAGE_EXEC_4U);
1600 page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
1601 __ACCESS_BITS_4U | _PAGE_EXEC_4U);
1602
1603 page_exec_bit = _PAGE_EXEC_4U;
1604
1605 prot_init_common(page_none, page_shared, page_copy, page_readonly,
1606 page_exec_bit);
1607}
1608
1609static void __init sun4v_pgprot_init(void)
1610{
1611 unsigned long page_none, page_shared, page_copy, page_readonly;
1612 unsigned long page_exec_bit;
1613
1614 PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
1615 _PAGE_CACHE_4V | _PAGE_P_4V |
1616 __ACCESS_BITS_4V | __DIRTY_BITS_4V |
1617 _PAGE_EXEC_4V);
1618 PAGE_KERNEL_LOCKED = PAGE_KERNEL;
1619 PAGE_EXEC = __pgprot(_PAGE_EXEC_4V);
1620
1621 _PAGE_IE = _PAGE_IE_4V;
1622 _PAGE_E = _PAGE_E_4V;
1623 _PAGE_CACHE = _PAGE_CACHE_4V;
1624
1625 kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
1626 0xfffff80000000000;
1627 kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
1628 _PAGE_P_4V | _PAGE_W_4V);
1629
1630 kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
1631 0xfffff80000000000;
1632 kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
1633 _PAGE_P_4V | _PAGE_W_4V);
1634
1635 pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
1636 __ACCESS_BITS_4V | _PAGE_E_4V);
1637
1638 _PAGE_SZBITS = _PAGE_SZBITS_4V;
1639 _PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V |
1640 _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V |
1641 _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
1642 _PAGE_SZ64K_4V | _PAGE_SZ8K_4V);
1643
1644 page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V;
1645 page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
1646 __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V);
1647 page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
1648 __ACCESS_BITS_4V | _PAGE_EXEC_4V);
1649 page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
1650 __ACCESS_BITS_4V | _PAGE_EXEC_4V);
1651
1652 page_exec_bit = _PAGE_EXEC_4V;
1653
1654 prot_init_common(page_none, page_shared, page_copy, page_readonly,
1655 page_exec_bit);
1656}
1657
1658unsigned long pte_sz_bits(unsigned long sz)
1659{
1660 if (tlb_type == hypervisor) {
1661 switch (sz) {
1662 case 8 * 1024:
1663 default:
1664 return _PAGE_SZ8K_4V;
1665 case 64 * 1024:
1666 return _PAGE_SZ64K_4V;
1667 case 512 * 1024:
1668 return _PAGE_SZ512K_4V;
1669 case 4 * 1024 * 1024:
1670 return _PAGE_SZ4MB_4V;
1671 };
1672 } else {
1673 switch (sz) {
1674 case 8 * 1024:
1675 default:
1676 return _PAGE_SZ8K_4U;
1677 case 64 * 1024:
1678 return _PAGE_SZ64K_4U;
1679 case 512 * 1024:
1680 return _PAGE_SZ512K_4U;
1681 case 4 * 1024 * 1024:
1682 return _PAGE_SZ4MB_4U;
1683 };
1684 }
1685}
1686
1687pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space, unsigned long page_size)
1688{
1689 pte_t pte;
1690
1691 pte_val(pte) = page | pgprot_val(pgprot_noncached(prot));
1692 pte_val(pte) |= (((unsigned long)space) << 32);
1693 pte_val(pte) |= pte_sz_bits(page_size);
1694
1695 return pte;
1696}
1697
1698static unsigned long kern_large_tte(unsigned long paddr)
1699{
1700 unsigned long val;
1701
1702 val = (_PAGE_VALID | _PAGE_SZ4MB_4U |
1703 _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U |
1704 _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U);
1705 if (tlb_type == hypervisor)
1706 val = (_PAGE_VALID | _PAGE_SZ4MB_4V |
1707 _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V |
1708 _PAGE_EXEC_4V | _PAGE_W_4V);
1709
1710 return val | paddr;
1711}
1712
1713/*
1714 * Translate PROM's mapping we capture at boot time into physical address.
1715 * The second parameter is only set from prom_callback() invocations.
1716 */
1717unsigned long prom_virt_to_phys(unsigned long promva, int *error)
1718{
1719 unsigned long mask;
1720 int i;
1721
1722 mask = _PAGE_PADDR_4U;
1723 if (tlb_type == hypervisor)
1724 mask = _PAGE_PADDR_4V;
1725
1726 for (i = 0; i < prom_trans_ents; i++) {
1727 struct linux_prom_translation *p = &prom_trans[i];
1728
1729 if (promva >= p->virt &&
1730 promva < (p->virt + p->size)) {
1731 unsigned long base = p->data & mask;
1732
1733 if (error)
1734 *error = 0;
1735 return base + (promva & (8192 - 1));
1736 }
1737 }
1738 if (error)
1739 *error = 1;
1740 return 0UL;
1741}
1742
1743/* XXX We should kill off this ugly thing at some point. XXX */
1744unsigned long sun4u_get_pte(unsigned long addr)
1745{
1746 pgd_t *pgdp;
1747 pud_t *pudp;
1748 pmd_t *pmdp;
1749 pte_t *ptep;
1750 unsigned long mask = _PAGE_PADDR_4U;
1751
1752 if (tlb_type == hypervisor)
1753 mask = _PAGE_PADDR_4V;
1754
1755 if (addr >= PAGE_OFFSET)
1756 return addr & mask;
1757
1758 if ((addr >= LOW_OBP_ADDRESS) && (addr < HI_OBP_ADDRESS))
1759 return prom_virt_to_phys(addr, NULL);
1760
1761 pgdp = pgd_offset_k(addr);
1762 pudp = pud_offset(pgdp, addr);
1763 pmdp = pmd_offset(pudp, addr);
1764 ptep = pte_offset_kernel(pmdp, addr);
1765
1766 return pte_val(*ptep) & mask;
1767}
1768
1769/* If not locked, zap it. */
1770void __flush_tlb_all(void)
1771{
1772 unsigned long pstate;
1773 int i;
1774
1775 __asm__ __volatile__("flushw\n\t"
1776 "rdpr %%pstate, %0\n\t"
1777 "wrpr %0, %1, %%pstate"
1778 : "=r" (pstate)
1779 : "i" (PSTATE_IE));
1780 if (tlb_type == spitfire) {
1781 for (i = 0; i < 64; i++) {
1782 /* Spitfire Errata #32 workaround */
1783 /* NOTE: Always runs on spitfire, so no
1784 * cheetah+ page size encodings.
1785 */
1786 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
1787 "flush %%g6"
1788 : /* No outputs */
1789 : "r" (0),
1790 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
1791
1792 if (!(spitfire_get_dtlb_data(i) & _PAGE_L_4U)) {
1793 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
1794 "membar #Sync"
1795 : /* no outputs */
1796 : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
1797 spitfire_put_dtlb_data(i, 0x0UL);
1798 }
1799
1800 /* Spitfire Errata #32 workaround */
1801 /* NOTE: Always runs on spitfire, so no
1802 * cheetah+ page size encodings.
1803 */
1804 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
1805 "flush %%g6"
1806 : /* No outputs */
1807 : "r" (0),
1808 "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
1809
1810 if (!(spitfire_get_itlb_data(i) & _PAGE_L_4U)) {
1811 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
1812 "membar #Sync"
1813 : /* no outputs */
1814 : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
1815 spitfire_put_itlb_data(i, 0x0UL);
1816 }
1817 }
1818 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
1819 cheetah_flush_dtlb_all();
1820 cheetah_flush_itlb_all();
1821 }
1822 __asm__ __volatile__("wrpr %0, 0, %%pstate"
1823 : : "r" (pstate));
1824}
1825
1826#ifdef CONFIG_MEMORY_HOTPLUG
1827
1828void online_page(struct page *page)
1829{
1830 ClearPageReserved(page);
1831 init_page_count(page);
1832 __free_page(page);
1833 totalram_pages++;
1834 num_physpages++;
1835}
1836
1837int remove_memory(u64 start, u64 size)
1838{
1839 return -EINVAL;
1840}
1841
1842#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
index 8b104be4662b..a079cf42505e 100644
--- a/arch/sparc64/mm/tlb.c
+++ b/arch/sparc64/mm/tlb.c
@@ -25,6 +25,8 @@ void flush_tlb_pending(void)
25 struct mmu_gather *mp = &__get_cpu_var(mmu_gathers); 25 struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
26 26
27 if (mp->tlb_nr) { 27 if (mp->tlb_nr) {
28 flush_tsb_user(mp);
29
28 if (CTX_VALID(mp->mm->context)) { 30 if (CTX_VALID(mp->mm->context)) {
29#ifdef CONFIG_SMP 31#ifdef CONFIG_SMP
30 smp_flush_tlb_pending(mp->mm, mp->tlb_nr, 32 smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
@@ -47,7 +49,8 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t
47 if (pte_exec(orig)) 49 if (pte_exec(orig))
48 vaddr |= 0x1UL; 50 vaddr |= 0x1UL;
49 51
50 if (pte_dirty(orig)) { 52 if (tlb_type != hypervisor &&
53 pte_dirty(orig)) {
51 unsigned long paddr, pfn = pte_pfn(orig); 54 unsigned long paddr, pfn = pte_pfn(orig);
52 struct address_space *mapping; 55 struct address_space *mapping;
53 struct page *page; 56 struct page *page;
@@ -89,62 +92,3 @@ no_cache_flush:
89 if (nr >= TLB_BATCH_NR) 92 if (nr >= TLB_BATCH_NR)
90 flush_tlb_pending(); 93 flush_tlb_pending();
91} 94}
92
93void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
94{
95 struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
96 unsigned long nr = mp->tlb_nr;
97 long s = start, e = end, vpte_base;
98
99 if (mp->fullmm)
100 return;
101
102 /* If start is greater than end, that is a real problem. */
103 BUG_ON(start > end);
104
105 /* However, straddling the VA space hole is quite normal. */
106 s &= PMD_MASK;
107 e = (e + PMD_SIZE - 1) & PMD_MASK;
108
109 vpte_base = (tlb_type == spitfire ?
110 VPTE_BASE_SPITFIRE :
111 VPTE_BASE_CHEETAH);
112
113 if (unlikely(nr != 0 && mm != mp->mm)) {
114 flush_tlb_pending();
115 nr = 0;
116 }
117
118 if (nr == 0)
119 mp->mm = mm;
120
121 start = vpte_base + (s >> (PAGE_SHIFT - 3));
122 end = vpte_base + (e >> (PAGE_SHIFT - 3));
123
124 /* If the request straddles the VA space hole, we
125 * need to swap start and end. The reason this
126 * occurs is that "vpte_base" is the center of
127 * the linear page table mapping area. Thus,
128 * high addresses with the sign bit set map to
129 * addresses below vpte_base and non-sign bit
130 * addresses map to addresses above vpte_base.
131 */
132 if (end < start) {
133 unsigned long tmp = start;
134
135 start = end;
136 end = tmp;
137 }
138
139 while (start < end) {
140 mp->vaddrs[nr] = start;
141 mp->tlb_nr = ++nr;
142 if (nr >= TLB_BATCH_NR) {
143 flush_tlb_pending();
144 nr = 0;
145 }
146 start += PAGE_SIZE;
147 }
148 if (nr)
149 flush_tlb_pending();
150}
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
new file mode 100644
index 000000000000..beaa02810f0e
--- /dev/null
+++ b/arch/sparc64/mm/tsb.c
@@ -0,0 +1,500 @@
1/* arch/sparc64/mm/tsb.c
2 *
3 * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <asm/system.h>
8#include <asm/page.h>
9#include <asm/tlbflush.h>
10#include <asm/tlb.h>
11#include <asm/mmu_context.h>
12#include <asm/pgtable.h>
13#include <asm/tsb.h>
14#include <asm/oplib.h>
15
16extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
17
18static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
19{
20 vaddr >>= hash_shift;
21 return vaddr & (nentries - 1);
22}
23
24static inline int tag_compare(unsigned long tag, unsigned long vaddr)
25{
26 return (tag == (vaddr >> 22));
27}
28
29/* TSB flushes need only occur on the processor initiating the address
30 * space modification, not on each cpu the address space has run on.
31 * Only the TLB flush needs that treatment.
32 */
33
34void flush_tsb_kernel_range(unsigned long start, unsigned long end)
35{
36 unsigned long v;
37
38 for (v = start; v < end; v += PAGE_SIZE) {
39 unsigned long hash = tsb_hash(v, PAGE_SHIFT,
40 KERNEL_TSB_NENTRIES);
41 struct tsb *ent = &swapper_tsb[hash];
42
43 if (tag_compare(ent->tag, v)) {
44 ent->tag = (1UL << TSB_TAG_INVALID_BIT);
45 membar_storeload_storestore();
46 }
47 }
48}
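
The kernel TSB lookup above hashes the faulting virtual address into a slot and compares a tag formed from the upper address bits. A minimal standalone sketch of that arithmetic, mirroring tsb_hash()/tag_compare(); the page shift and entry count are illustrative assumptions, not the kernel's actual configuration:

/* Standalone sketch of the slot/tag computation used above. */
#include <stdio.h>

int main(void)
{
	unsigned long vaddr = 0xfffff80012345000UL;	/* example address */
	unsigned long hash_shift = 13;			/* assume 8K pages */
	unsigned long nentries = 512;			/* assume 512-entry TSB */

	unsigned long slot = (vaddr >> hash_shift) & (nentries - 1);
	unsigned long tag = vaddr >> 22;		/* what the entry stores */

	printf("slot %lu, tag %#lx\n", slot, tag);
	return 0;
}
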
49
50static void __flush_tsb_one(struct mmu_gather *mp, unsigned long hash_shift, unsigned long tsb, unsigned long nentries)
51{
52 unsigned long i;
53
54 for (i = 0; i < mp->tlb_nr; i++) {
55 unsigned long v = mp->vaddrs[i];
56 unsigned long tag, ent, hash;
57
58 v &= ~0x1UL;
59
60 hash = tsb_hash(v, hash_shift, nentries);
61 ent = tsb + (hash * sizeof(struct tsb));
62 tag = (v >> 22UL);
63
64 tsb_flush(ent, tag);
65 }
66}
67
68void flush_tsb_user(struct mmu_gather *mp)
69{
70 struct mm_struct *mm = mp->mm;
71 unsigned long nentries, base, flags;
72
73 spin_lock_irqsave(&mm->context.lock, flags);
74
75 base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
76 nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
77 if (tlb_type == cheetah_plus || tlb_type == hypervisor)
78 base = __pa(base);
79 __flush_tsb_one(mp, PAGE_SHIFT, base, nentries);
80
81#ifdef CONFIG_HUGETLB_PAGE
82 if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
83 base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
84 nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
85 if (tlb_type == cheetah_plus || tlb_type == hypervisor)
86 base = __pa(base);
87 __flush_tsb_one(mp, HPAGE_SHIFT, base, nentries);
88 }
89#endif
90 spin_unlock_irqrestore(&mm->context.lock, flags);
91}
92
93#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
94#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K
95#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K
96#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
97#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_64K
98#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_64K
99#elif defined(CONFIG_SPARC64_PAGE_SIZE_512KB)
100#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_512K
101#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_512K
102#elif defined(CONFIG_SPARC64_PAGE_SIZE_4MB)
103#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_4MB
104#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_4MB
105#else
106#error Broken base page size setting...
107#endif
108
109#ifdef CONFIG_HUGETLB_PAGE
110#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
111#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_64K
112#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_64K
113#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
114#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_512K
115#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_512K
116#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
117#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_4MB
118#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_4MB
119#else
120#error Broken huge page size setting...
121#endif
122#endif
123
124static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
125{
126 unsigned long tsb_reg, base, tsb_paddr;
127 unsigned long page_sz, tte;
128
129 mm->context.tsb_block[tsb_idx].tsb_nentries =
130 tsb_bytes / sizeof(struct tsb);
131
132 base = TSBMAP_BASE;
133 tte = pgprot_val(PAGE_KERNEL_LOCKED);
134 tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
135 BUG_ON(tsb_paddr & (tsb_bytes - 1UL));
136
137 /* Use the smallest page size that can map the whole TSB
138 * in one TLB entry.
139 */
140 switch (tsb_bytes) {
141 case 8192 << 0:
142 tsb_reg = 0x0UL;
143#ifdef DCACHE_ALIASING_POSSIBLE
144 base += (tsb_paddr & 8192);
145#endif
146 page_sz = 8192;
147 break;
148
149 case 8192 << 1:
150 tsb_reg = 0x1UL;
151 page_sz = 64 * 1024;
152 break;
153
154 case 8192 << 2:
155 tsb_reg = 0x2UL;
156 page_sz = 64 * 1024;
157 break;
158
159 case 8192 << 3:
160 tsb_reg = 0x3UL;
161 page_sz = 64 * 1024;
162 break;
163
164 case 8192 << 4:
165 tsb_reg = 0x4UL;
166 page_sz = 512 * 1024;
167 break;
168
169 case 8192 << 5:
170 tsb_reg = 0x5UL;
171 page_sz = 512 * 1024;
172 break;
173
174 case 8192 << 6:
175 tsb_reg = 0x6UL;
176 page_sz = 512 * 1024;
177 break;
178
179 case 8192 << 7:
180 tsb_reg = 0x7UL;
181 page_sz = 4 * 1024 * 1024;
182 break;
183
184 default:
185 BUG();
186 };
187 tte |= pte_sz_bits(page_sz);
188
189 if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
190 /* Physical mapping, no locked TLB entry for TSB. */
191 tsb_reg |= tsb_paddr;
192
193 mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
194 mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
195 mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
196 } else {
197 tsb_reg |= base;
198 tsb_reg |= (tsb_paddr & (page_sz - 1UL));
199 tte |= (tsb_paddr & ~(page_sz - 1UL));
200
201 mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
202 mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
203 mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
204 }
205
206 /* Setup the Hypervisor TSB descriptor. */
207 if (tlb_type == hypervisor) {
208 struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];
209
210 switch (tsb_idx) {
211 case MM_TSB_BASE:
212 hp->pgsz_idx = HV_PGSZ_IDX_BASE;
213 break;
214#ifdef CONFIG_HUGETLB_PAGE
215 case MM_TSB_HUGE:
216 hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
217 break;
218#endif
219 default:
220 BUG();
221 };
222 hp->assoc = 1;
223 hp->num_ttes = tsb_bytes / 16;
224 hp->ctx_idx = 0;
225 switch (tsb_idx) {
226 case MM_TSB_BASE:
227 hp->pgsz_mask = HV_PGSZ_MASK_BASE;
228 break;
229#ifdef CONFIG_HUGETLB_PAGE
230 case MM_TSB_HUGE:
231 hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
232 break;
233#endif
234 default:
235 BUG();
236 };
237 hp->tsb_base = tsb_paddr;
238 hp->resv = 0;
239 }
240}
241
242static kmem_cache_t *tsb_caches[8] __read_mostly;
243
244static const char *tsb_cache_names[8] = {
245 "tsb_8KB",
246 "tsb_16KB",
247 "tsb_32KB",
248 "tsb_64KB",
249 "tsb_128KB",
250 "tsb_256KB",
251 "tsb_512KB",
252 "tsb_1MB",
253};
254
255void __init tsb_cache_init(void)
256{
257 unsigned long i;
258
259 for (i = 0; i < 8; i++) {
260 unsigned long size = 8192 << i;
261 const char *name = tsb_cache_names[i];
262
263 tsb_caches[i] = kmem_cache_create(name,
264 size, size,
265 SLAB_HWCACHE_ALIGN |
266 SLAB_MUST_HWCACHE_ALIGN,
267 NULL, NULL);
268 if (!tsb_caches[i]) {
269 prom_printf("Could not create %s cache\n", name);
270 prom_halt();
271 }
272 }
273}
274
275/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
276 * do_sparc64_fault() invokes this routine to try and grow it.
277 *
278 * When we reach the maximum TSB size supported, we stick ~0UL into
279 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
280 * will not trigger any longer.
281 *
282 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
283 * of two. The TSB must be aligned to its size, so e.g. a 512K TSB
284 * must be 512K aligned. It also must be physically contiguous, so we
285 * cannot use vmalloc().
286 *
287 * The idea here is to grow the TSB when the RSS of the process approaches
288 * the number of entries that the current TSB can hold at once. Currently,
289 * we trigger when the RSS hits 3/4 of the TSB capacity.
290 */
291void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
292{
293 unsigned long max_tsb_size = 1 * 1024 * 1024;
294 unsigned long new_size, old_size, flags;
295 struct tsb *old_tsb, *new_tsb;
296 unsigned long new_cache_index, old_cache_index;
297 unsigned long new_rss_limit;
298 gfp_t gfp_flags;
299
300 if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
301 max_tsb_size = (PAGE_SIZE << MAX_ORDER);
302
303 new_cache_index = 0;
304 for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
305 unsigned long n_entries = new_size / sizeof(struct tsb);
306
307 n_entries = (n_entries * 3) / 4;
308 if (n_entries > rss)
309 break;
310
311 new_cache_index++;
312 }
313
314 if (new_size == max_tsb_size)
315 new_rss_limit = ~0UL;
316 else
317 new_rss_limit = ((new_size / sizeof(struct tsb)) * 3) / 4;
318
319retry_tsb_alloc:
320 gfp_flags = GFP_KERNEL;
321 if (new_size > (PAGE_SIZE * 2))
322 gfp_flags = __GFP_NOWARN | __GFP_NORETRY;
323
324 new_tsb = kmem_cache_alloc(tsb_caches[new_cache_index], gfp_flags);
325 if (unlikely(!new_tsb)) {
326 /* Not being able to fork due to a high-order TSB
327 * allocation failure is very bad behavior. Just back
328 * down to a 0-order allocation and force no TSB
329 * growing for this address space.
330 */
331 if (mm->context.tsb_block[tsb_index].tsb == NULL &&
332 new_cache_index > 0) {
333 new_cache_index = 0;
334 new_size = 8192;
335 new_rss_limit = ~0UL;
336 goto retry_tsb_alloc;
337 }
338
339 /* If we failed on a TSB grow, we are under serious
340 * memory pressure so don't try to grow any more.
341 */
342 if (mm->context.tsb_block[tsb_index].tsb != NULL)
343 mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
344 return;
345 }
346
347 /* Mark all tags as invalid. */
348 tsb_init(new_tsb, new_size);
349
350 /* Ok, we are about to commit the changes. If we are
351 * growing an existing TSB the locking is very tricky,
352 * so WATCH OUT!
353 *
354 * We have to hold mm->context.lock while committing to the
355 * new TSB; this synchronizes us with processors in
356 * flush_tsb_user() and switch_mm() for this address space.
357 *
358 * But even with that lock held, processors run asynchronously
359 * accessing the old TSB via TLB miss handling. This is OK
360 * because those actions are just propagating state from the
361 * Linux page tables into the TSB, page table mappings are not
362 * being changed. If a real fault occurs, the processor will
363 * synchronize with us when it hits flush_tsb_user(), this is
364 * also true for the case where vmscan is modifying the page
365 * tables. The only thing we need to be careful with is to
366 * skip any locked TSB entries during copy_tsb().
367 *
368 * When we finish committing to the new TSB, we have to drop
369 * the lock and ask all other cpus running this address space
370 * to run tsb_context_switch() to see the new TSB table.
371 */
372 spin_lock_irqsave(&mm->context.lock, flags);
373
374 old_tsb = mm->context.tsb_block[tsb_index].tsb;
375 old_cache_index =
376 (mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
377 old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
378 sizeof(struct tsb));
379
380
381 /* Handle multiple threads trying to grow the TSB at the same time.
382 * One will get in here first, and bump the size and the RSS limit.
383 * The others will get in here next and hit this check.
384 */
385 if (unlikely(old_tsb &&
386 (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
387 spin_unlock_irqrestore(&mm->context.lock, flags);
388
389 kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
390 return;
391 }
392
393 mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;
394
395 if (old_tsb) {
396 extern void copy_tsb(unsigned long old_tsb_base,
397 unsigned long old_tsb_size,
398 unsigned long new_tsb_base,
399 unsigned long new_tsb_size);
400 unsigned long old_tsb_base = (unsigned long) old_tsb;
401 unsigned long new_tsb_base = (unsigned long) new_tsb;
402
403 if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
404 old_tsb_base = __pa(old_tsb_base);
405 new_tsb_base = __pa(new_tsb_base);
406 }
407 copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
408 }
409
410 mm->context.tsb_block[tsb_index].tsb = new_tsb;
411 setup_tsb_params(mm, tsb_index, new_size);
412
413 spin_unlock_irqrestore(&mm->context.lock, flags);
414
415 /* If old_tsb is NULL, we're being invoked for the first time
416 * from init_new_context().
417 */
418 if (old_tsb) {
419 /* Reload it on the local cpu. */
420 tsb_context_switch(mm);
421
422 /* Now force other processors to do the same. */
423 smp_tsb_sync(mm);
424
425 /* Now it is safe to free the old tsb. */
426 kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
427 }
428}
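
Concretely for the policy described in the comment above tsb_grow(): each TSB entry is 16 bytes (the hypervisor descriptor setup divides tsb_bytes by 16), so an 8K TSB holds 512 entries and its grow trigger is 3/4 of that, i.e. 384 pages of RSS. A standalone sketch of the size selection; the RSS value in the trailing comment is just an example:

/* Sketch of the sizing walk in tsb_grow() above: pick the smallest
 * power-of-two TSB (8K..1MB) whose 3/4-full point still exceeds rss.
 */
static unsigned long pick_tsb_size(unsigned long rss)
{
	unsigned long size;

	for (size = 8192; size < 1024 * 1024; size <<= 1) {
		unsigned long limit = (size / 16) * 3 / 4;

		if (limit > rss)
			break;
	}
	return size;	/* e.g. rss == 400 -> 16K (limit 768) */
}
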
429
430int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
431{
432#ifdef CONFIG_HUGETLB_PAGE
433 unsigned long huge_pte_count;
434#endif
435 unsigned int i;
436
437 spin_lock_init(&mm->context.lock);
438
439 mm->context.sparc64_ctx_val = 0UL;
440
441#ifdef CONFIG_HUGETLB_PAGE
442 /* We reset it to zero because the fork() page copying
443 * will re-increment the counters as the parent PTEs are
444 * copied into the child address space.
445 */
446 huge_pte_count = mm->context.huge_pte_count;
447 mm->context.huge_pte_count = 0;
448#endif
449
450 /* copy_mm() copies over the parent's mm_struct before calling
451 * us, so we need to zero out the TSB pointer or else tsb_grow()
452 * will be confused and think there is an older TSB to free up.
453 */
454 for (i = 0; i < MM_NUM_TSBS; i++)
455 mm->context.tsb_block[i].tsb = NULL;
456
457 /* If this is a fork, inherit the parent's TSB size. We would
458 * grow it to that size on the first page fault anyway.
459 */
460 tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
461
462#ifdef CONFIG_HUGETLB_PAGE
463 if (unlikely(huge_pte_count))
464 tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
465#endif
466
467 if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
468 return -ENOMEM;
469
470 return 0;
471}
472
473static void tsb_destroy_one(struct tsb_config *tp)
474{
475 unsigned long cache_index;
476
477 if (!tp->tsb)
478 return;
479 cache_index = tp->tsb_reg_val & 0x7UL;
480 kmem_cache_free(tsb_caches[cache_index], tp->tsb);
481 tp->tsb = NULL;
482 tp->tsb_reg_val = 0UL;
483}
484
485void destroy_context(struct mm_struct *mm)
486{
487 unsigned long flags, i;
488
489 for (i = 0; i < MM_NUM_TSBS; i++)
490 tsb_destroy_one(&mm->context.tsb_block[i]);
491
492 spin_lock_irqsave(&ctx_alloc_lock, flags);
493
494 if (CTX_VALID(mm->context)) {
495 unsigned long nr = CTX_NRBITS(mm->context);
496 mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
497 }
498
499 spin_unlock_irqrestore(&ctx_alloc_lock, flags);
500}
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index e4c9151fa116..f8479fad4047 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -15,6 +15,7 @@
15#include <asm/head.h> 15#include <asm/head.h>
16#include <asm/thread_info.h> 16#include <asm/thread_info.h>
17#include <asm/cacheflush.h> 17#include <asm/cacheflush.h>
18#include <asm/hypervisor.h>
18 19
19 /* Basically, most of the Spitfire vs. Cheetah madness 20 /* Basically, most of the Spitfire vs. Cheetah madness
20 * has to do with the fact that Cheetah does not support 21 * has to do with the fact that Cheetah does not support
@@ -29,16 +30,18 @@
29 .text 30 .text
30 .align 32 31 .align 32
31 .globl __flush_tlb_mm 32 .globl __flush_tlb_mm
32__flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ 33__flush_tlb_mm: /* 18 insns */
34 /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
33 ldxa [%o1] ASI_DMMU, %g2 35 ldxa [%o1] ASI_DMMU, %g2
34 cmp %g2, %o0 36 cmp %g2, %o0
35 bne,pn %icc, __spitfire_flush_tlb_mm_slow 37 bne,pn %icc, __spitfire_flush_tlb_mm_slow
36 mov 0x50, %g3 38 mov 0x50, %g3
37 stxa %g0, [%g3] ASI_DMMU_DEMAP 39 stxa %g0, [%g3] ASI_DMMU_DEMAP
38 stxa %g0, [%g3] ASI_IMMU_DEMAP 40 stxa %g0, [%g3] ASI_IMMU_DEMAP
41 sethi %hi(KERNBASE), %g3
42 flush %g3
39 retl 43 retl
40 flush %g6 44 nop
41 nop
42 nop 45 nop
43 nop 46 nop
44 nop 47 nop
@@ -51,7 +54,7 @@ __flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
51 54
52 .align 32 55 .align 32
53 .globl __flush_tlb_pending 56 .globl __flush_tlb_pending
54__flush_tlb_pending: 57__flush_tlb_pending: /* 26 insns */
55 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ 58 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
56 rdpr %pstate, %g7 59 rdpr %pstate, %g7
57 sllx %o1, 3, %o1 60 sllx %o1, 3, %o1
@@ -72,7 +75,8 @@ __flush_tlb_pending:
72 brnz,pt %o1, 1b 75 brnz,pt %o1, 1b
73 nop 76 nop
74 stxa %g2, [%o4] ASI_DMMU 77 stxa %g2, [%o4] ASI_DMMU
75 flush %g6 78 sethi %hi(KERNBASE), %o4
79 flush %o4
76 retl 80 retl
77 wrpr %g7, 0x0, %pstate 81 wrpr %g7, 0x0, %pstate
78 nop 82 nop
@@ -82,7 +86,8 @@ __flush_tlb_pending:
82 86
83 .align 32 87 .align 32
84 .globl __flush_tlb_kernel_range 88 .globl __flush_tlb_kernel_range
85__flush_tlb_kernel_range: /* %o0=start, %o1=end */ 89__flush_tlb_kernel_range: /* 16 insns */
90 /* %o0=start, %o1=end */
86 cmp %o0, %o1 91 cmp %o0, %o1
87 be,pn %xcc, 2f 92 be,pn %xcc, 2f
88 sethi %hi(PAGE_SIZE), %o4 93 sethi %hi(PAGE_SIZE), %o4
@@ -94,8 +99,11 @@ __flush_tlb_kernel_range: /* %o0=start, %o1=end */
94 membar #Sync 99 membar #Sync
95 brnz,pt %o3, 1b 100 brnz,pt %o3, 1b
96 sub %o3, %o4, %o3 101 sub %o3, %o4, %o3
972: retl 1022: sethi %hi(KERNBASE), %o3
98 flush %g6 103 flush %o3
104 retl
105 nop
106 nop
99 107
100__spitfire_flush_tlb_mm_slow: 108__spitfire_flush_tlb_mm_slow:
101 rdpr %pstate, %g1 109 rdpr %pstate, %g1
@@ -105,7 +113,8 @@ __spitfire_flush_tlb_mm_slow:
105 stxa %g0, [%g3] ASI_IMMU_DEMAP 113 stxa %g0, [%g3] ASI_IMMU_DEMAP
106 flush %g6 114 flush %g6
107 stxa %g2, [%o1] ASI_DMMU 115 stxa %g2, [%o1] ASI_DMMU
108 flush %g6 116 sethi %hi(KERNBASE), %o1
117 flush %o1
109 retl 118 retl
110 wrpr %g1, 0, %pstate 119 wrpr %g1, 0, %pstate
111 120
@@ -181,7 +190,7 @@ __flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */
181 .previous 190 .previous
182 191
183 /* Cheetah specific versions, patched at boot time. */ 192 /* Cheetah specific versions, patched at boot time. */
184__cheetah_flush_tlb_mm: /* 18 insns */ 193__cheetah_flush_tlb_mm: /* 19 insns */
185 rdpr %pstate, %g7 194 rdpr %pstate, %g7
186 andn %g7, PSTATE_IE, %g2 195 andn %g7, PSTATE_IE, %g2
187 wrpr %g2, 0x0, %pstate 196 wrpr %g2, 0x0, %pstate
@@ -196,12 +205,13 @@ __cheetah_flush_tlb_mm: /* 18 insns */
196 stxa %g0, [%g3] ASI_DMMU_DEMAP 205 stxa %g0, [%g3] ASI_DMMU_DEMAP
197 stxa %g0, [%g3] ASI_IMMU_DEMAP 206 stxa %g0, [%g3] ASI_IMMU_DEMAP
198 stxa %g2, [%o2] ASI_DMMU 207 stxa %g2, [%o2] ASI_DMMU
199 flush %g6 208 sethi %hi(KERNBASE), %o2
209 flush %o2
200 wrpr %g0, 0, %tl 210 wrpr %g0, 0, %tl
201 retl 211 retl
202 wrpr %g7, 0x0, %pstate 212 wrpr %g7, 0x0, %pstate
203 213
204__cheetah_flush_tlb_pending: /* 26 insns */ 214__cheetah_flush_tlb_pending: /* 27 insns */
205 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ 215 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
206 rdpr %pstate, %g7 216 rdpr %pstate, %g7
207 sllx %o1, 3, %o1 217 sllx %o1, 3, %o1
@@ -225,7 +235,8 @@ __cheetah_flush_tlb_pending: /* 26 insns */
225 brnz,pt %o1, 1b 235 brnz,pt %o1, 1b
226 nop 236 nop
227 stxa %g2, [%o4] ASI_DMMU 237 stxa %g2, [%o4] ASI_DMMU
228 flush %g6 238 sethi %hi(KERNBASE), %o4
239 flush %o4
229 wrpr %g0, 0, %tl 240 wrpr %g0, 0, %tl
230 retl 241 retl
231 wrpr %g7, 0x0, %pstate 242 wrpr %g7, 0x0, %pstate
@@ -245,7 +256,76 @@ __cheetah_flush_dcache_page: /* 11 insns */
245 nop 256 nop
246#endif /* DCACHE_ALIASING_POSSIBLE */ 257#endif /* DCACHE_ALIASING_POSSIBLE */
247 258
248cheetah_patch_one: 259 /* Hypervisor specific versions, patched at boot time. */
260__hypervisor_tlb_tl0_error:
261 save %sp, -192, %sp
262 mov %i0, %o0
263 call hypervisor_tlbop_error
264 mov %i1, %o1
265 ret
266 restore
267
268__hypervisor_flush_tlb_mm: /* 10 insns */
269 mov %o0, %o2 /* ARG2: mmu context */
270 mov 0, %o0 /* ARG0: CPU lists unimplemented */
271 mov 0, %o1 /* ARG1: CPU lists unimplemented */
272 mov HV_MMU_ALL, %o3 /* ARG3: flags */
273 mov HV_FAST_MMU_DEMAP_CTX, %o5
274 ta HV_FAST_TRAP
275 brnz,pn %o0, __hypervisor_tlb_tl0_error
276 mov HV_FAST_MMU_DEMAP_CTX, %o1
277 retl
278 nop
279
280__hypervisor_flush_tlb_pending: /* 16 insns */
281 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
282 sllx %o1, 3, %g1
283 mov %o2, %g2
284 mov %o0, %g3
2851: sub %g1, (1 << 3), %g1
286 ldx [%g2 + %g1], %o0 /* ARG0: vaddr + IMMU-bit */
287 mov %g3, %o1 /* ARG1: mmu context */
288 mov HV_MMU_ALL, %o2 /* ARG2: flags */
289 srlx %o0, PAGE_SHIFT, %o0
290 sllx %o0, PAGE_SHIFT, %o0
291 ta HV_MMU_UNMAP_ADDR_TRAP
292 brnz,pn %o0, __hypervisor_tlb_tl0_error
293 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
294 brnz,pt %g1, 1b
295 nop
296 retl
297 nop
298
299__hypervisor_flush_tlb_kernel_range: /* 16 insns */
300 /* %o0=start, %o1=end */
301 cmp %o0, %o1
302 be,pn %xcc, 2f
303 sethi %hi(PAGE_SIZE), %g3
304 mov %o0, %g1
305 sub %o1, %g1, %g2
306 sub %g2, %g3, %g2
3071: add %g1, %g2, %o0 /* ARG0: virtual address */
308 mov 0, %o1 /* ARG1: mmu context */
309 mov HV_MMU_ALL, %o2 /* ARG2: flags */
310 ta HV_MMU_UNMAP_ADDR_TRAP
311 brnz,pn %o0, __hypervisor_tlb_tl0_error
312 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
313 brnz,pt %g2, 1b
314 sub %g2, %g3, %g2
3152: retl
316 nop
317
318#ifdef DCACHE_ALIASING_POSSIBLE
319 /* XXX Niagara and friends have an 8K cache, so no aliasing is
320 * XXX possible, but nothing explicit in the Hypervisor API
321 * XXX guarantees this.
322 */
323__hypervisor_flush_dcache_page: /* 2 insns */
324 retl
325 nop
326#endif
327
328tlb_patch_one:
2491: lduw [%o1], %g1 3291: lduw [%o1], %g1
250 stw %g1, [%o0] 330 stw %g1, [%o0]
251 flush %o0 331 flush %o0
@@ -264,22 +344,22 @@ cheetah_patch_cachetlbops:
264 or %o0, %lo(__flush_tlb_mm), %o0 344 or %o0, %lo(__flush_tlb_mm), %o0
265 sethi %hi(__cheetah_flush_tlb_mm), %o1 345 sethi %hi(__cheetah_flush_tlb_mm), %o1
266 or %o1, %lo(__cheetah_flush_tlb_mm), %o1 346 or %o1, %lo(__cheetah_flush_tlb_mm), %o1
267 call cheetah_patch_one 347 call tlb_patch_one
268 mov 18, %o2 348 mov 19, %o2
269 349
270 sethi %hi(__flush_tlb_pending), %o0 350 sethi %hi(__flush_tlb_pending), %o0
271 or %o0, %lo(__flush_tlb_pending), %o0 351 or %o0, %lo(__flush_tlb_pending), %o0
272 sethi %hi(__cheetah_flush_tlb_pending), %o1 352 sethi %hi(__cheetah_flush_tlb_pending), %o1
273 or %o1, %lo(__cheetah_flush_tlb_pending), %o1 353 or %o1, %lo(__cheetah_flush_tlb_pending), %o1
274 call cheetah_patch_one 354 call tlb_patch_one
275 mov 26, %o2 355 mov 27, %o2
276 356
277#ifdef DCACHE_ALIASING_POSSIBLE 357#ifdef DCACHE_ALIASING_POSSIBLE
278 sethi %hi(__flush_dcache_page), %o0 358 sethi %hi(__flush_dcache_page), %o0
279 or %o0, %lo(__flush_dcache_page), %o0 359 or %o0, %lo(__flush_dcache_page), %o0
280 sethi %hi(__cheetah_flush_dcache_page), %o1 360 sethi %hi(__cheetah_flush_dcache_page), %o1
281 or %o1, %lo(__cheetah_flush_dcache_page), %o1 361 or %o1, %lo(__cheetah_flush_dcache_page), %o1
282 call cheetah_patch_one 362 call tlb_patch_one
283 mov 11, %o2 363 mov 11, %o2
284#endif /* DCACHE_ALIASING_POSSIBLE */ 364#endif /* DCACHE_ALIASING_POSSIBLE */
285 365
@@ -295,16 +375,14 @@ cheetah_patch_cachetlbops:
295 * %g1 address arg 1 (tlb page and range flushes) 375 * %g1 address arg 1 (tlb page and range flushes)
296 * %g7 address arg 2 (tlb range flush only) 376 * %g7 address arg 2 (tlb range flush only)
297 * 377 *
298 * %g6 ivector table, don't touch 378 * %g6 scratch 1
299 * %g2 scratch 1 379 * %g2 scratch 2
300 * %g3 scratch 2 380 * %g3 scratch 3
301 * %g4 scratch 3 381 * %g4 scratch 4
302 *
303 * TODO: Make xcall TLB range flushes use the tricks above... -DaveM
304 */ 382 */
305 .align 32 383 .align 32
306 .globl xcall_flush_tlb_mm 384 .globl xcall_flush_tlb_mm
307xcall_flush_tlb_mm: 385xcall_flush_tlb_mm: /* 21 insns */
308 mov PRIMARY_CONTEXT, %g2 386 mov PRIMARY_CONTEXT, %g2
309 ldxa [%g2] ASI_DMMU, %g3 387 ldxa [%g2] ASI_DMMU, %g3
310 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 388 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4
@@ -316,9 +394,19 @@ xcall_flush_tlb_mm:
316 stxa %g0, [%g4] ASI_IMMU_DEMAP 394 stxa %g0, [%g4] ASI_IMMU_DEMAP
317 stxa %g3, [%g2] ASI_DMMU 395 stxa %g3, [%g2] ASI_DMMU
318 retry 396 retry
397 nop
398 nop
399 nop
400 nop
401 nop
402 nop
403 nop
404 nop
405 nop
406 nop
319 407
320 .globl xcall_flush_tlb_pending 408 .globl xcall_flush_tlb_pending
321xcall_flush_tlb_pending: 409xcall_flush_tlb_pending: /* 21 insns */
322 /* %g5=context, %g1=nr, %g7=vaddrs[] */ 410 /* %g5=context, %g1=nr, %g7=vaddrs[] */
323 sllx %g1, 3, %g1 411 sllx %g1, 3, %g1
324 mov PRIMARY_CONTEXT, %g4 412 mov PRIMARY_CONTEXT, %g4
@@ -341,9 +429,10 @@ xcall_flush_tlb_pending:
341 nop 429 nop
342 stxa %g2, [%g4] ASI_DMMU 430 stxa %g2, [%g4] ASI_DMMU
343 retry 431 retry
432 nop
344 433
345 .globl xcall_flush_tlb_kernel_range 434 .globl xcall_flush_tlb_kernel_range
346xcall_flush_tlb_kernel_range: 435xcall_flush_tlb_kernel_range: /* 25 insns */
347 sethi %hi(PAGE_SIZE - 1), %g2 436 sethi %hi(PAGE_SIZE - 1), %g2
348 or %g2, %lo(PAGE_SIZE - 1), %g2 437 or %g2, %lo(PAGE_SIZE - 1), %g2
349 andn %g1, %g2, %g1 438 andn %g1, %g2, %g1
@@ -360,14 +449,30 @@ xcall_flush_tlb_kernel_range:
360 retry 449 retry
361 nop 450 nop
362 nop 451 nop
452 nop
453 nop
454 nop
455 nop
456 nop
457 nop
458 nop
459 nop
460 nop
363 461
364 /* This runs in a very controlled environment, so we do 462 /* This runs in a very controlled environment, so we do
365 * not need to worry about BH races etc. 463 * not need to worry about BH races etc.
366 */ 464 */
367 .globl xcall_sync_tick 465 .globl xcall_sync_tick
368xcall_sync_tick: 466xcall_sync_tick:
369 rdpr %pstate, %g2 467
468661: rdpr %pstate, %g2
370 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate 469 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate
470 .section .sun4v_2insn_patch, "ax"
471 .word 661b
472 nop
473 nop
474 .previous
475
371 rdpr %pil, %g2 476 rdpr %pil, %g2
372 wrpr %g0, 15, %pil 477 wrpr %g0, 15, %pil
373 sethi %hi(109f), %g7 478 sethi %hi(109f), %g7
@@ -390,8 +495,15 @@ xcall_sync_tick:
390 */ 495 */
391 .globl xcall_report_regs 496 .globl xcall_report_regs
392xcall_report_regs: 497xcall_report_regs:
393 rdpr %pstate, %g2 498
499661: rdpr %pstate, %g2
394 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate 500 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate
501 .section .sun4v_2insn_patch, "ax"
502 .word 661b
503 nop
504 nop
505 .previous
506
395 rdpr %pil, %g2 507 rdpr %pil, %g2
396 wrpr %g0, 15, %pil 508 wrpr %g0, 15, %pil
397 sethi %hi(109f), %g7 509 sethi %hi(109f), %g7
@@ -453,62 +565,96 @@ xcall_flush_dcache_page_spitfire: /* %g1 == physical page address
453 nop 565 nop
454 nop 566 nop
455 567
456 .data 568 /* %g5: error
457 569 * %g6: tlb op
458errata32_hwbug: 570 */
459 .xword 0 571__hypervisor_tlb_xcall_error:
460 572 mov %g5, %g4
461 .text 573 mov %g6, %g5
462 574 ba,pt %xcc, etrap
463 /* These two are not performance critical... */ 575 rd %pc, %g7
464 .globl xcall_flush_tlb_all_spitfire 576 mov %l4, %o0
465xcall_flush_tlb_all_spitfire: 577 call hypervisor_tlbop_error_xcall
466 /* Spitfire Errata #32 workaround. */ 578 mov %l5, %o1
467 sethi %hi(errata32_hwbug), %g4 579 ba,a,pt %xcc, rtrap_clr_l6
468 stx %g0, [%g4 + %lo(errata32_hwbug)] 580
469 581 .globl __hypervisor_xcall_flush_tlb_mm
470 clr %g2 582__hypervisor_xcall_flush_tlb_mm: /* 21 insns */
471 clr %g3 583 /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
4721: ldxa [%g3] ASI_DTLB_DATA_ACCESS, %g4 584 mov %o0, %g2
473 and %g4, _PAGE_L, %g5 585 mov %o1, %g3
474 brnz,pn %g5, 2f 586 mov %o2, %g4
475 mov TLB_TAG_ACCESS, %g7 587 mov %o3, %g1
476 588 mov %o5, %g7
477 stxa %g0, [%g7] ASI_DMMU 589 clr %o0 /* ARG0: CPU lists unimplemented */
478 membar #Sync 590 clr %o1 /* ARG1: CPU lists unimplemented */
479 stxa %g0, [%g3] ASI_DTLB_DATA_ACCESS 591 mov %g5, %o2 /* ARG2: mmu context */
592 mov HV_MMU_ALL, %o3 /* ARG3: flags */
593 mov HV_FAST_MMU_DEMAP_CTX, %o5
594 ta HV_FAST_TRAP
595 mov HV_FAST_MMU_DEMAP_CTX, %g6
596 brnz,pn %o0, __hypervisor_tlb_xcall_error
597 mov %o0, %g5
598 mov %g2, %o0
599 mov %g3, %o1
600 mov %g4, %o2
601 mov %g1, %o3
602 mov %g7, %o5
480 membar #Sync 603 membar #Sync
604 retry
481 605
482 /* Spitfire Errata #32 workaround. */ 606 .globl __hypervisor_xcall_flush_tlb_pending
483 sethi %hi(errata32_hwbug), %g4 607__hypervisor_xcall_flush_tlb_pending: /* 21 insns */
484 stx %g0, [%g4 + %lo(errata32_hwbug)] 608 /* %g5=ctx, %g1=nr, %g7=vaddrs[], %g2,%g3,%g4,g6=scratch */
485 609 sllx %g1, 3, %g1
4862: ldxa [%g3] ASI_ITLB_DATA_ACCESS, %g4 610 mov %o0, %g2
487 and %g4, _PAGE_L, %g5 611 mov %o1, %g3
488 brnz,pn %g5, 2f 612 mov %o2, %g4
489 mov TLB_TAG_ACCESS, %g7 6131: sub %g1, (1 << 3), %g1
490 614 ldx [%g7 + %g1], %o0 /* ARG0: virtual address */
491 stxa %g0, [%g7] ASI_IMMU 615 mov %g5, %o1 /* ARG1: mmu context */
492 membar #Sync 616 mov HV_MMU_ALL, %o2 /* ARG2: flags */
493 stxa %g0, [%g3] ASI_ITLB_DATA_ACCESS 617 srlx %o0, PAGE_SHIFT, %o0
618 sllx %o0, PAGE_SHIFT, %o0
619 ta HV_MMU_UNMAP_ADDR_TRAP
620 mov HV_MMU_UNMAP_ADDR_TRAP, %g6
621 brnz,a,pn %o0, __hypervisor_tlb_xcall_error
622 mov %o0, %g5
623 brnz,pt %g1, 1b
624 nop
625 mov %g2, %o0
626 mov %g3, %o1
627 mov %g4, %o2
494 membar #Sync 628 membar #Sync
495
496 /* Spitfire Errata #32 workaround. */
497 sethi %hi(errata32_hwbug), %g4
498 stx %g0, [%g4 + %lo(errata32_hwbug)]
499
5002: add %g2, 1, %g2
501 cmp %g2, SPITFIRE_HIGHEST_LOCKED_TLBENT
502 ble,pt %icc, 1b
503 sll %g2, 3, %g3
504 flush %g6
505 retry 629 retry
506 630
507 .globl xcall_flush_tlb_all_cheetah 631 .globl __hypervisor_xcall_flush_tlb_kernel_range
508xcall_flush_tlb_all_cheetah: 632__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */
509 mov 0x80, %g2 633 /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
510 stxa %g0, [%g2] ASI_DMMU_DEMAP 634 sethi %hi(PAGE_SIZE - 1), %g2
511 stxa %g0, [%g2] ASI_IMMU_DEMAP 635 or %g2, %lo(PAGE_SIZE - 1), %g2
636 andn %g1, %g2, %g1
637 andn %g7, %g2, %g7
638 sub %g7, %g1, %g3
639 add %g2, 1, %g2
640 sub %g3, %g2, %g3
641 mov %o0, %g2
642 mov %o1, %g4
643 mov %o2, %g7
6441: add %g1, %g3, %o0 /* ARG0: virtual address */
645 mov 0, %o1 /* ARG1: mmu context */
646 mov HV_MMU_ALL, %o2 /* ARG2: flags */
647 ta HV_MMU_UNMAP_ADDR_TRAP
648 mov HV_MMU_UNMAP_ADDR_TRAP, %g6
649 brnz,pn %o0, __hypervisor_tlb_xcall_error
650 mov %o0, %g5
651 sethi %hi(PAGE_SIZE), %o2
652 brnz,pt %g3, 1b
653 sub %g3, %o2, %g3
654 mov %g2, %o0
655 mov %g4, %o1
656 mov %g7, %o2
657 membar #Sync
512 retry 658 retry
513 659
514 /* These just get rescheduled to PIL vectors. */ 660 /* These just get rescheduled to PIL vectors. */
@@ -527,4 +673,70 @@ xcall_capture:
527 wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint 673 wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint
528 retry 674 retry
529 675
676 .globl xcall_new_mmu_context_version
677xcall_new_mmu_context_version:
678 wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint
679 retry
680
530#endif /* CONFIG_SMP */ 681#endif /* CONFIG_SMP */
682
683
684 .globl hypervisor_patch_cachetlbops
685hypervisor_patch_cachetlbops:
686 save %sp, -128, %sp
687
688 sethi %hi(__flush_tlb_mm), %o0
689 or %o0, %lo(__flush_tlb_mm), %o0
690 sethi %hi(__hypervisor_flush_tlb_mm), %o1
691 or %o1, %lo(__hypervisor_flush_tlb_mm), %o1
692 call tlb_patch_one
693 mov 10, %o2
694
695 sethi %hi(__flush_tlb_pending), %o0
696 or %o0, %lo(__flush_tlb_pending), %o0
697 sethi %hi(__hypervisor_flush_tlb_pending), %o1
698 or %o1, %lo(__hypervisor_flush_tlb_pending), %o1
699 call tlb_patch_one
700 mov 16, %o2
701
702 sethi %hi(__flush_tlb_kernel_range), %o0
703 or %o0, %lo(__flush_tlb_kernel_range), %o0
704 sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1
705 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
706 call tlb_patch_one
707 mov 16, %o2
708
709#ifdef DCACHE_ALIASING_POSSIBLE
710 sethi %hi(__flush_dcache_page), %o0
711 or %o0, %lo(__flush_dcache_page), %o0
712 sethi %hi(__hypervisor_flush_dcache_page), %o1
713 or %o1, %lo(__hypervisor_flush_dcache_page), %o1
714 call tlb_patch_one
715 mov 2, %o2
716#endif /* DCACHE_ALIASING_POSSIBLE */
717
718#ifdef CONFIG_SMP
719 sethi %hi(xcall_flush_tlb_mm), %o0
720 or %o0, %lo(xcall_flush_tlb_mm), %o0
721 sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1
722 or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
723 call tlb_patch_one
724 mov 21, %o2
725
726 sethi %hi(xcall_flush_tlb_pending), %o0
727 or %o0, %lo(xcall_flush_tlb_pending), %o0
728 sethi %hi(__hypervisor_xcall_flush_tlb_pending), %o1
729 or %o1, %lo(__hypervisor_xcall_flush_tlb_pending), %o1
730 call tlb_patch_one
731 mov 21, %o2
732
733 sethi %hi(xcall_flush_tlb_kernel_range), %o0
734 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
735 sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
736 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
737 call tlb_patch_one
738 mov 25, %o2
739#endif /* CONFIG_SMP */
740
741 ret
742 restore