diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2012-01-11 09:41:47 -0500 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2012-01-11 09:41:47 -0500 |
commit | 39b741431af7f6f46b2e0e7f7f13ea2351fb4a5f (patch) | |
tree | 89355f4ae7bbb874537bb65f71ba0d19b3d468e1 /arch/mips/mm | |
parent | 5b0ec2efb7d373faa7b1a7632c459b93895d45cd (diff) | |
parent | d7a887a73dec6c387b02a966a71aac767bbd9ce6 (diff) |
Merge branch 'next/generic' into mips-for-linux-next
Diffstat (limited to 'arch/mips/mm')
-rw-r--r-- | arch/mips/mm/Makefile | 4 | ||||
-rw-r--r-- | arch/mips/mm/gup.c | 315 | ||||
-rw-r--r-- | arch/mips/mm/init.c | 9 | ||||
-rw-r--r-- | arch/mips/mm/tlb-r4k.c | 67 |
4 files changed, 338 insertions, 57 deletions
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index 4d8c1623eee2..3ca2a065cf76 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile | |||
@@ -3,8 +3,8 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y += cache.o dma-default.o extable.o fault.o \ | 5 | obj-y += cache.o dma-default.o extable.o fault.o \ |
6 | init.o mmap.o tlbex.o tlbex-fault.o uasm.o \ | 6 | gup.o init.o mmap.o page.o tlbex.o \ |
7 | page.o | 7 | tlbex-fault.o uasm.o |
8 | 8 | ||
9 | obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o | 9 | obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o |
10 | obj-$(CONFIG_64BIT) += pgtable-64.o | 10 | obj-$(CONFIG_64BIT) += pgtable-64.o |
diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c new file mode 100644 index 000000000000..33aadbcf170b --- /dev/null +++ b/arch/mips/mm/gup.c | |||
@@ -0,0 +1,315 @@ | |||
1 | /* | ||
2 | * Lockless get_user_pages_fast for MIPS | ||
3 | * | ||
4 | * Copyright (C) 2008 Nick Piggin | ||
5 | * Copyright (C) 2008 Novell Inc. | ||
6 | * Copyright (C) 2011 Ralf Baechle | ||
7 | */ | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/mm.h> | ||
10 | #include <linux/vmstat.h> | ||
11 | #include <linux/highmem.h> | ||
12 | #include <linux/swap.h> | ||
13 | #include <linux/hugetlb.h> | ||
14 | |||
15 | #include <asm/pgtable.h> | ||
16 | |||
17 | static inline pte_t gup_get_pte(pte_t *ptep) | ||
18 | { | ||
19 | #if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32) | ||
20 | pte_t pte; | ||
21 | |||
22 | retry: | ||
23 | pte.pte_low = ptep->pte_low; | ||
24 | smp_rmb(); | ||
25 | pte.pte_high = ptep->pte_high; | ||
26 | smp_rmb(); | ||
27 | if (unlikely(pte.pte_low != ptep->pte_low)) | ||
28 | goto retry; | ||
29 | |||
30 | return pte; | ||
31 | #else | ||
32 | return ACCESS_ONCE(*ptep); | ||
33 | #endif | ||
34 | } | ||
35 | |||
36 | static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, | ||
37 | int write, struct page **pages, int *nr) | ||
38 | { | ||
39 | pte_t *ptep = pte_offset_map(&pmd, addr); | ||
40 | do { | ||
41 | pte_t pte = gup_get_pte(ptep); | ||
42 | struct page *page; | ||
43 | |||
44 | if (!pte_present(pte) || | ||
45 | pte_special(pte) || (write && !pte_write(pte))) { | ||
46 | pte_unmap(ptep); | ||
47 | return 0; | ||
48 | } | ||
49 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
50 | page = pte_page(pte); | ||
51 | get_page(page); | ||
52 | SetPageReferenced(page); | ||
53 | pages[*nr] = page; | ||
54 | (*nr)++; | ||
55 | |||
56 | } while (ptep++, addr += PAGE_SIZE, addr != end); | ||
57 | |||
58 | pte_unmap(ptep - 1); | ||
59 | return 1; | ||
60 | } | ||
61 | |||
62 | static inline void get_head_page_multiple(struct page *page, int nr) | ||
63 | { | ||
64 | VM_BUG_ON(page != compound_head(page)); | ||
65 | VM_BUG_ON(page_count(page) == 0); | ||
66 | atomic_add(nr, &page->_count); | ||
67 | SetPageReferenced(page); | ||
68 | } | ||
69 | |||
70 | static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, | ||
71 | int write, struct page **pages, int *nr) | ||
72 | { | ||
73 | pte_t pte = *(pte_t *)&pmd; | ||
74 | struct page *head, *page; | ||
75 | int refs; | ||
76 | |||
77 | if (write && !pte_write(pte)) | ||
78 | return 0; | ||
79 | /* hugepages are never "special" */ | ||
80 | VM_BUG_ON(pte_special(pte)); | ||
81 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
82 | |||
83 | refs = 0; | ||
84 | head = pte_page(pte); | ||
85 | page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); | ||
86 | do { | ||
87 | VM_BUG_ON(compound_head(page) != head); | ||
88 | pages[*nr] = page; | ||
89 | if (PageTail(page)) | ||
90 | get_huge_page_tail(page); | ||
91 | (*nr)++; | ||
92 | page++; | ||
93 | refs++; | ||
94 | } while (addr += PAGE_SIZE, addr != end); | ||
95 | |||
96 | get_head_page_multiple(head, refs); | ||
97 | return 1; | ||
98 | } | ||
99 | |||
100 | static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | ||
101 | int write, struct page **pages, int *nr) | ||
102 | { | ||
103 | unsigned long next; | ||
104 | pmd_t *pmdp; | ||
105 | |||
106 | pmdp = pmd_offset(&pud, addr); | ||
107 | do { | ||
108 | pmd_t pmd = *pmdp; | ||
109 | |||
110 | next = pmd_addr_end(addr, end); | ||
111 | /* | ||
112 | * The pmd_trans_splitting() check below explains why | ||
113 | * pmdp_splitting_flush has to flush the tlb, to stop | ||
114 | * this gup-fast code from running while we set the | ||
115 | * splitting bit in the pmd. Returning zero will take | ||
116 | * the slow path that will call wait_split_huge_page() | ||
117 | * if the pmd is still in splitting state. gup-fast | ||
118 | * can't because it has irq disabled and | ||
119 | * wait_split_huge_page() would never return as the | ||
120 | * tlb flush IPI wouldn't run. | ||
121 | */ | ||
122 | if (pmd_none(pmd) || pmd_trans_splitting(pmd)) | ||
123 | return 0; | ||
124 | if (unlikely(pmd_huge(pmd))) { | ||
125 | if (!gup_huge_pmd(pmd, addr, next, write, pages,nr)) | ||
126 | return 0; | ||
127 | } else { | ||
128 | if (!gup_pte_range(pmd, addr, next, write, pages,nr)) | ||
129 | return 0; | ||
130 | } | ||
131 | } while (pmdp++, addr = next, addr != end); | ||
132 | |||
133 | return 1; | ||
134 | } | ||
135 | |||
136 | static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, | ||
137 | int write, struct page **pages, int *nr) | ||
138 | { | ||
139 | pte_t pte = *(pte_t *)&pud; | ||
140 | struct page *head, *page; | ||
141 | int refs; | ||
142 | |||
143 | if (write && !pte_write(pte)) | ||
144 | return 0; | ||
145 | /* hugepages are never "special" */ | ||
146 | VM_BUG_ON(pte_special(pte)); | ||
147 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
148 | |||
149 | refs = 0; | ||
150 | head = pte_page(pte); | ||
151 | page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); | ||
152 | do { | ||
153 | VM_BUG_ON(compound_head(page) != head); | ||
154 | pages[*nr] = page; | ||
155 | (*nr)++; | ||
156 | page++; | ||
157 | refs++; | ||
158 | } while (addr += PAGE_SIZE, addr != end); | ||
159 | |||
160 | get_head_page_multiple(head, refs); | ||
161 | return 1; | ||
162 | } | ||
163 | |||
164 | static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, | ||
165 | int write, struct page **pages, int *nr) | ||
166 | { | ||
167 | unsigned long next; | ||
168 | pud_t *pudp; | ||
169 | |||
170 | pudp = pud_offset(&pgd, addr); | ||
171 | do { | ||
172 | pud_t pud = *pudp; | ||
173 | |||
174 | next = pud_addr_end(addr, end); | ||
175 | if (pud_none(pud)) | ||
176 | return 0; | ||
177 | if (unlikely(pud_huge(pud))) { | ||
178 | if (!gup_huge_pud(pud, addr, next, write, pages,nr)) | ||
179 | return 0; | ||
180 | } else { | ||
181 | if (!gup_pmd_range(pud, addr, next, write, pages,nr)) | ||
182 | return 0; | ||
183 | } | ||
184 | } while (pudp++, addr = next, addr != end); | ||
185 | |||
186 | return 1; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * Like get_user_pages_fast() except its IRQ-safe in that it won't fall | ||
191 | * back to the regular GUP. | ||
192 | */ | ||
193 | int __get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
194 | struct page **pages) | ||
195 | { | ||
196 | struct mm_struct *mm = current->mm; | ||
197 | unsigned long addr, len, end; | ||
198 | unsigned long next; | ||
199 | unsigned long flags; | ||
200 | pgd_t *pgdp; | ||
201 | int nr = 0; | ||
202 | |||
203 | start &= PAGE_MASK; | ||
204 | addr = start; | ||
205 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
206 | end = start + len; | ||
207 | if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, | ||
208 | (void __user *)start, len))) | ||
209 | return 0; | ||
210 | |||
211 | /* | ||
212 | * XXX: batch / limit 'nr', to avoid large irq off latency | ||
213 | * needs some instrumenting to determine the common sizes used by | ||
214 | * important workloads (eg. DB2), and whether limiting the batch | ||
215 | * size will decrease performance. | ||
216 | * | ||
217 | * It seems like we're in the clear for the moment. Direct-IO is | ||
218 | * the main guy that batches up lots of get_user_pages, and even | ||
219 | * they are limited to 64-at-a-time which is not so many. | ||
220 | */ | ||
221 | /* | ||
222 | * This doesn't prevent pagetable teardown, but does prevent | ||
223 | * the pagetables and pages from being freed. | ||
224 | * | ||
225 | * So long as we atomically load page table pointers versus teardown, | ||
226 | * we can follow the address down to the page and take a ref on it. | ||
227 | */ | ||
228 | local_irq_save(flags); | ||
229 | pgdp = pgd_offset(mm, addr); | ||
230 | do { | ||
231 | pgd_t pgd = *pgdp; | ||
232 | |||
233 | next = pgd_addr_end(addr, end); | ||
234 | if (pgd_none(pgd)) | ||
235 | break; | ||
236 | if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) | ||
237 | break; | ||
238 | } while (pgdp++, addr = next, addr != end); | ||
239 | local_irq_restore(flags); | ||
240 | |||
241 | return nr; | ||
242 | } | ||
243 | |||
244 | /** | ||
245 | * get_user_pages_fast() - pin user pages in memory | ||
246 | * @start: starting user address | ||
247 | * @nr_pages: number of pages from start to pin | ||
248 | * @write: whether pages will be written to | ||
249 | * @pages: array that receives pointers to the pages pinned. | ||
250 | * Should be at least nr_pages long. | ||
251 | * | ||
252 | * Attempt to pin user pages in memory without taking mm->mmap_sem. | ||
253 | * If not successful, it will fall back to taking the lock and | ||
254 | * calling get_user_pages(). | ||
255 | * | ||
256 | * Returns number of pages pinned. This may be fewer than the number | ||
257 | * requested. If nr_pages is 0 or negative, returns 0. If no pages | ||
258 | * were pinned, returns -errno. | ||
259 | */ | ||
260 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
261 | struct page **pages) | ||
262 | { | ||
263 | struct mm_struct *mm = current->mm; | ||
264 | unsigned long addr, len, end; | ||
265 | unsigned long next; | ||
266 | pgd_t *pgdp; | ||
267 | int ret, nr = 0; | ||
268 | |||
269 | start &= PAGE_MASK; | ||
270 | addr = start; | ||
271 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
272 | |||
273 | end = start + len; | ||
274 | if (end < start) | ||
275 | goto slow_irqon; | ||
276 | |||
277 | /* XXX: batch / limit 'nr' */ | ||
278 | local_irq_disable(); | ||
279 | pgdp = pgd_offset(mm, addr); | ||
280 | do { | ||
281 | pgd_t pgd = *pgdp; | ||
282 | |||
283 | next = pgd_addr_end(addr, end); | ||
284 | if (pgd_none(pgd)) | ||
285 | goto slow; | ||
286 | if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) | ||
287 | goto slow; | ||
288 | } while (pgdp++, addr = next, addr != end); | ||
289 | local_irq_enable(); | ||
290 | |||
291 | VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); | ||
292 | return nr; | ||
293 | slow: | ||
294 | local_irq_enable(); | ||
295 | |||
296 | slow_irqon: | ||
297 | /* Try to get the remaining pages with get_user_pages */ | ||
298 | start += nr << PAGE_SHIFT; | ||
299 | pages += nr; | ||
300 | |||
301 | down_read(&mm->mmap_sem); | ||
302 | ret = get_user_pages(current, mm, start, | ||
303 | (end - start) >> PAGE_SHIFT, | ||
304 | write, 0, pages, NULL); | ||
305 | up_read(&mm->mmap_sem); | ||
306 | |||
307 | /* Have to be a bit careful with return values */ | ||
308 | if (nr > 0) { | ||
309 | if (ret < 0) | ||
310 | ret = nr; | ||
311 | else | ||
312 | ret += nr; | ||
313 | } | ||
314 | return ret; | ||
315 | } | ||
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index b7ebc4fa89bc..3b3ffd439cd7 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c | |||
@@ -304,9 +304,14 @@ int page_is_ram(unsigned long pagenr) | |||
304 | for (i = 0; i < boot_mem_map.nr_map; i++) { | 304 | for (i = 0; i < boot_mem_map.nr_map; i++) { |
305 | unsigned long addr, end; | 305 | unsigned long addr, end; |
306 | 306 | ||
307 | if (boot_mem_map.map[i].type != BOOT_MEM_RAM) | 307 | switch (boot_mem_map.map[i].type) { |
308 | case BOOT_MEM_RAM: | ||
309 | case BOOT_MEM_INIT_RAM: | ||
310 | break; | ||
311 | default: | ||
308 | /* not usable memory */ | 312 | /* not usable memory */ |
309 | continue; | 313 | continue; |
314 | } | ||
310 | 315 | ||
311 | addr = PFN_UP(boot_mem_map.map[i].addr); | 316 | addr = PFN_UP(boot_mem_map.map[i].addr); |
312 | end = PFN_DOWN(boot_mem_map.map[i].addr + | 317 | end = PFN_DOWN(boot_mem_map.map[i].addr + |
@@ -379,7 +384,7 @@ void __init mem_init(void) | |||
379 | 384 | ||
380 | reservedpages = ram = 0; | 385 | reservedpages = ram = 0; |
381 | for (tmp = 0; tmp < max_low_pfn; tmp++) | 386 | for (tmp = 0; tmp < max_low_pfn; tmp++) |
382 | if (page_is_ram(tmp)) { | 387 | if (page_is_ram(tmp) && pfn_valid(tmp)) { |
383 | ram++; | 388 | ram++; |
384 | if (PageReserved(pfn_to_page(tmp))) | 389 | if (PageReserved(pfn_to_page(tmp))) |
385 | reservedpages++; | 390 | reservedpages++; |
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index d163455552b0..2dc625346c40 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c | |||
@@ -121,22 +121,30 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, | |||
121 | 121 | ||
122 | if (cpu_context(cpu, mm) != 0) { | 122 | if (cpu_context(cpu, mm) != 0) { |
123 | unsigned long size, flags; | 123 | unsigned long size, flags; |
124 | int huge = is_vm_hugetlb_page(vma); | ||
124 | 125 | ||
125 | ENTER_CRITICAL(flags); | 126 | ENTER_CRITICAL(flags); |
126 | size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; | 127 | if (huge) { |
127 | size = (size + 1) >> 1; | 128 | start = round_down(start, HPAGE_SIZE); |
129 | end = round_up(end, HPAGE_SIZE); | ||
130 | size = (end - start) >> HPAGE_SHIFT; | ||
131 | } else { | ||
132 | start = round_down(start, PAGE_SIZE << 1); | ||
133 | end = round_up(end, PAGE_SIZE << 1); | ||
134 | size = (end - start) >> (PAGE_SHIFT + 1); | ||
135 | } | ||
128 | if (size <= current_cpu_data.tlbsize/2) { | 136 | if (size <= current_cpu_data.tlbsize/2) { |
129 | int oldpid = read_c0_entryhi(); | 137 | int oldpid = read_c0_entryhi(); |
130 | int newpid = cpu_asid(cpu, mm); | 138 | int newpid = cpu_asid(cpu, mm); |
131 | 139 | ||
132 | start &= (PAGE_MASK << 1); | ||
133 | end += ((PAGE_SIZE << 1) - 1); | ||
134 | end &= (PAGE_MASK << 1); | ||
135 | while (start < end) { | 140 | while (start < end) { |
136 | int idx; | 141 | int idx; |
137 | 142 | ||
138 | write_c0_entryhi(start | newpid); | 143 | write_c0_entryhi(start | newpid); |
139 | start += (PAGE_SIZE << 1); | 144 | if (huge) |
145 | start += HPAGE_SIZE; | ||
146 | else | ||
147 | start += (PAGE_SIZE << 1); | ||
140 | mtc0_tlbw_hazard(); | 148 | mtc0_tlbw_hazard(); |
141 | tlb_probe(); | 149 | tlb_probe(); |
142 | tlb_probe_hazard(); | 150 | tlb_probe_hazard(); |
@@ -369,51 +377,6 @@ void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, | |||
369 | EXIT_CRITICAL(flags); | 377 | EXIT_CRITICAL(flags); |
370 | } | 378 | } |
371 | 379 | ||
372 | /* | ||
373 | * Used for loading TLB entries before trap_init() has started, when we | ||
374 | * don't actually want to add a wired entry which remains throughout the | ||
375 | * lifetime of the system | ||
376 | */ | ||
377 | |||
378 | static int temp_tlb_entry __cpuinitdata; | ||
379 | |||
380 | __init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, | ||
381 | unsigned long entryhi, unsigned long pagemask) | ||
382 | { | ||
383 | int ret = 0; | ||
384 | unsigned long flags; | ||
385 | unsigned long wired; | ||
386 | unsigned long old_pagemask; | ||
387 | unsigned long old_ctx; | ||
388 | |||
389 | ENTER_CRITICAL(flags); | ||
390 | /* Save old context and create impossible VPN2 value */ | ||
391 | old_ctx = read_c0_entryhi(); | ||
392 | old_pagemask = read_c0_pagemask(); | ||
393 | wired = read_c0_wired(); | ||
394 | if (--temp_tlb_entry < wired) { | ||
395 | printk(KERN_WARNING | ||
396 | "No TLB space left for add_temporary_entry\n"); | ||
397 | ret = -ENOSPC; | ||
398 | goto out; | ||
399 | } | ||
400 | |||
401 | write_c0_index(temp_tlb_entry); | ||
402 | write_c0_pagemask(pagemask); | ||
403 | write_c0_entryhi(entryhi); | ||
404 | write_c0_entrylo0(entrylo0); | ||
405 | write_c0_entrylo1(entrylo1); | ||
406 | mtc0_tlbw_hazard(); | ||
407 | tlb_write_indexed(); | ||
408 | tlbw_use_hazard(); | ||
409 | |||
410 | write_c0_entryhi(old_ctx); | ||
411 | write_c0_pagemask(old_pagemask); | ||
412 | out: | ||
413 | EXIT_CRITICAL(flags); | ||
414 | return ret; | ||
415 | } | ||
416 | |||
417 | static int __cpuinitdata ntlb; | 380 | static int __cpuinitdata ntlb; |
418 | static int __init set_ntlb(char *str) | 381 | static int __init set_ntlb(char *str) |
419 | { | 382 | { |
@@ -451,8 +414,6 @@ void __cpuinit tlb_init(void) | |||
451 | write_c0_pagegrain(pg); | 414 | write_c0_pagegrain(pg); |
452 | } | 415 | } |
453 | 416 | ||
454 | temp_tlb_entry = current_cpu_data.tlbsize - 1; | ||
455 | |||
456 | /* From this point on the ARC firmware is dead. */ | 417 | /* From this point on the ARC firmware is dead. */ |
457 | local_flush_tlb_all(); | 418 | local_flush_tlb_all(); |
458 | 419 | ||