aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorDavid Gibson <david@gibson.dropbear.id.au>2005-06-21 20:14:44 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-21 21:46:15 -0400
commit63551ae0feaaa23807ebea60de1901564bbef32e (patch)
treef6f97f60f83c3e9813bdfcc6039c499997b1ea10 /arch
parent1e7e5a9048b30c57ba1ddaa6cdf59b21b65cde99 (diff)
[PATCH] Hugepage consolidation
A lot of the code in arch/*/mm/hugetlbpage.c is quite similar. This patch attempts to consolidate a lot of the code across the arch's, putting the combined version in mm/hugetlb.c. There are a couple of uglyish hacks in order to covert all the hugepage archs, but the result is a very large reduction in the total amount of code. It also means things like hugepage lazy allocation could be implemented in one place, instead of six. Tested, at least a little, on ppc64, i386 and x86_64. Notes: - this patch changes the meaning of set_huge_pte() to be more analagous to set_pte() - does SH4 need s special huge_ptep_get_and_clear()?? Acked-by: William Lee Irwin <wli@holomorphy.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/i386/mm/hugetlbpage.c170
-rw-r--r--arch/ia64/mm/hugetlbpage.c158
-rw-r--r--arch/ppc64/mm/hugetlbpage.c180
-rw-r--r--arch/sh/mm/hugetlbpage.c196
-rw-r--r--arch/sh64/mm/hugetlbpage.c18
-rw-r--r--arch/sparc64/mm/hugetlbpage.c195
6 files changed, 76 insertions, 841 deletions
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index 171fc925e1e4..5aa06001a4bd 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -18,7 +18,7 @@
18#include <asm/tlb.h> 18#include <asm/tlb.h>
19#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
20 20
21static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 21pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
22{ 22{
23 pgd_t *pgd; 23 pgd_t *pgd;
24 pud_t *pud; 24 pud_t *pud;
@@ -30,7 +30,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
30 return (pte_t *) pmd; 30 return (pte_t *) pmd;
31} 31}
32 32
33static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 33pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
34{ 34{
35 pgd_t *pgd; 35 pgd_t *pgd;
36 pud_t *pud; 36 pud_t *pud;
@@ -42,21 +42,6 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
42 return (pte_t *) pmd; 42 return (pte_t *) pmd;
43} 43}
44 44
45static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page, pte_t * page_table, int write_access)
46{
47 pte_t entry;
48
49 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
50 if (write_access) {
51 entry =
52 pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
53 } else
54 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
55 entry = pte_mkyoung(entry);
56 mk_pte_huge(entry);
57 set_pte(page_table, entry);
58}
59
60/* 45/*
61 * This function checks for proper alignment of input addr and len parameters. 46 * This function checks for proper alignment of input addr and len parameters.
62 */ 47 */
@@ -69,77 +54,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
69 return 0; 54 return 0;
70} 55}
71 56
72int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
73 struct vm_area_struct *vma)
74{
75 pte_t *src_pte, *dst_pte, entry;
76 struct page *ptepage;
77 unsigned long addr = vma->vm_start;
78 unsigned long end = vma->vm_end;
79
80 while (addr < end) {
81 dst_pte = huge_pte_alloc(dst, addr);
82 if (!dst_pte)
83 goto nomem;
84 src_pte = huge_pte_offset(src, addr);
85 entry = *src_pte;
86 ptepage = pte_page(entry);
87 get_page(ptepage);
88 set_pte(dst_pte, entry);
89 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
90 addr += HPAGE_SIZE;
91 }
92 return 0;
93
94nomem:
95 return -ENOMEM;
96}
97
98int
99follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
100 struct page **pages, struct vm_area_struct **vmas,
101 unsigned long *position, int *length, int i)
102{
103 unsigned long vpfn, vaddr = *position;
104 int remainder = *length;
105
106 WARN_ON(!is_vm_hugetlb_page(vma));
107
108 vpfn = vaddr/PAGE_SIZE;
109 while (vaddr < vma->vm_end && remainder) {
110
111 if (pages) {
112 pte_t *pte;
113 struct page *page;
114
115 pte = huge_pte_offset(mm, vaddr);
116
117 /* hugetlb should be locked, and hence, prefaulted */
118 WARN_ON(!pte || pte_none(*pte));
119
120 page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
121
122 WARN_ON(!PageCompound(page));
123
124 get_page(page);
125 pages[i] = page;
126 }
127
128 if (vmas)
129 vmas[i] = vma;
130
131 vaddr += PAGE_SIZE;
132 ++vpfn;
133 --remainder;
134 ++i;
135 }
136
137 *length = remainder;
138 *position = vaddr;
139
140 return i;
141}
142
143#if 0 /* This is just for testing */ 57#if 0 /* This is just for testing */
144struct page * 58struct page *
145follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) 59follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
@@ -204,83 +118,15 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
204} 118}
205#endif 119#endif
206 120
207void unmap_hugepage_range(struct vm_area_struct *vma, 121void hugetlb_clean_stale_pgtable(pte_t *pte)
208 unsigned long start, unsigned long end)
209{ 122{
210 struct mm_struct *mm = vma->vm_mm; 123 pmd_t *pmd = (pmd_t *) pte;
211 unsigned long address;
212 pte_t pte, *ptep;
213 struct page *page; 124 struct page *page;
214 125
215 BUG_ON(start & (HPAGE_SIZE - 1)); 126 page = pmd_page(*pmd);
216 BUG_ON(end & (HPAGE_SIZE - 1)); 127 pmd_clear(pmd);
217 128 dec_page_state(nr_page_table_pages);
218 for (address = start; address < end; address += HPAGE_SIZE) { 129 page_cache_release(page);
219 ptep = huge_pte_offset(mm, address);
220 if (!ptep)
221 continue;
222 pte = ptep_get_and_clear(mm, address, ptep);
223 if (pte_none(pte))
224 continue;
225 page = pte_page(pte);
226 put_page(page);
227 }
228 add_mm_counter(mm ,rss, -((end - start) >> PAGE_SHIFT));
229 flush_tlb_range(vma, start, end);
230}
231
232int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
233{
234 struct mm_struct *mm = current->mm;
235 unsigned long addr;
236 int ret = 0;
237
238 BUG_ON(vma->vm_start & ~HPAGE_MASK);
239 BUG_ON(vma->vm_end & ~HPAGE_MASK);
240
241 spin_lock(&mm->page_table_lock);
242 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
243 unsigned long idx;
244 pte_t *pte = huge_pte_alloc(mm, addr);
245 struct page *page;
246
247 if (!pte) {
248 ret = -ENOMEM;
249 goto out;
250 }
251
252 if (!pte_none(*pte))
253 continue;
254
255 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
256 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
257 page = find_get_page(mapping, idx);
258 if (!page) {
259 /* charge the fs quota first */
260 if (hugetlb_get_quota(mapping)) {
261 ret = -ENOMEM;
262 goto out;
263 }
264 page = alloc_huge_page();
265 if (!page) {
266 hugetlb_put_quota(mapping);
267 ret = -ENOMEM;
268 goto out;
269 }
270 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
271 if (! ret) {
272 unlock_page(page);
273 } else {
274 hugetlb_put_quota(mapping);
275 free_huge_page(page);
276 goto out;
277 }
278 }
279 set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
280 }
281out:
282 spin_unlock(&mm->page_table_lock);
283 return ret;
284} 130}
285 131
286/* x86_64 also uses this file */ 132/* x86_64 also uses this file */
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index df08ae7634b6..e0a776a3044c 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -24,7 +24,7 @@
24 24
25unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT; 25unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT;
26 26
27static pte_t * 27pte_t *
28huge_pte_alloc (struct mm_struct *mm, unsigned long addr) 28huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
29{ 29{
30 unsigned long taddr = htlbpage_to_page(addr); 30 unsigned long taddr = htlbpage_to_page(addr);
@@ -43,7 +43,7 @@ huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
43 return pte; 43 return pte;
44} 44}
45 45
46static pte_t * 46pte_t *
47huge_pte_offset (struct mm_struct *mm, unsigned long addr) 47huge_pte_offset (struct mm_struct *mm, unsigned long addr)
48{ 48{
49 unsigned long taddr = htlbpage_to_page(addr); 49 unsigned long taddr = htlbpage_to_page(addr);
@@ -67,23 +67,6 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr)
67 67
68#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } 68#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
69 69
70static void
71set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma,
72 struct page *page, pte_t * page_table, int write_access)
73{
74 pte_t entry;
75
76 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
77 if (write_access) {
78 entry =
79 pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
80 } else
81 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
82 entry = pte_mkyoung(entry);
83 mk_pte_huge(entry);
84 set_pte(page_table, entry);
85 return;
86}
87/* 70/*
88 * This function checks for proper alignment of input addr and len parameters. 71 * This function checks for proper alignment of input addr and len parameters.
89 */ 72 */
@@ -99,68 +82,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
99 return 0; 82 return 0;
100} 83}
101 84
102int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
103 struct vm_area_struct *vma)
104{
105 pte_t *src_pte, *dst_pte, entry;
106 struct page *ptepage;
107 unsigned long addr = vma->vm_start;
108 unsigned long end = vma->vm_end;
109
110 while (addr < end) {
111 dst_pte = huge_pte_alloc(dst, addr);
112 if (!dst_pte)
113 goto nomem;
114 src_pte = huge_pte_offset(src, addr);
115 entry = *src_pte;
116 ptepage = pte_page(entry);
117 get_page(ptepage);
118 set_pte(dst_pte, entry);
119 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
120 addr += HPAGE_SIZE;
121 }
122 return 0;
123nomem:
124 return -ENOMEM;
125}
126
127int
128follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
129 struct page **pages, struct vm_area_struct **vmas,
130 unsigned long *st, int *length, int i)
131{
132 pte_t *ptep, pte;
133 unsigned long start = *st;
134 unsigned long pstart;
135 int len = *length;
136 struct page *page;
137
138 do {
139 pstart = start & HPAGE_MASK;
140 ptep = huge_pte_offset(mm, start);
141 pte = *ptep;
142
143back1:
144 page = pte_page(pte);
145 if (pages) {
146 page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
147 get_page(page);
148 pages[i] = page;
149 }
150 if (vmas)
151 vmas[i] = vma;
152 i++;
153 len--;
154 start += PAGE_SIZE;
155 if (((start & HPAGE_MASK) == pstart) && len &&
156 (start < vma->vm_end))
157 goto back1;
158 } while (len && start < vma->vm_end);
159 *length = len;
160 *st = start;
161 return i;
162}
163
164struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write) 85struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write)
165{ 86{
166 struct page *page; 87 struct page *page;
@@ -212,81 +133,6 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
212 free_pgd_range(tlb, addr, end, floor, ceiling); 133 free_pgd_range(tlb, addr, end, floor, ceiling);
213} 134}
214 135
215void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
216{
217 struct mm_struct *mm = vma->vm_mm;
218 unsigned long address;
219 pte_t *pte;
220 struct page *page;
221
222 BUG_ON(start & (HPAGE_SIZE - 1));
223 BUG_ON(end & (HPAGE_SIZE - 1));
224
225 for (address = start; address < end; address += HPAGE_SIZE) {
226 pte = huge_pte_offset(mm, address);
227 if (pte_none(*pte))
228 continue;
229 page = pte_page(*pte);
230 put_page(page);
231 pte_clear(mm, address, pte);
232 }
233 add_mm_counter(mm, rss, - ((end - start) >> PAGE_SHIFT));
234 flush_tlb_range(vma, start, end);
235}
236
237int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
238{
239 struct mm_struct *mm = current->mm;
240 unsigned long addr;
241 int ret = 0;
242
243 BUG_ON(vma->vm_start & ~HPAGE_MASK);
244 BUG_ON(vma->vm_end & ~HPAGE_MASK);
245
246 spin_lock(&mm->page_table_lock);
247 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
248 unsigned long idx;
249 pte_t *pte = huge_pte_alloc(mm, addr);
250 struct page *page;
251
252 if (!pte) {
253 ret = -ENOMEM;
254 goto out;
255 }
256 if (!pte_none(*pte))
257 continue;
258
259 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
260 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
261 page = find_get_page(mapping, idx);
262 if (!page) {
263 /* charge the fs quota first */
264 if (hugetlb_get_quota(mapping)) {
265 ret = -ENOMEM;
266 goto out;
267 }
268 page = alloc_huge_page();
269 if (!page) {
270 hugetlb_put_quota(mapping);
271 ret = -ENOMEM;
272 goto out;
273 }
274 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
275 if (! ret) {
276 unlock_page(page);
277 } else {
278 hugetlb_put_quota(mapping);
279 page_cache_release(page);
280 goto out;
281 }
282 }
283 set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
284 }
285out:
286 spin_unlock(&mm->page_table_lock);
287 return ret;
288}
289
290unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, 136unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
291 unsigned long pgoff, unsigned long flags) 137 unsigned long pgoff, unsigned long flags)
292{ 138{
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c
index d3bf86a5c1ad..b4ab766f5980 100644
--- a/arch/ppc64/mm/hugetlbpage.c
+++ b/arch/ppc64/mm/hugetlbpage.c
@@ -121,7 +121,7 @@ static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr
121 return hugepte_offset(dir, addr); 121 return hugepte_offset(dir, addr);
122} 122}
123 123
124static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 124pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
125{ 125{
126 pud_t *pud; 126 pud_t *pud;
127 127
@@ -134,7 +134,7 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
134 return hugepte_offset(pud, addr); 134 return hugepte_offset(pud, addr);
135} 135}
136 136
137static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 137pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
138{ 138{
139 pud_t *pud; 139 pud_t *pud;
140 140
@@ -147,25 +147,6 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
147 return hugepte_alloc(mm, pud, addr); 147 return hugepte_alloc(mm, pud, addr);
148} 148}
149 149
150static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
151 unsigned long addr, struct page *page,
152 pte_t *ptep, int write_access)
153{
154 pte_t entry;
155
156 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
157 if (write_access) {
158 entry =
159 pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
160 } else {
161 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
162 }
163 entry = pte_mkyoung(entry);
164 entry = pte_mkhuge(entry);
165
166 set_pte_at(mm, addr, ptep, entry);
167}
168
169/* 150/*
170 * This function checks for proper alignment of input addr and len parameters. 151 * This function checks for proper alignment of input addr and len parameters.
171 */ 152 */
@@ -259,80 +240,6 @@ int prepare_hugepage_range(unsigned long addr, unsigned long len)
259 return -EINVAL; 240 return -EINVAL;
260} 241}
261 242
262int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
263 struct vm_area_struct *vma)
264{
265 pte_t *src_pte, *dst_pte, entry;
266 struct page *ptepage;
267 unsigned long addr = vma->vm_start;
268 unsigned long end = vma->vm_end;
269 int err = -ENOMEM;
270
271 while (addr < end) {
272 dst_pte = huge_pte_alloc(dst, addr);
273 if (!dst_pte)
274 goto out;
275
276 src_pte = huge_pte_offset(src, addr);
277 entry = *src_pte;
278
279 ptepage = pte_page(entry);
280 get_page(ptepage);
281 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
282 set_pte_at(dst, addr, dst_pte, entry);
283
284 addr += HPAGE_SIZE;
285 }
286
287 err = 0;
288 out:
289 return err;
290}
291
292int
293follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
294 struct page **pages, struct vm_area_struct **vmas,
295 unsigned long *position, int *length, int i)
296{
297 unsigned long vpfn, vaddr = *position;
298 int remainder = *length;
299
300 WARN_ON(!is_vm_hugetlb_page(vma));
301
302 vpfn = vaddr/PAGE_SIZE;
303 while (vaddr < vma->vm_end && remainder) {
304 if (pages) {
305 pte_t *pte;
306 struct page *page;
307
308 pte = huge_pte_offset(mm, vaddr);
309
310 /* hugetlb should be locked, and hence, prefaulted */
311 WARN_ON(!pte || pte_none(*pte));
312
313 page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
314
315 WARN_ON(!PageCompound(page));
316
317 get_page(page);
318 pages[i] = page;
319 }
320
321 if (vmas)
322 vmas[i] = vma;
323
324 vaddr += PAGE_SIZE;
325 ++vpfn;
326 --remainder;
327 ++i;
328 }
329
330 *length = remainder;
331 *position = vaddr;
332
333 return i;
334}
335
336struct page * 243struct page *
337follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) 244follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
338{ 245{
@@ -363,89 +270,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
363 return NULL; 270 return NULL;
364} 271}
365 272
366void unmap_hugepage_range(struct vm_area_struct *vma,
367 unsigned long start, unsigned long end)
368{
369 struct mm_struct *mm = vma->vm_mm;
370 unsigned long addr;
371 pte_t *ptep;
372 struct page *page;
373
374 WARN_ON(!is_vm_hugetlb_page(vma));
375 BUG_ON((start % HPAGE_SIZE) != 0);
376 BUG_ON((end % HPAGE_SIZE) != 0);
377
378 for (addr = start; addr < end; addr += HPAGE_SIZE) {
379 pte_t pte;
380
381 ptep = huge_pte_offset(mm, addr);
382 if (!ptep || pte_none(*ptep))
383 continue;
384
385 pte = *ptep;
386 page = pte_page(pte);
387 pte_clear(mm, addr, ptep);
388
389 put_page(page);
390 }
391 add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
392 flush_tlb_pending();
393}
394
395int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
396{
397 struct mm_struct *mm = current->mm;
398 unsigned long addr;
399 int ret = 0;
400
401 WARN_ON(!is_vm_hugetlb_page(vma));
402 BUG_ON((vma->vm_start % HPAGE_SIZE) != 0);
403 BUG_ON((vma->vm_end % HPAGE_SIZE) != 0);
404
405 spin_lock(&mm->page_table_lock);
406 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
407 unsigned long idx;
408 pte_t *pte = huge_pte_alloc(mm, addr);
409 struct page *page;
410
411 if (!pte) {
412 ret = -ENOMEM;
413 goto out;
414 }
415 if (! pte_none(*pte))
416 continue;
417
418 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
419 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
420 page = find_get_page(mapping, idx);
421 if (!page) {
422 /* charge the fs quota first */
423 if (hugetlb_get_quota(mapping)) {
424 ret = -ENOMEM;
425 goto out;
426 }
427 page = alloc_huge_page();
428 if (!page) {
429 hugetlb_put_quota(mapping);
430 ret = -ENOMEM;
431 goto out;
432 }
433 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
434 if (! ret) {
435 unlock_page(page);
436 } else {
437 hugetlb_put_quota(mapping);
438 free_huge_page(page);
439 goto out;
440 }
441 }
442 set_huge_pte(mm, vma, addr, page, pte, vma->vm_flags & VM_WRITE);
443 }
444out:
445 spin_unlock(&mm->page_table_lock);
446 return ret;
447}
448
449/* Because we have an exclusive hugepage region which lies within the 273/* Because we have an exclusive hugepage region which lies within the
450 * normal user address space, we have to take special measures to make 274 * normal user address space, we have to take special measures to make
451 * non-huge mmap()s evade the hugepage reserved regions. */ 275 * non-huge mmap()s evade the hugepage reserved regions. */
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 1f897bab2318..95bb1a6c6060 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -24,7 +24,7 @@
24#include <asm/tlbflush.h> 24#include <asm/tlbflush.h>
25#include <asm/cacheflush.h> 25#include <asm/cacheflush.h>
26 26
27static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 27pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
28{ 28{
29 pgd_t *pgd; 29 pgd_t *pgd;
30 pmd_t *pmd; 30 pmd_t *pmd;
@@ -39,7 +39,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
39 return pte; 39 return pte;
40} 40}
41 41
42static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 42pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
43{ 43{
44 pgd_t *pgd; 44 pgd_t *pgd;
45 pmd_t *pmd; 45 pmd_t *pmd;
@@ -56,28 +56,34 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
56 56
57#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0) 57#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
58 58
59static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, 59void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
60 struct page *page, pte_t * page_table, int write_access) 60 pte_t *ptep, pte_t entry)
61{ 61{
62 unsigned long i; 62 int i;
63 pte_t entry;
64 63
65 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); 64 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
65 set_pte_at(mm, addr, ptep, entry);
66 ptep++;
67 addr += PAGE_SIZE;
68 pte_val(entry) += PAGE_SIZE;
69 }
70}
66 71
67 if (write_access) 72pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
68 entry = pte_mkwrite(pte_mkdirty(mk_pte(page, 73 pte_t *ptep)
69 vma->vm_page_prot))); 74{
70 else 75 pte_t entry;
71 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); 76 int i;
72 entry = pte_mkyoung(entry);
73 mk_pte_huge(entry);
74 77
75 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { 78 entry = *ptep;
76 set_pte(page_table, entry);
77 page_table++;
78 79
79 pte_val(entry) += PAGE_SIZE; 80 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
81 pte_clear(mm, addr, ptep);
82 addr += PAGE_SIZE;
83 ptep++;
80 } 84 }
85
86 return entry;
81} 87}
82 88
83/* 89/*
@@ -92,79 +98,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
92 return 0; 98 return 0;
93} 99}
94 100
95int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
96 struct vm_area_struct *vma)
97{
98 pte_t *src_pte, *dst_pte, entry;
99 struct page *ptepage;
100 unsigned long addr = vma->vm_start;
101 unsigned long end = vma->vm_end;
102 int i;
103
104 while (addr < end) {
105 dst_pte = huge_pte_alloc(dst, addr);
106 if (!dst_pte)
107 goto nomem;
108 src_pte = huge_pte_offset(src, addr);
109 BUG_ON(!src_pte || pte_none(*src_pte));
110 entry = *src_pte;
111 ptepage = pte_page(entry);
112 get_page(ptepage);
113 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
114 set_pte(dst_pte, entry);
115 pte_val(entry) += PAGE_SIZE;
116 dst_pte++;
117 }
118 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
119 addr += HPAGE_SIZE;
120 }
121 return 0;
122
123nomem:
124 return -ENOMEM;
125}
126
127int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
128 struct page **pages, struct vm_area_struct **vmas,
129 unsigned long *position, int *length, int i)
130{
131 unsigned long vaddr = *position;
132 int remainder = *length;
133
134 WARN_ON(!is_vm_hugetlb_page(vma));
135
136 while (vaddr < vma->vm_end && remainder) {
137 if (pages) {
138 pte_t *pte;
139 struct page *page;
140
141 pte = huge_pte_offset(mm, vaddr);
142
143 /* hugetlb should be locked, and hence, prefaulted */
144 BUG_ON(!pte || pte_none(*pte));
145
146 page = pte_page(*pte);
147
148 WARN_ON(!PageCompound(page));
149
150 get_page(page);
151 pages[i] = page;
152 }
153
154 if (vmas)
155 vmas[i] = vma;
156
157 vaddr += PAGE_SIZE;
158 --remainder;
159 ++i;
160 }
161
162 *length = remainder;
163 *position = vaddr;
164
165 return i;
166}
167
168struct page *follow_huge_addr(struct mm_struct *mm, 101struct page *follow_huge_addr(struct mm_struct *mm,
169 unsigned long address, int write) 102 unsigned long address, int write)
170{ 103{
@@ -181,84 +114,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
181{ 114{
182 return NULL; 115 return NULL;
183} 116}
184
185void unmap_hugepage_range(struct vm_area_struct *vma,
186 unsigned long start, unsigned long end)
187{
188 struct mm_struct *mm = vma->vm_mm;
189 unsigned long address;
190 pte_t *pte;
191 struct page *page;
192 int i;
193
194 BUG_ON(start & (HPAGE_SIZE - 1));
195 BUG_ON(end & (HPAGE_SIZE - 1));
196
197 for (address = start; address < end; address += HPAGE_SIZE) {
198 pte = huge_pte_offset(mm, address);
199 BUG_ON(!pte);
200 if (pte_none(*pte))
201 continue;
202 page = pte_page(*pte);
203 put_page(page);
204 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
205 pte_clear(mm, address+(i*PAGE_SIZE), pte);
206 pte++;
207 }
208 }
209 add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
210 flush_tlb_range(vma, start, end);
211}
212
213int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
214{
215 struct mm_struct *mm = current->mm;
216 unsigned long addr;
217 int ret = 0;
218
219 BUG_ON(vma->vm_start & ~HPAGE_MASK);
220 BUG_ON(vma->vm_end & ~HPAGE_MASK);
221
222 spin_lock(&mm->page_table_lock);
223 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
224 unsigned long idx;
225 pte_t *pte = huge_pte_alloc(mm, addr);
226 struct page *page;
227
228 if (!pte) {
229 ret = -ENOMEM;
230 goto out;
231 }
232 if (!pte_none(*pte))
233 continue;
234
235 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
236 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
237 page = find_get_page(mapping, idx);
238 if (!page) {
239 /* charge the fs quota first */
240 if (hugetlb_get_quota(mapping)) {
241 ret = -ENOMEM;
242 goto out;
243 }
244 page = alloc_huge_page();
245 if (!page) {
246 hugetlb_put_quota(mapping);
247 ret = -ENOMEM;
248 goto out;
249 }
250 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
251 if (! ret) {
252 unlock_page(page);
253 } else {
254 hugetlb_put_quota(mapping);
255 free_huge_page(page);
256 goto out;
257 }
258 }
259 set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
260 }
261out:
262 spin_unlock(&mm->page_table_lock);
263 return ret;
264}
diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c
index bcad2aefa4ee..dcd9c8a8baf8 100644
--- a/arch/sh64/mm/hugetlbpage.c
+++ b/arch/sh64/mm/hugetlbpage.c
@@ -24,7 +24,7 @@
24#include <asm/tlbflush.h> 24#include <asm/tlbflush.h>
25#include <asm/cacheflush.h> 25#include <asm/cacheflush.h>
26 26
27static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 27pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
28{ 28{
29 pgd_t *pgd; 29 pgd_t *pgd;
30 pmd_t *pmd; 30 pmd_t *pmd;
@@ -39,7 +39,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
39 return pte; 39 return pte;
40} 40}
41 41
42static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 42pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
43{ 43{
44 pgd_t *pgd; 44 pgd_t *pgd;
45 pmd_t *pmd; 45 pmd_t *pmd;
@@ -80,6 +80,20 @@ static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
80 } 80 }
81} 81}
82 82
83pte_t huge_ptep_get_and_clear(pte_t *ptep)
84{
85 pte_t entry;
86
87 entry = *ptep;
88
89 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
90 pte_clear(pte);
91 pte++;
92 }
93
94 return entry;
95}
96
83/* 97/*
84 * This function checks for proper alignment of input addr and len parameters. 98 * This function checks for proper alignment of input addr and len parameters.
85 */ 99 */
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index 5a1f831b2de1..625cbb336a23 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -22,7 +22,7 @@
22#include <asm/cacheflush.h> 22#include <asm/cacheflush.h>
23#include <asm/mmu_context.h> 23#include <asm/mmu_context.h>
24 24
25static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 25pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
26{ 26{
27 pgd_t *pgd; 27 pgd_t *pgd;
28 pud_t *pud; 28 pud_t *pud;
@@ -41,7 +41,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
41 return pte; 41 return pte;
42} 42}
43 43
44static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 44pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
45{ 45{
46 pgd_t *pgd; 46 pgd_t *pgd;
47 pud_t *pud; 47 pud_t *pud;
@@ -62,30 +62,34 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
62 62
63#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0) 63#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
64 64
65static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, 65void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
66 unsigned long addr, 66 pte_t *ptep, pte_t entry)
67 struct page *page, pte_t * page_table, int write_access)
68{ 67{
69 unsigned long i; 68 int i;
70 pte_t entry; 69
70 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
71 set_pte_at(mm, addr, ptep, entry);
72 ptep++;
73 addr += PAGE_SIZE;
74 pte_val(entry) += PAGE_SIZE;
75 }
76}
71 77
72 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); 78pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
79 pte_t *ptep)
80{
81 pte_t entry;
82 int i;
73 83
74 if (write_access) 84 entry = *ptep;
75 entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
76 vma->vm_page_prot)));
77 else
78 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
79 entry = pte_mkyoung(entry);
80 mk_pte_huge(entry);
81 85
82 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { 86 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
83 set_pte_at(mm, addr, page_table, entry); 87 pte_clear(mm, addr, ptep);
84 page_table++;
85 addr += PAGE_SIZE; 88 addr += PAGE_SIZE;
86 89 ptep++;
87 pte_val(entry) += PAGE_SIZE;
88 } 90 }
91
92 return entry;
89} 93}
90 94
91/* 95/*
@@ -100,79 +104,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
100 return 0; 104 return 0;
101} 105}
102 106
103int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
104 struct vm_area_struct *vma)
105{
106 pte_t *src_pte, *dst_pte, entry;
107 struct page *ptepage;
108 unsigned long addr = vma->vm_start;
109 unsigned long end = vma->vm_end;
110 int i;
111
112 while (addr < end) {
113 dst_pte = huge_pte_alloc(dst, addr);
114 if (!dst_pte)
115 goto nomem;
116 src_pte = huge_pte_offset(src, addr);
117 BUG_ON(!src_pte || pte_none(*src_pte));
118 entry = *src_pte;
119 ptepage = pte_page(entry);
120 get_page(ptepage);
121 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
122 set_pte_at(dst, addr, dst_pte, entry);
123 pte_val(entry) += PAGE_SIZE;
124 dst_pte++;
125 addr += PAGE_SIZE;
126 }
127 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
128 }
129 return 0;
130
131nomem:
132 return -ENOMEM;
133}
134
135int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
136 struct page **pages, struct vm_area_struct **vmas,
137 unsigned long *position, int *length, int i)
138{
139 unsigned long vaddr = *position;
140 int remainder = *length;
141
142 WARN_ON(!is_vm_hugetlb_page(vma));
143
144 while (vaddr < vma->vm_end && remainder) {
145 if (pages) {
146 pte_t *pte;
147 struct page *page;
148
149 pte = huge_pte_offset(mm, vaddr);
150
151 /* hugetlb should be locked, and hence, prefaulted */
152 BUG_ON(!pte || pte_none(*pte));
153
154 page = pte_page(*pte);
155
156 WARN_ON(!PageCompound(page));
157
158 get_page(page);
159 pages[i] = page;
160 }
161
162 if (vmas)
163 vmas[i] = vma;
164
165 vaddr += PAGE_SIZE;
166 --remainder;
167 ++i;
168 }
169
170 *length = remainder;
171 *position = vaddr;
172
173 return i;
174}
175
176struct page *follow_huge_addr(struct mm_struct *mm, 107struct page *follow_huge_addr(struct mm_struct *mm,
177 unsigned long address, int write) 108 unsigned long address, int write)
178{ 109{
@@ -190,34 +121,6 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
190 return NULL; 121 return NULL;
191} 122}
192 123
193void unmap_hugepage_range(struct vm_area_struct *vma,
194 unsigned long start, unsigned long end)
195{
196 struct mm_struct *mm = vma->vm_mm;
197 unsigned long address;
198 pte_t *pte;
199 struct page *page;
200 int i;
201
202 BUG_ON(start & (HPAGE_SIZE - 1));
203 BUG_ON(end & (HPAGE_SIZE - 1));
204
205 for (address = start; address < end; address += HPAGE_SIZE) {
206 pte = huge_pte_offset(mm, address);
207 BUG_ON(!pte);
208 if (pte_none(*pte))
209 continue;
210 page = pte_page(*pte);
211 put_page(page);
212 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
213 pte_clear(mm, address+(i*PAGE_SIZE), pte);
214 pte++;
215 }
216 }
217 add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
218 flush_tlb_range(vma, start, end);
219}
220
221static void context_reload(void *__data) 124static void context_reload(void *__data)
222{ 125{
223 struct mm_struct *mm = __data; 126 struct mm_struct *mm = __data;
@@ -226,12 +129,8 @@ static void context_reload(void *__data)
226 load_secondary_context(mm); 129 load_secondary_context(mm);
227} 130}
228 131
229int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) 132void hugetlb_prefault_arch_hook(struct mm_struct *mm)
230{ 133{
231 struct mm_struct *mm = current->mm;
232 unsigned long addr;
233 int ret = 0;
234
235 /* On UltraSPARC-III+ and later, configure the second half of 134 /* On UltraSPARC-III+ and later, configure the second half of
236 * the Data-TLB for huge pages. 135 * the Data-TLB for huge pages.
237 */ 136 */
@@ -261,50 +160,4 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
261 } 160 }
262 spin_unlock(&ctx_alloc_lock); 161 spin_unlock(&ctx_alloc_lock);
263 } 162 }
264
265 BUG_ON(vma->vm_start & ~HPAGE_MASK);
266 BUG_ON(vma->vm_end & ~HPAGE_MASK);
267
268 spin_lock(&mm->page_table_lock);
269 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
270 unsigned long idx;
271 pte_t *pte = huge_pte_alloc(mm, addr);
272 struct page *page;
273
274 if (!pte) {
275 ret = -ENOMEM;
276 goto out;
277 }
278 if (!pte_none(*pte))
279 continue;
280
281 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
282 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
283 page = find_get_page(mapping, idx);
284 if (!page) {
285 /* charge the fs quota first */
286 if (hugetlb_get_quota(mapping)) {
287 ret = -ENOMEM;
288 goto out;
289 }
290 page = alloc_huge_page();
291 if (!page) {
292 hugetlb_put_quota(mapping);
293 ret = -ENOMEM;
294 goto out;
295 }
296 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
297 if (! ret) {
298 unlock_page(page);
299 } else {
300 hugetlb_put_quota(mapping);
301 free_huge_page(page);
302 goto out;
303 }
304 }
305 set_huge_pte(mm, vma, addr, page, pte, vma->vm_flags & VM_WRITE);
306 }
307out:
308 spin_unlock(&mm->page_table_lock);
309 return ret;
310} 163}