aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Gibson <david@gibson.dropbear.id.au>2005-06-21 20:14:44 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-21 21:46:15 -0400
commit63551ae0feaaa23807ebea60de1901564bbef32e (patch)
treef6f97f60f83c3e9813bdfcc6039c499997b1ea10
parent1e7e5a9048b30c57ba1ddaa6cdf59b21b65cde99 (diff)
[PATCH] Hugepage consolidation
A lot of the code in arch/*/mm/hugetlbpage.c is quite similar. This patch attempts to consolidate a lot of the code across the archs, putting the combined version in mm/hugetlb.c. There are a couple of uglyish hacks in order to convert all the hugepage archs, but the result is a very large reduction in the total amount of code. It also means things like hugepage lazy allocation could be implemented in one place, instead of six. Tested, at least a little, on ppc64, i386 and x86_64. Notes: - this patch changes the meaning of set_huge_pte() to be more analogous to set_pte() - does SH4 need a special huge_ptep_get_and_clear()?? Acked-by: William Lee Irwin <wli@holomorphy.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/i386/mm/hugetlbpage.c170
-rw-r--r--arch/ia64/mm/hugetlbpage.c158
-rw-r--r--arch/ppc64/mm/hugetlbpage.c180
-rw-r--r--arch/sh/mm/hugetlbpage.c196
-rw-r--r--arch/sh64/mm/hugetlbpage.c18
-rw-r--r--arch/sparc64/mm/hugetlbpage.c195
-rw-r--r--include/asm-i386/page.h1
-rw-r--r--include/asm-i386/pgtable.h2
-rw-r--r--include/asm-ia64/pgtable.h1
-rw-r--r--include/asm-sh/page.h1
-rw-r--r--include/asm-sh/pgtable.h1
-rw-r--r--include/asm-sh64/page.h1
-rw-r--r--include/asm-sh64/pgtable.h2
-rw-r--r--include/asm-sparc64/page.h2
-rw-r--r--include/asm-sparc64/pgtable.h1
-rw-r--r--include/asm-x86_64/page.h1
-rw-r--r--include/asm-x86_64/pgtable.h3
-rw-r--r--include/linux/hugetlb.h40
-rw-r--r--mm/hugetlb.c177
19 files changed, 300 insertions, 850 deletions
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index 171fc925e1e4..5aa06001a4bd 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -18,7 +18,7 @@
18#include <asm/tlb.h> 18#include <asm/tlb.h>
19#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
20 20
21static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 21pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
22{ 22{
23 pgd_t *pgd; 23 pgd_t *pgd;
24 pud_t *pud; 24 pud_t *pud;
@@ -30,7 +30,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
30 return (pte_t *) pmd; 30 return (pte_t *) pmd;
31} 31}
32 32
33static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 33pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
34{ 34{
35 pgd_t *pgd; 35 pgd_t *pgd;
36 pud_t *pud; 36 pud_t *pud;
@@ -42,21 +42,6 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
42 return (pte_t *) pmd; 42 return (pte_t *) pmd;
43} 43}
44 44
45static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page, pte_t * page_table, int write_access)
46{
47 pte_t entry;
48
49 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
50 if (write_access) {
51 entry =
52 pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
53 } else
54 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
55 entry = pte_mkyoung(entry);
56 mk_pte_huge(entry);
57 set_pte(page_table, entry);
58}
59
60/* 45/*
61 * This function checks for proper alignment of input addr and len parameters. 46 * This function checks for proper alignment of input addr and len parameters.
62 */ 47 */
@@ -69,77 +54,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
69 return 0; 54 return 0;
70} 55}
71 56
72int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
73 struct vm_area_struct *vma)
74{
75 pte_t *src_pte, *dst_pte, entry;
76 struct page *ptepage;
77 unsigned long addr = vma->vm_start;
78 unsigned long end = vma->vm_end;
79
80 while (addr < end) {
81 dst_pte = huge_pte_alloc(dst, addr);
82 if (!dst_pte)
83 goto nomem;
84 src_pte = huge_pte_offset(src, addr);
85 entry = *src_pte;
86 ptepage = pte_page(entry);
87 get_page(ptepage);
88 set_pte(dst_pte, entry);
89 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
90 addr += HPAGE_SIZE;
91 }
92 return 0;
93
94nomem:
95 return -ENOMEM;
96}
97
98int
99follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
100 struct page **pages, struct vm_area_struct **vmas,
101 unsigned long *position, int *length, int i)
102{
103 unsigned long vpfn, vaddr = *position;
104 int remainder = *length;
105
106 WARN_ON(!is_vm_hugetlb_page(vma));
107
108 vpfn = vaddr/PAGE_SIZE;
109 while (vaddr < vma->vm_end && remainder) {
110
111 if (pages) {
112 pte_t *pte;
113 struct page *page;
114
115 pte = huge_pte_offset(mm, vaddr);
116
117 /* hugetlb should be locked, and hence, prefaulted */
118 WARN_ON(!pte || pte_none(*pte));
119
120 page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
121
122 WARN_ON(!PageCompound(page));
123
124 get_page(page);
125 pages[i] = page;
126 }
127
128 if (vmas)
129 vmas[i] = vma;
130
131 vaddr += PAGE_SIZE;
132 ++vpfn;
133 --remainder;
134 ++i;
135 }
136
137 *length = remainder;
138 *position = vaddr;
139
140 return i;
141}
142
143#if 0 /* This is just for testing */ 57#if 0 /* This is just for testing */
144struct page * 58struct page *
145follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) 59follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
@@ -204,83 +118,15 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
204} 118}
205#endif 119#endif
206 120
207void unmap_hugepage_range(struct vm_area_struct *vma, 121void hugetlb_clean_stale_pgtable(pte_t *pte)
208 unsigned long start, unsigned long end)
209{ 122{
210 struct mm_struct *mm = vma->vm_mm; 123 pmd_t *pmd = (pmd_t *) pte;
211 unsigned long address;
212 pte_t pte, *ptep;
213 struct page *page; 124 struct page *page;
214 125
215 BUG_ON(start & (HPAGE_SIZE - 1)); 126 page = pmd_page(*pmd);
216 BUG_ON(end & (HPAGE_SIZE - 1)); 127 pmd_clear(pmd);
217 128 dec_page_state(nr_page_table_pages);
218 for (address = start; address < end; address += HPAGE_SIZE) { 129 page_cache_release(page);
219 ptep = huge_pte_offset(mm, address);
220 if (!ptep)
221 continue;
222 pte = ptep_get_and_clear(mm, address, ptep);
223 if (pte_none(pte))
224 continue;
225 page = pte_page(pte);
226 put_page(page);
227 }
228 add_mm_counter(mm ,rss, -((end - start) >> PAGE_SHIFT));
229 flush_tlb_range(vma, start, end);
230}
231
232int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
233{
234 struct mm_struct *mm = current->mm;
235 unsigned long addr;
236 int ret = 0;
237
238 BUG_ON(vma->vm_start & ~HPAGE_MASK);
239 BUG_ON(vma->vm_end & ~HPAGE_MASK);
240
241 spin_lock(&mm->page_table_lock);
242 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
243 unsigned long idx;
244 pte_t *pte = huge_pte_alloc(mm, addr);
245 struct page *page;
246
247 if (!pte) {
248 ret = -ENOMEM;
249 goto out;
250 }
251
252 if (!pte_none(*pte))
253 continue;
254
255 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
256 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
257 page = find_get_page(mapping, idx);
258 if (!page) {
259 /* charge the fs quota first */
260 if (hugetlb_get_quota(mapping)) {
261 ret = -ENOMEM;
262 goto out;
263 }
264 page = alloc_huge_page();
265 if (!page) {
266 hugetlb_put_quota(mapping);
267 ret = -ENOMEM;
268 goto out;
269 }
270 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
271 if (! ret) {
272 unlock_page(page);
273 } else {
274 hugetlb_put_quota(mapping);
275 free_huge_page(page);
276 goto out;
277 }
278 }
279 set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
280 }
281out:
282 spin_unlock(&mm->page_table_lock);
283 return ret;
284} 130}
285 131
286/* x86_64 also uses this file */ 132/* x86_64 also uses this file */
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index df08ae7634b6..e0a776a3044c 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -24,7 +24,7 @@
24 24
25unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT; 25unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT;
26 26
27static pte_t * 27pte_t *
28huge_pte_alloc (struct mm_struct *mm, unsigned long addr) 28huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
29{ 29{
30 unsigned long taddr = htlbpage_to_page(addr); 30 unsigned long taddr = htlbpage_to_page(addr);
@@ -43,7 +43,7 @@ huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
43 return pte; 43 return pte;
44} 44}
45 45
46static pte_t * 46pte_t *
47huge_pte_offset (struct mm_struct *mm, unsigned long addr) 47huge_pte_offset (struct mm_struct *mm, unsigned long addr)
48{ 48{
49 unsigned long taddr = htlbpage_to_page(addr); 49 unsigned long taddr = htlbpage_to_page(addr);
@@ -67,23 +67,6 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr)
67 67
68#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } 68#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
69 69
70static void
71set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma,
72 struct page *page, pte_t * page_table, int write_access)
73{
74 pte_t entry;
75
76 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
77 if (write_access) {
78 entry =
79 pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
80 } else
81 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
82 entry = pte_mkyoung(entry);
83 mk_pte_huge(entry);
84 set_pte(page_table, entry);
85 return;
86}
87/* 70/*
88 * This function checks for proper alignment of input addr and len parameters. 71 * This function checks for proper alignment of input addr and len parameters.
89 */ 72 */
@@ -99,68 +82,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
99 return 0; 82 return 0;
100} 83}
101 84
102int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
103 struct vm_area_struct *vma)
104{
105 pte_t *src_pte, *dst_pte, entry;
106 struct page *ptepage;
107 unsigned long addr = vma->vm_start;
108 unsigned long end = vma->vm_end;
109
110 while (addr < end) {
111 dst_pte = huge_pte_alloc(dst, addr);
112 if (!dst_pte)
113 goto nomem;
114 src_pte = huge_pte_offset(src, addr);
115 entry = *src_pte;
116 ptepage = pte_page(entry);
117 get_page(ptepage);
118 set_pte(dst_pte, entry);
119 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
120 addr += HPAGE_SIZE;
121 }
122 return 0;
123nomem:
124 return -ENOMEM;
125}
126
127int
128follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
129 struct page **pages, struct vm_area_struct **vmas,
130 unsigned long *st, int *length, int i)
131{
132 pte_t *ptep, pte;
133 unsigned long start = *st;
134 unsigned long pstart;
135 int len = *length;
136 struct page *page;
137
138 do {
139 pstart = start & HPAGE_MASK;
140 ptep = huge_pte_offset(mm, start);
141 pte = *ptep;
142
143back1:
144 page = pte_page(pte);
145 if (pages) {
146 page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
147 get_page(page);
148 pages[i] = page;
149 }
150 if (vmas)
151 vmas[i] = vma;
152 i++;
153 len--;
154 start += PAGE_SIZE;
155 if (((start & HPAGE_MASK) == pstart) && len &&
156 (start < vma->vm_end))
157 goto back1;
158 } while (len && start < vma->vm_end);
159 *length = len;
160 *st = start;
161 return i;
162}
163
164struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write) 85struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write)
165{ 86{
166 struct page *page; 87 struct page *page;
@@ -212,81 +133,6 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
212 free_pgd_range(tlb, addr, end, floor, ceiling); 133 free_pgd_range(tlb, addr, end, floor, ceiling);
213} 134}
214 135
215void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
216{
217 struct mm_struct *mm = vma->vm_mm;
218 unsigned long address;
219 pte_t *pte;
220 struct page *page;
221
222 BUG_ON(start & (HPAGE_SIZE - 1));
223 BUG_ON(end & (HPAGE_SIZE - 1));
224
225 for (address = start; address < end; address += HPAGE_SIZE) {
226 pte = huge_pte_offset(mm, address);
227 if (pte_none(*pte))
228 continue;
229 page = pte_page(*pte);
230 put_page(page);
231 pte_clear(mm, address, pte);
232 }
233 add_mm_counter(mm, rss, - ((end - start) >> PAGE_SHIFT));
234 flush_tlb_range(vma, start, end);
235}
236
237int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
238{
239 struct mm_struct *mm = current->mm;
240 unsigned long addr;
241 int ret = 0;
242
243 BUG_ON(vma->vm_start & ~HPAGE_MASK);
244 BUG_ON(vma->vm_end & ~HPAGE_MASK);
245
246 spin_lock(&mm->page_table_lock);
247 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
248 unsigned long idx;
249 pte_t *pte = huge_pte_alloc(mm, addr);
250 struct page *page;
251
252 if (!pte) {
253 ret = -ENOMEM;
254 goto out;
255 }
256 if (!pte_none(*pte))
257 continue;
258
259 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
260 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
261 page = find_get_page(mapping, idx);
262 if (!page) {
263 /* charge the fs quota first */
264 if (hugetlb_get_quota(mapping)) {
265 ret = -ENOMEM;
266 goto out;
267 }
268 page = alloc_huge_page();
269 if (!page) {
270 hugetlb_put_quota(mapping);
271 ret = -ENOMEM;
272 goto out;
273 }
274 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
275 if (! ret) {
276 unlock_page(page);
277 } else {
278 hugetlb_put_quota(mapping);
279 page_cache_release(page);
280 goto out;
281 }
282 }
283 set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
284 }
285out:
286 spin_unlock(&mm->page_table_lock);
287 return ret;
288}
289
290unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, 136unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
291 unsigned long pgoff, unsigned long flags) 137 unsigned long pgoff, unsigned long flags)
292{ 138{
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c
index d3bf86a5c1ad..b4ab766f5980 100644
--- a/arch/ppc64/mm/hugetlbpage.c
+++ b/arch/ppc64/mm/hugetlbpage.c
@@ -121,7 +121,7 @@ static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr
121 return hugepte_offset(dir, addr); 121 return hugepte_offset(dir, addr);
122} 122}
123 123
124static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 124pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
125{ 125{
126 pud_t *pud; 126 pud_t *pud;
127 127
@@ -134,7 +134,7 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
134 return hugepte_offset(pud, addr); 134 return hugepte_offset(pud, addr);
135} 135}
136 136
137static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 137pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
138{ 138{
139 pud_t *pud; 139 pud_t *pud;
140 140
@@ -147,25 +147,6 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
147 return hugepte_alloc(mm, pud, addr); 147 return hugepte_alloc(mm, pud, addr);
148} 148}
149 149
150static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
151 unsigned long addr, struct page *page,
152 pte_t *ptep, int write_access)
153{
154 pte_t entry;
155
156 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
157 if (write_access) {
158 entry =
159 pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
160 } else {
161 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
162 }
163 entry = pte_mkyoung(entry);
164 entry = pte_mkhuge(entry);
165
166 set_pte_at(mm, addr, ptep, entry);
167}
168
169/* 150/*
170 * This function checks for proper alignment of input addr and len parameters. 151 * This function checks for proper alignment of input addr and len parameters.
171 */ 152 */
@@ -259,80 +240,6 @@ int prepare_hugepage_range(unsigned long addr, unsigned long len)
259 return -EINVAL; 240 return -EINVAL;
260} 241}
261 242
262int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
263 struct vm_area_struct *vma)
264{
265 pte_t *src_pte, *dst_pte, entry;
266 struct page *ptepage;
267 unsigned long addr = vma->vm_start;
268 unsigned long end = vma->vm_end;
269 int err = -ENOMEM;
270
271 while (addr < end) {
272 dst_pte = huge_pte_alloc(dst, addr);
273 if (!dst_pte)
274 goto out;
275
276 src_pte = huge_pte_offset(src, addr);
277 entry = *src_pte;
278
279 ptepage = pte_page(entry);
280 get_page(ptepage);
281 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
282 set_pte_at(dst, addr, dst_pte, entry);
283
284 addr += HPAGE_SIZE;
285 }
286
287 err = 0;
288 out:
289 return err;
290}
291
292int
293follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
294 struct page **pages, struct vm_area_struct **vmas,
295 unsigned long *position, int *length, int i)
296{
297 unsigned long vpfn, vaddr = *position;
298 int remainder = *length;
299
300 WARN_ON(!is_vm_hugetlb_page(vma));
301
302 vpfn = vaddr/PAGE_SIZE;
303 while (vaddr < vma->vm_end && remainder) {
304 if (pages) {
305 pte_t *pte;
306 struct page *page;
307
308 pte = huge_pte_offset(mm, vaddr);
309
310 /* hugetlb should be locked, and hence, prefaulted */
311 WARN_ON(!pte || pte_none(*pte));
312
313 page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
314
315 WARN_ON(!PageCompound(page));
316
317 get_page(page);
318 pages[i] = page;
319 }
320
321 if (vmas)
322 vmas[i] = vma;
323
324 vaddr += PAGE_SIZE;
325 ++vpfn;
326 --remainder;
327 ++i;
328 }
329
330 *length = remainder;
331 *position = vaddr;
332
333 return i;
334}
335
336struct page * 243struct page *
337follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) 244follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
338{ 245{
@@ -363,89 +270,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
363 return NULL; 270 return NULL;
364} 271}
365 272
366void unmap_hugepage_range(struct vm_area_struct *vma,
367 unsigned long start, unsigned long end)
368{
369 struct mm_struct *mm = vma->vm_mm;
370 unsigned long addr;
371 pte_t *ptep;
372 struct page *page;
373
374 WARN_ON(!is_vm_hugetlb_page(vma));
375 BUG_ON((start % HPAGE_SIZE) != 0);
376 BUG_ON((end % HPAGE_SIZE) != 0);
377
378 for (addr = start; addr < end; addr += HPAGE_SIZE) {
379 pte_t pte;
380
381 ptep = huge_pte_offset(mm, addr);
382 if (!ptep || pte_none(*ptep))
383 continue;
384
385 pte = *ptep;
386 page = pte_page(pte);
387 pte_clear(mm, addr, ptep);
388
389 put_page(page);
390 }
391 add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
392 flush_tlb_pending();
393}
394
395int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
396{
397 struct mm_struct *mm = current->mm;
398 unsigned long addr;
399 int ret = 0;
400
401 WARN_ON(!is_vm_hugetlb_page(vma));
402 BUG_ON((vma->vm_start % HPAGE_SIZE) != 0);
403 BUG_ON((vma->vm_end % HPAGE_SIZE) != 0);
404
405 spin_lock(&mm->page_table_lock);
406 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
407 unsigned long idx;
408 pte_t *pte = huge_pte_alloc(mm, addr);
409 struct page *page;
410
411 if (!pte) {
412 ret = -ENOMEM;
413 goto out;
414 }
415 if (! pte_none(*pte))
416 continue;
417
418 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
419 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
420 page = find_get_page(mapping, idx);
421 if (!page) {
422 /* charge the fs quota first */
423 if (hugetlb_get_quota(mapping)) {
424 ret = -ENOMEM;
425 goto out;
426 }
427 page = alloc_huge_page();
428 if (!page) {
429 hugetlb_put_quota(mapping);
430 ret = -ENOMEM;
431 goto out;
432 }
433 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
434 if (! ret) {
435 unlock_page(page);
436 } else {
437 hugetlb_put_quota(mapping);
438 free_huge_page(page);
439 goto out;
440 }
441 }
442 set_huge_pte(mm, vma, addr, page, pte, vma->vm_flags & VM_WRITE);
443 }
444out:
445 spin_unlock(&mm->page_table_lock);
446 return ret;
447}
448
449/* Because we have an exclusive hugepage region which lies within the 273/* Because we have an exclusive hugepage region which lies within the
450 * normal user address space, we have to take special measures to make 274 * normal user address space, we have to take special measures to make
451 * non-huge mmap()s evade the hugepage reserved regions. */ 275 * non-huge mmap()s evade the hugepage reserved regions. */
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 1f897bab2318..95bb1a6c6060 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -24,7 +24,7 @@
24#include <asm/tlbflush.h> 24#include <asm/tlbflush.h>
25#include <asm/cacheflush.h> 25#include <asm/cacheflush.h>
26 26
27static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 27pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
28{ 28{
29 pgd_t *pgd; 29 pgd_t *pgd;
30 pmd_t *pmd; 30 pmd_t *pmd;
@@ -39,7 +39,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
39 return pte; 39 return pte;
40} 40}
41 41
42static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 42pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
43{ 43{
44 pgd_t *pgd; 44 pgd_t *pgd;
45 pmd_t *pmd; 45 pmd_t *pmd;
@@ -56,28 +56,34 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
56 56
57#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0) 57#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
58 58
59static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, 59void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
60 struct page *page, pte_t * page_table, int write_access) 60 pte_t *ptep, pte_t entry)
61{ 61{
62 unsigned long i; 62 int i;
63 pte_t entry;
64 63
65 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); 64 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
65 set_pte_at(mm, addr, ptep, entry);
66 ptep++;
67 addr += PAGE_SIZE;
68 pte_val(entry) += PAGE_SIZE;
69 }
70}
66 71
67 if (write_access) 72pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
68 entry = pte_mkwrite(pte_mkdirty(mk_pte(page, 73 pte_t *ptep)
69 vma->vm_page_prot))); 74{
70 else 75 pte_t entry;
71 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); 76 int i;
72 entry = pte_mkyoung(entry);
73 mk_pte_huge(entry);
74 77
75 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { 78 entry = *ptep;
76 set_pte(page_table, entry);
77 page_table++;
78 79
79 pte_val(entry) += PAGE_SIZE; 80 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
81 pte_clear(mm, addr, ptep);
82 addr += PAGE_SIZE;
83 ptep++;
80 } 84 }
85
86 return entry;
81} 87}
82 88
83/* 89/*
@@ -92,79 +98,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
92 return 0; 98 return 0;
93} 99}
94 100
95int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
96 struct vm_area_struct *vma)
97{
98 pte_t *src_pte, *dst_pte, entry;
99 struct page *ptepage;
100 unsigned long addr = vma->vm_start;
101 unsigned long end = vma->vm_end;
102 int i;
103
104 while (addr < end) {
105 dst_pte = huge_pte_alloc(dst, addr);
106 if (!dst_pte)
107 goto nomem;
108 src_pte = huge_pte_offset(src, addr);
109 BUG_ON(!src_pte || pte_none(*src_pte));
110 entry = *src_pte;
111 ptepage = pte_page(entry);
112 get_page(ptepage);
113 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
114 set_pte(dst_pte, entry);
115 pte_val(entry) += PAGE_SIZE;
116 dst_pte++;
117 }
118 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
119 addr += HPAGE_SIZE;
120 }
121 return 0;
122
123nomem:
124 return -ENOMEM;
125}
126
127int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
128 struct page **pages, struct vm_area_struct **vmas,
129 unsigned long *position, int *length, int i)
130{
131 unsigned long vaddr = *position;
132 int remainder = *length;
133
134 WARN_ON(!is_vm_hugetlb_page(vma));
135
136 while (vaddr < vma->vm_end && remainder) {
137 if (pages) {
138 pte_t *pte;
139 struct page *page;
140
141 pte = huge_pte_offset(mm, vaddr);
142
143 /* hugetlb should be locked, and hence, prefaulted */
144 BUG_ON(!pte || pte_none(*pte));
145
146 page = pte_page(*pte);
147
148 WARN_ON(!PageCompound(page));
149
150 get_page(page);
151 pages[i] = page;
152 }
153
154 if (vmas)
155 vmas[i] = vma;
156
157 vaddr += PAGE_SIZE;
158 --remainder;
159 ++i;
160 }
161
162 *length = remainder;
163 *position = vaddr;
164
165 return i;
166}
167
168struct page *follow_huge_addr(struct mm_struct *mm, 101struct page *follow_huge_addr(struct mm_struct *mm,
169 unsigned long address, int write) 102 unsigned long address, int write)
170{ 103{
@@ -181,84 +114,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
181{ 114{
182 return NULL; 115 return NULL;
183} 116}
184
185void unmap_hugepage_range(struct vm_area_struct *vma,
186 unsigned long start, unsigned long end)
187{
188 struct mm_struct *mm = vma->vm_mm;
189 unsigned long address;
190 pte_t *pte;
191 struct page *page;
192 int i;
193
194 BUG_ON(start & (HPAGE_SIZE - 1));
195 BUG_ON(end & (HPAGE_SIZE - 1));
196
197 for (address = start; address < end; address += HPAGE_SIZE) {
198 pte = huge_pte_offset(mm, address);
199 BUG_ON(!pte);
200 if (pte_none(*pte))
201 continue;
202 page = pte_page(*pte);
203 put_page(page);
204 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
205 pte_clear(mm, address+(i*PAGE_SIZE), pte);
206 pte++;
207 }
208 }
209 add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
210 flush_tlb_range(vma, start, end);
211}
212
213int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
214{
215 struct mm_struct *mm = current->mm;
216 unsigned long addr;
217 int ret = 0;
218
219 BUG_ON(vma->vm_start & ~HPAGE_MASK);
220 BUG_ON(vma->vm_end & ~HPAGE_MASK);
221
222 spin_lock(&mm->page_table_lock);
223 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
224 unsigned long idx;
225 pte_t *pte = huge_pte_alloc(mm, addr);
226 struct page *page;
227
228 if (!pte) {
229 ret = -ENOMEM;
230 goto out;
231 }
232 if (!pte_none(*pte))
233 continue;
234
235 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
236 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
237 page = find_get_page(mapping, idx);
238 if (!page) {
239 /* charge the fs quota first */
240 if (hugetlb_get_quota(mapping)) {
241 ret = -ENOMEM;
242 goto out;
243 }
244 page = alloc_huge_page();
245 if (!page) {
246 hugetlb_put_quota(mapping);
247 ret = -ENOMEM;
248 goto out;
249 }
250 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
251 if (! ret) {
252 unlock_page(page);
253 } else {
254 hugetlb_put_quota(mapping);
255 free_huge_page(page);
256 goto out;
257 }
258 }
259 set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
260 }
261out:
262 spin_unlock(&mm->page_table_lock);
263 return ret;
264}
diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c
index bcad2aefa4ee..dcd9c8a8baf8 100644
--- a/arch/sh64/mm/hugetlbpage.c
+++ b/arch/sh64/mm/hugetlbpage.c
@@ -24,7 +24,7 @@
24#include <asm/tlbflush.h> 24#include <asm/tlbflush.h>
25#include <asm/cacheflush.h> 25#include <asm/cacheflush.h>
26 26
27static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 27pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
28{ 28{
29 pgd_t *pgd; 29 pgd_t *pgd;
30 pmd_t *pmd; 30 pmd_t *pmd;
@@ -39,7 +39,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
39 return pte; 39 return pte;
40} 40}
41 41
42static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 42pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
43{ 43{
44 pgd_t *pgd; 44 pgd_t *pgd;
45 pmd_t *pmd; 45 pmd_t *pmd;
@@ -80,6 +80,20 @@ static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
80 } 80 }
81} 81}
82 82
83pte_t huge_ptep_get_and_clear(pte_t *ptep)
84{
85 pte_t entry;
86
87 entry = *ptep;
88
89 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
90 pte_clear(pte);
91 pte++;
92 }
93
94 return entry;
95}
96
83/* 97/*
84 * This function checks for proper alignment of input addr and len parameters. 98 * This function checks for proper alignment of input addr and len parameters.
85 */ 99 */
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index 5a1f831b2de1..625cbb336a23 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -22,7 +22,7 @@
22#include <asm/cacheflush.h> 22#include <asm/cacheflush.h>
23#include <asm/mmu_context.h> 23#include <asm/mmu_context.h>
24 24
25static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 25pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
26{ 26{
27 pgd_t *pgd; 27 pgd_t *pgd;
28 pud_t *pud; 28 pud_t *pud;
@@ -41,7 +41,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
41 return pte; 41 return pte;
42} 42}
43 43
44static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 44pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
45{ 45{
46 pgd_t *pgd; 46 pgd_t *pgd;
47 pud_t *pud; 47 pud_t *pud;
@@ -62,30 +62,34 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
62 62
63#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0) 63#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
64 64
65static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, 65void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
66 unsigned long addr, 66 pte_t *ptep, pte_t entry)
67 struct page *page, pte_t * page_table, int write_access)
68{ 67{
69 unsigned long i; 68 int i;
70 pte_t entry; 69
70 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
71 set_pte_at(mm, addr, ptep, entry);
72 ptep++;
73 addr += PAGE_SIZE;
74 pte_val(entry) += PAGE_SIZE;
75 }
76}
71 77
72 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); 78pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
79 pte_t *ptep)
80{
81 pte_t entry;
82 int i;
73 83
74 if (write_access) 84 entry = *ptep;
75 entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
76 vma->vm_page_prot)));
77 else
78 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
79 entry = pte_mkyoung(entry);
80 mk_pte_huge(entry);
81 85
82 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { 86 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
83 set_pte_at(mm, addr, page_table, entry); 87 pte_clear(mm, addr, ptep);
84 page_table++;
85 addr += PAGE_SIZE; 88 addr += PAGE_SIZE;
86 89 ptep++;
87 pte_val(entry) += PAGE_SIZE;
88 } 90 }
91
92 return entry;
89} 93}
90 94
91/* 95/*
@@ -100,79 +104,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
100 return 0; 104 return 0;
101} 105}
102 106
103int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
104 struct vm_area_struct *vma)
105{
106 pte_t *src_pte, *dst_pte, entry;
107 struct page *ptepage;
108 unsigned long addr = vma->vm_start;
109 unsigned long end = vma->vm_end;
110 int i;
111
112 while (addr < end) {
113 dst_pte = huge_pte_alloc(dst, addr);
114 if (!dst_pte)
115 goto nomem;
116 src_pte = huge_pte_offset(src, addr);
117 BUG_ON(!src_pte || pte_none(*src_pte));
118 entry = *src_pte;
119 ptepage = pte_page(entry);
120 get_page(ptepage);
121 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
122 set_pte_at(dst, addr, dst_pte, entry);
123 pte_val(entry) += PAGE_SIZE;
124 dst_pte++;
125 addr += PAGE_SIZE;
126 }
127 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
128 }
129 return 0;
130
131nomem:
132 return -ENOMEM;
133}
134
135int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
136 struct page **pages, struct vm_area_struct **vmas,
137 unsigned long *position, int *length, int i)
138{
139 unsigned long vaddr = *position;
140 int remainder = *length;
141
142 WARN_ON(!is_vm_hugetlb_page(vma));
143
144 while (vaddr < vma->vm_end && remainder) {
145 if (pages) {
146 pte_t *pte;
147 struct page *page;
148
149 pte = huge_pte_offset(mm, vaddr);
150
151 /* hugetlb should be locked, and hence, prefaulted */
152 BUG_ON(!pte || pte_none(*pte));
153
154 page = pte_page(*pte);
155
156 WARN_ON(!PageCompound(page));
157
158 get_page(page);
159 pages[i] = page;
160 }
161
162 if (vmas)
163 vmas[i] = vma;
164
165 vaddr += PAGE_SIZE;
166 --remainder;
167 ++i;
168 }
169
170 *length = remainder;
171 *position = vaddr;
172
173 return i;
174}
175
176struct page *follow_huge_addr(struct mm_struct *mm, 107struct page *follow_huge_addr(struct mm_struct *mm,
177 unsigned long address, int write) 108 unsigned long address, int write)
178{ 109{
@@ -190,34 +121,6 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
190 return NULL; 121 return NULL;
191} 122}
192 123
/*
 * Arch-specific unmap_hugepage_range() that this patch removes.
 *
 * @start and @end must both be HPAGE_SIZE aligned (BUG otherwise).  For
 * each hugepage in [start, end) the pte is looked up (it must exist);
 * empty ptes are skipped, otherwise the page reference is dropped and the
 * (1 << HUGETLB_PAGE_ORDER) consecutive base-page ptes are cleared.
 * Afterwards rss is reduced by the number of base pages in the whole
 * range and the TLB is flushed for [start, end).
 */
193void unmap_hugepage_range(struct vm_area_struct *vma,
194 unsigned long start, unsigned long end)
195{
196 struct mm_struct *mm = vma->vm_mm;
197 unsigned long address;
198 pte_t *pte;
199 struct page *page;
200 int i;
201
202 BUG_ON(start & (HPAGE_SIZE - 1));
203 BUG_ON(end & (HPAGE_SIZE - 1));
204
205 for (address = start; address < end; address += HPAGE_SIZE) {
206 pte = huge_pte_offset(mm, address);
207 BUG_ON(!pte);
208 if (pte_none(*pte))
209 continue;
210 page = pte_page(*pte);
211 put_page(page);
212 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
213 pte_clear(mm, address+(i*PAGE_SIZE), pte);
214 pte++;
215 }
216 }
/* NOTE(review): rss is reduced for the full range, including skipped (empty) ptes. */
217 add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
218 flush_tlb_range(vma, start, end);
219}
220
221static void context_reload(void *__data) 124static void context_reload(void *__data)
222{ 125{
223 struct mm_struct *mm = __data; 126 struct mm_struct *mm = __data;
@@ -226,12 +129,8 @@ static void context_reload(void *__data)
226 load_secondary_context(mm); 129 load_secondary_context(mm);
227} 130}
228 131
229int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) 132void hugetlb_prefault_arch_hook(struct mm_struct *mm)
230{ 133{
231 struct mm_struct *mm = current->mm;
232 unsigned long addr;
233 int ret = 0;
234
235 /* On UltraSPARC-III+ and later, configure the second half of 134 /* On UltraSPARC-III+ and later, configure the second half of
236 * the Data-TLB for huge pages. 135 * the Data-TLB for huge pages.
237 */ 136 */
@@ -261,50 +160,4 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
261 } 160 }
262 spin_unlock(&ctx_alloc_lock); 161 spin_unlock(&ctx_alloc_lock);
263 } 162 }
264
265 BUG_ON(vma->vm_start & ~HPAGE_MASK);
266 BUG_ON(vma->vm_end & ~HPAGE_MASK);
267
268 spin_lock(&mm->page_table_lock);
269 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
270 unsigned long idx;
271 pte_t *pte = huge_pte_alloc(mm, addr);
272 struct page *page;
273
274 if (!pte) {
275 ret = -ENOMEM;
276 goto out;
277 }
278 if (!pte_none(*pte))
279 continue;
280
281 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
282 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
283 page = find_get_page(mapping, idx);
284 if (!page) {
285 /* charge the fs quota first */
286 if (hugetlb_get_quota(mapping)) {
287 ret = -ENOMEM;
288 goto out;
289 }
290 page = alloc_huge_page();
291 if (!page) {
292 hugetlb_put_quota(mapping);
293 ret = -ENOMEM;
294 goto out;
295 }
296 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
297 if (! ret) {
298 unlock_page(page);
299 } else {
300 hugetlb_put_quota(mapping);
301 free_huge_page(page);
302 goto out;
303 }
304 }
305 set_huge_pte(mm, vma, addr, page, pte, vma->vm_flags & VM_WRITE);
306 }
307out:
308 spin_unlock(&mm->page_table_lock);
309 return ret;
310} 163}
diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h
index ed13969fa2d6..41400d342d44 100644
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -68,6 +68,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
68#define HPAGE_MASK (~(HPAGE_SIZE - 1)) 68#define HPAGE_MASK (~(HPAGE_SIZE - 1))
69#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) 69#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
70#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA 70#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
71#define ARCH_HAS_HUGETLB_CLEAN_STALE_PGTABLE
71#endif 72#endif
72 73
73#define pgd_val(x) ((x).pgd) 74#define pgd_val(x) ((x).pgd)
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 8d60c2b4b003..e9efe148fdf7 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -236,6 +236,7 @@ static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return
236static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } 236static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; }
237static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } 237static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
238static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } 238static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
239static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PRESENT | _PAGE_PSE; return pte; }
239 240
240#ifdef CONFIG_X86_PAE 241#ifdef CONFIG_X86_PAE
241# include <asm/pgtable-3level.h> 242# include <asm/pgtable-3level.h>
@@ -275,7 +276,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
275 */ 276 */
276 277
277#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) 278#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
278#define mk_pte_huge(entry) ((entry).pte_low |= _PAGE_PRESENT | _PAGE_PSE)
279 279
280static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) 280static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
281{ 281{
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h
index fcc9c3344ab4..48586e08f432 100644
--- a/include/asm-ia64/pgtable.h
+++ b/include/asm-ia64/pgtable.h
@@ -283,6 +283,7 @@ ia64_phys_addr_valid (unsigned long addr)
283#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A)) 283#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A))
284#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) 284#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D))
285#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) 285#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D))
286#define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_P))
286 287
287/* 288/*
288 * Macro to a page protection value as "uncacheable". Note that "protection" is really a 289 * Macro to a page protection value as "uncacheable". Note that "protection" is really a
diff --git a/include/asm-sh/page.h b/include/asm-sh/page.h
index 4c6d129e7d91..180467be8e7b 100644
--- a/include/asm-sh/page.h
+++ b/include/asm-sh/page.h
@@ -31,6 +31,7 @@
31#define HPAGE_SIZE (1UL << HPAGE_SHIFT) 31#define HPAGE_SIZE (1UL << HPAGE_SHIFT)
32#define HPAGE_MASK (~(HPAGE_SIZE-1)) 32#define HPAGE_MASK (~(HPAGE_SIZE-1))
33#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT-PAGE_SHIFT) 33#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT-PAGE_SHIFT)
34#define ARCH_HAS_SETCLEAR_HUGE_PTE
34#endif 35#endif
35 36
36#ifdef __KERNEL__ 37#ifdef __KERNEL__
diff --git a/include/asm-sh/pgtable.h b/include/asm-sh/pgtable.h
index cd847a47a9aa..ecb909572d3f 100644
--- a/include/asm-sh/pgtable.h
+++ b/include/asm-sh/pgtable.h
@@ -196,6 +196,7 @@ static inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _
196static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } 196static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
197static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } 197static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
198static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } 198static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; }
199static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_SZHUGE)); return pte; }
199 200
200/* 201/*
201 * Macro and implementation to make a page protection as uncachable. 202 * Macro and implementation to make a page protection as uncachable.
diff --git a/include/asm-sh64/page.h b/include/asm-sh64/page.h
index e1f7f5a41210..d6167f1c0e99 100644
--- a/include/asm-sh64/page.h
+++ b/include/asm-sh64/page.h
@@ -41,6 +41,7 @@
41#define HPAGE_SIZE (1UL << HPAGE_SHIFT) 41#define HPAGE_SIZE (1UL << HPAGE_SHIFT)
42#define HPAGE_MASK (~(HPAGE_SIZE-1)) 42#define HPAGE_MASK (~(HPAGE_SIZE-1))
43#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT-PAGE_SHIFT) 43#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT-PAGE_SHIFT)
44#define ARCH_HAS_SETCLEAR_HUGE_PTE
44#endif 45#endif
45 46
46#ifdef __KERNEL__ 47#ifdef __KERNEL__
diff --git a/include/asm-sh64/pgtable.h b/include/asm-sh64/pgtable.h
index 525e1523ef5f..78ac6be2d9ef 100644
--- a/include/asm-sh64/pgtable.h
+++ b/include/asm-sh64/pgtable.h
@@ -430,6 +430,8 @@ extern inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) |
430extern inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_EXECUTE)); return pte; } 430extern inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_EXECUTE)); return pte; }
431extern inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } 431extern inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
432extern inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } 432extern inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
433extern inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_SZHUGE)); return pte; }
434
433 435
434/* 436/*
435 * Conversion functions: convert a page and protection to a page entry. 437 * Conversion functions: convert a page and protection to a page entry.
diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h
index 219ea043a14a..b87dbbd64bc9 100644
--- a/include/asm-sparc64/page.h
+++ b/include/asm-sparc64/page.h
@@ -95,6 +95,8 @@ typedef unsigned long pgprot_t;
95#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) 95#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
96#define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) 96#define HPAGE_MASK (~(HPAGE_SIZE - 1UL))
97#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) 97#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
98#define ARCH_HAS_SETCLEAR_HUGE_PTE
99#define ARCH_HAS_HUGETLB_PREFAULT_HOOK
98#endif 100#endif
99 101
100#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ 102#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index ae2cd5b09a7c..1ae00c5087f1 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -286,6 +286,7 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
286#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_ACCESSED | _PAGE_R)) 286#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_ACCESSED | _PAGE_R))
287#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_WRITE)) 287#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_WRITE))
288#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_MODIFIED | _PAGE_W)) 288#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_MODIFIED | _PAGE_W))
289#define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_SZHUGE))
289 290
290/* to find an entry in a page-table-directory. */ 291/* to find an entry in a page-table-directory. */
291#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) 292#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h
index f43048035a03..9ce338c3a71e 100644
--- a/include/asm-x86_64/page.h
+++ b/include/asm-x86_64/page.h
@@ -28,6 +28,7 @@
28#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) 28#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
29#define HPAGE_MASK (~(HPAGE_SIZE - 1)) 29#define HPAGE_MASK (~(HPAGE_SIZE - 1))
30#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) 30#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
31#define ARCH_HAS_HUGETLB_CLEAN_STALE_PGTABLE
31 32
32#ifdef __KERNEL__ 33#ifdef __KERNEL__
33#ifndef __ASSEMBLY__ 34#ifndef __ASSEMBLY__
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index db2a0efbf573..4eec176c3c39 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -253,6 +253,7 @@ extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;
253extern inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } 253extern inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
254static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } 254static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
255 255
256#define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
256extern inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } 257extern inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; }
257extern inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } 258extern inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; }
258extern inline pte_t pte_mkclean(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); return pte; } 259extern inline pte_t pte_mkclean(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); return pte; }
@@ -263,6 +264,7 @@ extern inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _
263extern inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } 264extern inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
264extern inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } 265extern inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
265extern inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } 266extern inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; }
267extern inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | __LARGE_PTE)); return pte; }
266 268
267struct vm_area_struct; 269struct vm_area_struct;
268 270
@@ -290,7 +292,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
290 */ 292 */
291#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) 293#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))
292 294
293#define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
294static inline int pmd_large(pmd_t pte) { 295static inline int pmd_large(pmd_t pte) {
295 return (pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE; 296 return (pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE;
296} 297}
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6af1ae4a8211..f529d1442815 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -4,6 +4,7 @@
4#ifdef CONFIG_HUGETLB_PAGE 4#ifdef CONFIG_HUGETLB_PAGE
5 5
6#include <linux/mempolicy.h> 6#include <linux/mempolicy.h>
7#include <asm/tlbflush.h>
7 8
8struct ctl_table; 9struct ctl_table;
9 10
@@ -22,12 +23,6 @@ int hugetlb_report_meminfo(char *);
22int hugetlb_report_node_meminfo(int, char *); 23int hugetlb_report_node_meminfo(int, char *);
23int is_hugepage_mem_enough(size_t); 24int is_hugepage_mem_enough(size_t);
24unsigned long hugetlb_total_pages(void); 25unsigned long hugetlb_total_pages(void);
25struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
26 int write);
27struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
28 pmd_t *pmd, int write);
29int is_aligned_hugepage_range(unsigned long addr, unsigned long len);
30int pmd_huge(pmd_t pmd);
31struct page *alloc_huge_page(void); 26struct page *alloc_huge_page(void);
32void free_huge_page(struct page *); 27void free_huge_page(struct page *);
33 28
@@ -35,6 +30,17 @@ extern unsigned long max_huge_pages;
35extern const unsigned long hugetlb_zero, hugetlb_infinity; 30extern const unsigned long hugetlb_zero, hugetlb_infinity;
36extern int sysctl_hugetlb_shm_group; 31extern int sysctl_hugetlb_shm_group;
37 32
33/* arch callbacks */
/*
 * These are implemented per architecture and called by the generic code
 * in mm/hugetlb.c.  The generic callers treat a NULL return from
 * huge_pte_alloc() as -ENOMEM, and check the result of huge_pte_offset()
 * for NULL before dereferencing it in copy_hugetlb_page_range().
 */
34
35pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr);
36pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
37struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
38 int write);
39struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
40 pmd_t *pmd, int write);
41int is_aligned_hugepage_range(unsigned long addr, unsigned long len);
42int pmd_huge(pmd_t pmd);
43
38#ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE 44#ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE
39#define is_hugepage_only_range(mm, addr, len) 0 45#define is_hugepage_only_range(mm, addr, len) 0
40#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ 46#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
@@ -48,6 +54,28 @@ extern int sysctl_hugetlb_shm_group;
48int prepare_hugepage_range(unsigned long addr, unsigned long len); 54int prepare_hugepage_range(unsigned long addr, unsigned long len);
49#endif 55#endif
50 56
/*
 * Setting and clearing a hugepage pte.  By default these map directly onto
 * the ordinary set_pte_at()/ptep_get_and_clear().  An architecture whose
 * <asm/page.h> defines ARCH_HAS_SETCLEAR_HUGE_PTE (in this patch: sh, sh64
 * and sparc64) must provide its own out-of-line versions instead.
 */
57#ifndef ARCH_HAS_SETCLEAR_HUGE_PTE
58#define set_huge_pte_at(mm, addr, ptep, pte) set_pte_at(mm, addr, ptep, pte)
59#define huge_ptep_get_and_clear(mm, addr, ptep) ptep_get_and_clear(mm, addr, ptep)
60#else
61void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
62 pte_t *ptep, pte_t pte);
63pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
64 pte_t *ptep);
65#endif
66
/*
 * Optional hook called by hugetlb_prefault() before it takes the page
 * table lock.  It is a no-op unless the architecture defines
 * ARCH_HAS_HUGETLB_PREFAULT_HOOK (in this patch: sparc64).
 */
67#ifndef ARCH_HAS_HUGETLB_PREFAULT_HOOK
68#define hugetlb_prefault_arch_hook(mm) do { } while (0)
69#else
70void hugetlb_prefault_arch_hook(struct mm_struct *mm);
71#endif
72
/*
 * Called by hugetlb_prefault() when the pte slot returned by
 * huge_pte_alloc() is not empty.  Architectures that do not define
 * ARCH_HAS_HUGETLB_CLEAN_STALE_PGTABLE (defined in this patch by i386 and
 * x86_64) treat that situation as a bug.
 */
73#ifndef ARCH_HAS_HUGETLB_CLEAN_STALE_PGTABLE
74#define hugetlb_clean_stale_pgtable(pte) BUG()
75#else
76void hugetlb_clean_stale_pgtable(pte_t *pte);
77#endif
78
51#else /* !CONFIG_HUGETLB_PAGE */ 79#else /* !CONFIG_HUGETLB_PAGE */
52 80
53static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) 81static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4eb5ae3fbe10..fbd1111ea119 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7,10 +7,14 @@
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/hugetlb.h>
11#include <linux/sysctl.h> 10#include <linux/sysctl.h>
12#include <linux/highmem.h> 11#include <linux/highmem.h>
13#include <linux/nodemask.h> 12#include <linux/nodemask.h>
13#include <linux/pagemap.h>
14#include <asm/page.h>
15#include <asm/pgtable.h>
16
17#include <linux/hugetlb.h>
14 18
15const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; 19const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
16static unsigned long nr_huge_pages, free_huge_pages; 20static unsigned long nr_huge_pages, free_huge_pages;
@@ -249,6 +253,72 @@ struct vm_operations_struct hugetlb_vm_ops = {
249 .nopage = hugetlb_nopage, 253 .nopage = hugetlb_nopage,
250}; 254};
251 255
256static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page)
257{
258 pte_t entry;
259
260 if (vma->vm_flags & VM_WRITE) {
261 entry =
262 pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
263 } else {
264 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
265 }
266 entry = pte_mkyoung(entry);
267 entry = pte_mkhuge(entry);
268
269 return entry;
270}
271
272int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
273 struct vm_area_struct *vma)
274{
275 pte_t *src_pte, *dst_pte, entry;
276 struct page *ptepage;
277 unsigned long addr = vma->vm_start;
278 unsigned long end = vma->vm_end;
279
280 while (addr < end) {
281 dst_pte = huge_pte_alloc(dst, addr);
282 if (!dst_pte)
283 goto nomem;
284 src_pte = huge_pte_offset(src, addr);
285 BUG_ON(!src_pte || pte_none(*src_pte)); /* prefaulted */
286 entry = *src_pte;
287 ptepage = pte_page(entry);
288 get_page(ptepage);
289 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
290 set_huge_pte_at(dst, addr, dst_pte, entry);
291 addr += HPAGE_SIZE;
292 }
293 return 0;
294
295nomem:
296 return -ENOMEM;
297}
298
299void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
300 unsigned long end)
301{
302 struct mm_struct *mm = vma->vm_mm;
303 unsigned long address;
304 pte_t pte;
305 struct page *page;
306
307 WARN_ON(!is_vm_hugetlb_page(vma));
308 BUG_ON(start & ~HPAGE_MASK);
309 BUG_ON(end & ~HPAGE_MASK);
310
311 for (address = start; address < end; address += HPAGE_SIZE) {
312 pte = huge_ptep_get_and_clear(mm, address, huge_pte_offset(mm, address));
313 if (pte_none(pte))
314 continue;
315 page = pte_page(pte);
316 put_page(page);
317 }
318 add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
319 flush_tlb_range(vma, start, end);
320}
321
252void zap_hugepage_range(struct vm_area_struct *vma, 322void zap_hugepage_range(struct vm_area_struct *vma,
253 unsigned long start, unsigned long length) 323 unsigned long start, unsigned long length)
254{ 324{
@@ -258,3 +328,108 @@ void zap_hugepage_range(struct vm_area_struct *vma,
258 unmap_hugepage_range(vma, start, start + length); 328 unmap_hugepage_range(vma, start, start + length);
259 spin_unlock(&mm->page_table_lock); 329 spin_unlock(&mm->page_table_lock);
260} 330}
331
332int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
333{
334 struct mm_struct *mm = current->mm;
335 unsigned long addr;
336 int ret = 0;
337
338 WARN_ON(!is_vm_hugetlb_page(vma));
339 BUG_ON(vma->vm_start & ~HPAGE_MASK);
340 BUG_ON(vma->vm_end & ~HPAGE_MASK);
341
342 hugetlb_prefault_arch_hook(mm);
343
344 spin_lock(&mm->page_table_lock);
345 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
346 unsigned long idx;
347 pte_t *pte = huge_pte_alloc(mm, addr);
348 struct page *page;
349
350 if (!pte) {
351 ret = -ENOMEM;
352 goto out;
353 }
354 if (! pte_none(*pte))
355 hugetlb_clean_stale_pgtable(pte);
356
357 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
358 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
359 page = find_get_page(mapping, idx);
360 if (!page) {
361 /* charge the fs quota first */
362 if (hugetlb_get_quota(mapping)) {
363 ret = -ENOMEM;
364 goto out;
365 }
366 page = alloc_huge_page();
367 if (!page) {
368 hugetlb_put_quota(mapping);
369 ret = -ENOMEM;
370 goto out;
371 }
372 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
373 if (! ret) {
374 unlock_page(page);
375 } else {
376 hugetlb_put_quota(mapping);
377 free_huge_page(page);
378 goto out;
379 }
380 }
381 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
382 set_huge_pte_at(mm, addr, pte, make_huge_pte(vma, page));
383 }
384out:
385 spin_unlock(&mm->page_table_lock);
386 return ret;
387}
388
389int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
390 struct page **pages, struct vm_area_struct **vmas,
391 unsigned long *position, int *length, int i)
392{
393 unsigned long vpfn, vaddr = *position;
394 int remainder = *length;
395
396 BUG_ON(!is_vm_hugetlb_page(vma));
397
398 vpfn = vaddr/PAGE_SIZE;
399 while (vaddr < vma->vm_end && remainder) {
400
401 if (pages) {
402 pte_t *pte;
403 struct page *page;
404
405 /* Some archs (sparc64, sh*) have multiple
406 * pte_ts to each hugepage. We have to make
407 * sure we get the first, for the page
408 * indexing below to work. */
409 pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
410
411 /* hugetlb should be locked, and hence, prefaulted */
412 WARN_ON(!pte || pte_none(*pte));
413
414 page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
415
416 WARN_ON(!PageCompound(page));
417
418 get_page(page);
419 pages[i] = page;
420 }
421
422 if (vmas)
423 vmas[i] = vma;
424
425 vaddr += PAGE_SIZE;
426 ++vpfn;
427 --remainder;
428 ++i;
429 }
430
431 *length = remainder;
432 *position = vaddr;
433
434 return i;
435}