diff options
author | David Gibson <david@gibson.dropbear.id.au> | 2005-06-21 20:14:44 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-21 21:46:15 -0400 |
commit | 63551ae0feaaa23807ebea60de1901564bbef32e (patch) | |
tree | f6f97f60f83c3e9813bdfcc6039c499997b1ea10 /mm/hugetlb.c | |
parent | 1e7e5a9048b30c57ba1ddaa6cdf59b21b65cde99 (diff) |
[PATCH] Hugepage consolidation
A lot of the code in arch/*/mm/hugetlbpage.c is quite similar. This patch
attempts to consolidate a lot of the code across the archs, putting the
combined version in mm/hugetlb.c. There are a couple of uglyish hacks in
order to convert all the hugepage archs, but the result is a very large
reduction in the total amount of code. It also means things like hugepage
lazy allocation could be implemented in one place, instead of six.
Tested, at least a little, on ppc64, i386 and x86_64.
Notes:
- this patch changes the meaning of set_huge_pte() to be more
analogous to set_pte()
- does SH4 need a special huge_ptep_get_and_clear()??
Acked-by: William Lee Irwin <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 177 |
1 files changed, 176 insertions, 1 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4eb5ae3fbe10..fbd1111ea119 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -7,10 +7,14 @@ | |||
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/hugetlb.h> | ||
11 | #include <linux/sysctl.h> | 10 | #include <linux/sysctl.h> |
12 | #include <linux/highmem.h> | 11 | #include <linux/highmem.h> |
13 | #include <linux/nodemask.h> | 12 | #include <linux/nodemask.h> |
13 | #include <linux/pagemap.h> | ||
14 | #include <asm/page.h> | ||
15 | #include <asm/pgtable.h> | ||
16 | |||
17 | #include <linux/hugetlb.h> | ||
14 | 18 | ||
15 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; | 19 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; |
16 | static unsigned long nr_huge_pages, free_huge_pages; | 20 | static unsigned long nr_huge_pages, free_huge_pages; |
@@ -249,6 +253,72 @@ struct vm_operations_struct hugetlb_vm_ops = { | |||
249 | .nopage = hugetlb_nopage, | 253 | .nopage = hugetlb_nopage, |
250 | }; | 254 | }; |
251 | 255 | ||
256 | static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page) | ||
257 | { | ||
258 | pte_t entry; | ||
259 | |||
260 | if (vma->vm_flags & VM_WRITE) { | ||
261 | entry = | ||
262 | pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); | ||
263 | } else { | ||
264 | entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); | ||
265 | } | ||
266 | entry = pte_mkyoung(entry); | ||
267 | entry = pte_mkhuge(entry); | ||
268 | |||
269 | return entry; | ||
270 | } | ||
271 | |||
272 | int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | ||
273 | struct vm_area_struct *vma) | ||
274 | { | ||
275 | pte_t *src_pte, *dst_pte, entry; | ||
276 | struct page *ptepage; | ||
277 | unsigned long addr = vma->vm_start; | ||
278 | unsigned long end = vma->vm_end; | ||
279 | |||
280 | while (addr < end) { | ||
281 | dst_pte = huge_pte_alloc(dst, addr); | ||
282 | if (!dst_pte) | ||
283 | goto nomem; | ||
284 | src_pte = huge_pte_offset(src, addr); | ||
285 | BUG_ON(!src_pte || pte_none(*src_pte)); /* prefaulted */ | ||
286 | entry = *src_pte; | ||
287 | ptepage = pte_page(entry); | ||
288 | get_page(ptepage); | ||
289 | add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); | ||
290 | set_huge_pte_at(dst, addr, dst_pte, entry); | ||
291 | addr += HPAGE_SIZE; | ||
292 | } | ||
293 | return 0; | ||
294 | |||
295 | nomem: | ||
296 | return -ENOMEM; | ||
297 | } | ||
298 | |||
299 | void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | ||
300 | unsigned long end) | ||
301 | { | ||
302 | struct mm_struct *mm = vma->vm_mm; | ||
303 | unsigned long address; | ||
304 | pte_t pte; | ||
305 | struct page *page; | ||
306 | |||
307 | WARN_ON(!is_vm_hugetlb_page(vma)); | ||
308 | BUG_ON(start & ~HPAGE_MASK); | ||
309 | BUG_ON(end & ~HPAGE_MASK); | ||
310 | |||
311 | for (address = start; address < end; address += HPAGE_SIZE) { | ||
312 | pte = huge_ptep_get_and_clear(mm, address, huge_pte_offset(mm, address)); | ||
313 | if (pte_none(pte)) | ||
314 | continue; | ||
315 | page = pte_page(pte); | ||
316 | put_page(page); | ||
317 | } | ||
318 | add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT)); | ||
319 | flush_tlb_range(vma, start, end); | ||
320 | } | ||
321 | |||
252 | void zap_hugepage_range(struct vm_area_struct *vma, | 322 | void zap_hugepage_range(struct vm_area_struct *vma, |
253 | unsigned long start, unsigned long length) | 323 | unsigned long start, unsigned long length) |
254 | { | 324 | { |
@@ -258,3 +328,108 @@ void zap_hugepage_range(struct vm_area_struct *vma, | |||
258 | unmap_hugepage_range(vma, start, start + length); | 328 | unmap_hugepage_range(vma, start, start + length); |
259 | spin_unlock(&mm->page_table_lock); | 329 | spin_unlock(&mm->page_table_lock); |
260 | } | 330 | } |
331 | |||
332 | int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) | ||
333 | { | ||
334 | struct mm_struct *mm = current->mm; | ||
335 | unsigned long addr; | ||
336 | int ret = 0; | ||
337 | |||
338 | WARN_ON(!is_vm_hugetlb_page(vma)); | ||
339 | BUG_ON(vma->vm_start & ~HPAGE_MASK); | ||
340 | BUG_ON(vma->vm_end & ~HPAGE_MASK); | ||
341 | |||
342 | hugetlb_prefault_arch_hook(mm); | ||
343 | |||
344 | spin_lock(&mm->page_table_lock); | ||
345 | for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { | ||
346 | unsigned long idx; | ||
347 | pte_t *pte = huge_pte_alloc(mm, addr); | ||
348 | struct page *page; | ||
349 | |||
350 | if (!pte) { | ||
351 | ret = -ENOMEM; | ||
352 | goto out; | ||
353 | } | ||
354 | if (! pte_none(*pte)) | ||
355 | hugetlb_clean_stale_pgtable(pte); | ||
356 | |||
357 | idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) | ||
358 | + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); | ||
359 | page = find_get_page(mapping, idx); | ||
360 | if (!page) { | ||
361 | /* charge the fs quota first */ | ||
362 | if (hugetlb_get_quota(mapping)) { | ||
363 | ret = -ENOMEM; | ||
364 | goto out; | ||
365 | } | ||
366 | page = alloc_huge_page(); | ||
367 | if (!page) { | ||
368 | hugetlb_put_quota(mapping); | ||
369 | ret = -ENOMEM; | ||
370 | goto out; | ||
371 | } | ||
372 | ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); | ||
373 | if (! ret) { | ||
374 | unlock_page(page); | ||
375 | } else { | ||
376 | hugetlb_put_quota(mapping); | ||
377 | free_huge_page(page); | ||
378 | goto out; | ||
379 | } | ||
380 | } | ||
381 | add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); | ||
382 | set_huge_pte_at(mm, addr, pte, make_huge_pte(vma, page)); | ||
383 | } | ||
384 | out: | ||
385 | spin_unlock(&mm->page_table_lock); | ||
386 | return ret; | ||
387 | } | ||
388 | |||
389 | int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | ||
390 | struct page **pages, struct vm_area_struct **vmas, | ||
391 | unsigned long *position, int *length, int i) | ||
392 | { | ||
393 | unsigned long vpfn, vaddr = *position; | ||
394 | int remainder = *length; | ||
395 | |||
396 | BUG_ON(!is_vm_hugetlb_page(vma)); | ||
397 | |||
398 | vpfn = vaddr/PAGE_SIZE; | ||
399 | while (vaddr < vma->vm_end && remainder) { | ||
400 | |||
401 | if (pages) { | ||
402 | pte_t *pte; | ||
403 | struct page *page; | ||
404 | |||
405 | /* Some archs (sparc64, sh*) have multiple | ||
406 | * pte_ts to each hugepage. We have to make | ||
407 | * sure we get the first, for the page | ||
408 | * indexing below to work. */ | ||
409 | pte = huge_pte_offset(mm, vaddr & HPAGE_MASK); | ||
410 | |||
411 | /* hugetlb should be locked, and hence, prefaulted */ | ||
412 | WARN_ON(!pte || pte_none(*pte)); | ||
413 | |||
414 | page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; | ||
415 | |||
416 | WARN_ON(!PageCompound(page)); | ||
417 | |||
418 | get_page(page); | ||
419 | pages[i] = page; | ||
420 | } | ||
421 | |||
422 | if (vmas) | ||
423 | vmas[i] = vma; | ||
424 | |||
425 | vaddr += PAGE_SIZE; | ||
426 | ++vpfn; | ||
427 | --remainder; | ||
428 | ++i; | ||
429 | } | ||
430 | |||
431 | *length = remainder; | ||
432 | *position = vaddr; | ||
433 | |||
434 | return i; | ||
435 | } | ||