Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--  mm/hugetlb.c  124
1 file changed, 99 insertions(+), 25 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 67a71191136e..6058b53dcb89 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mm.h>
+#include <linux/seq_file.h>
 #include <linux/sysctl.h>
 #include <linux/highmem.h>
 #include <linux/mmu_notifier.h>
@@ -262,7 +263,7 @@ struct resv_map {
	struct list_head regions;
 };
 
-struct resv_map *resv_map_alloc(void)
+static struct resv_map *resv_map_alloc(void)
 {
	struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
	if (!resv_map)
@@ -274,7 +275,7 @@ struct resv_map *resv_map_alloc(void)
	return resv_map;
 }
 
-void resv_map_release(struct kref *ref)
+static void resv_map_release(struct kref *ref)
 {
	struct resv_map *resv_map = container_of(ref, struct resv_map, refs);
 
@@ -289,7 +290,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
	if (!(vma->vm_flags & VM_SHARED))
		return (struct resv_map *)(get_vma_private_data(vma) &
					~HPAGE_RESV_MASK);
-	return 0;
+	return NULL;
 }
 
 static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
@@ -353,11 +354,26 @@ static int vma_has_reserves(struct vm_area_struct *vma)
	return 0;
 }
 
+static void clear_gigantic_page(struct page *page,
+			unsigned long addr, unsigned long sz)
+{
+	int i;
+	struct page *p = page;
+
+	might_sleep();
+	for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) {
+		cond_resched();
+		clear_user_highpage(p, addr + i * PAGE_SIZE);
+	}
+}
 static void clear_huge_page(struct page *page,
			unsigned long addr, unsigned long sz)
 {
	int i;
 
+	if (unlikely(sz > MAX_ORDER_NR_PAGES))
+		return clear_gigantic_page(page, addr, sz);
+
	might_sleep();
	for (i = 0; i < sz/PAGE_SIZE; i++) {
		cond_resched();
@@ -365,12 +381,32 @@ static void clear_huge_page(struct page *page,
	}
 }
 
+static void copy_gigantic_page(struct page *dst, struct page *src,
+			   unsigned long addr, struct vm_area_struct *vma)
+{
+	int i;
+	struct hstate *h = hstate_vma(vma);
+	struct page *dst_base = dst;
+	struct page *src_base = src;
+	might_sleep();
+	for (i = 0; i < pages_per_huge_page(h); ) {
+		cond_resched();
+		copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
+
+		i++;
+		dst = mem_map_next(dst, dst_base, i);
+		src = mem_map_next(src, src_base, i);
+	}
+}
 static void copy_huge_page(struct page *dst, struct page *src,
			   unsigned long addr, struct vm_area_struct *vma)
 {
	int i;
	struct hstate *h = hstate_vma(vma);
 
+	if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES))
+		return copy_gigantic_page(dst, src, addr, vma);
+
	might_sleep();
	for (i = 0; i < pages_per_huge_page(h); i++) {
		cond_resched();
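[note] The two gigantic-page walkers above advance with mem_map_next() rather than plain pointer arithmetic, because a page larger than MAX_ORDER can cross mem_map sections that are not virtually contiguous under SPARSEMEM. For review context, a sketch of the companion helpers this series adds to mm/internal.h (reconstructed from memory; the tree is authoritative):

	/* Sketch of the mm/internal.h helpers the hunks above rely on. */
	static inline struct page *mem_map_offset(struct page *base, int offset)
	{
		/* Past a MAX_ORDER boundary, go via the pfn, not pointer math. */
		if (unlikely(offset >= MAX_ORDER_NR_PAGES))
			return pfn_to_page(page_to_pfn(base) + offset);
		return base + offset;
	}

	static inline struct page *mem_map_next(struct page *iter,
						struct page *base, int offset)
	{
		/* On a MAX_ORDER boundary, revalidate through the pfn. */
		if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
			unsigned long pfn = page_to_pfn(base) + offset;

			if (!pfn_valid(pfn))
				return NULL;
			return pfn_to_page(pfn);
		}
		return iter + 1;
	}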
@@ -455,6 +491,8 @@ static void update_and_free_page(struct hstate *h, struct page *page)
 {
	int i;
 
+	VM_BUG_ON(h->order >= MAX_ORDER);
+
	h->nr_huge_pages--;
	h->nr_huge_pages_node[page_to_nid(page)]--;
	for (i = 0; i < pages_per_huge_page(h); i++) {
@@ -969,6 +1007,14 @@ found:
	return 1;
 }
 
+static void prep_compound_huge_page(struct page *page, int order)
+{
+	if (unlikely(order > (MAX_ORDER - 1)))
+		prep_compound_gigantic_page(page, order);
+	else
+		prep_compound_page(page, order);
+}
+
 /* Put bootmem huge pages into the standard lists after mem_map is up */
 static void __init gather_bootmem_prealloc(void)
 {
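[note] prep_compound_huge_page() dispatches on the order: anything that still fits the buddy allocator goes through the ordinary prep_compound_page(), while gigantic pages need a variant that walks the tail pages with mem_map_next(), for the same discontiguity reason as above. A rough sketch of that variant, modeled on the 2.6.28-era prep_compound_page() body (the authoritative definition lives in mm/page_alloc.c):

	/* Rough sketch of the gigantic variant; see mm/page_alloc.c. */
	static void prep_compound_gigantic_page(struct page *page,
						unsigned long order)
	{
		int i;
		int nr_pages = 1 << order;
		struct page *p = page + 1;

		set_compound_page_dtor(page, free_compound_page);
		set_compound_order(page, order);
		__SetPageHead(page);
		/* Tail pages may live in different mem_map sections. */
		for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
			__SetPageTail(p);
			p->first_page = page;
		}
	}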
@@ -979,7 +1025,7 @@ static void __init gather_bootmem_prealloc(void)
		struct hstate *h = m->hstate;
		__ClearPageReserved(page);
		WARN_ON(page_count(page) != 1);
-		prep_compound_page(page, h->order);
+		prep_compound_huge_page(page, h->order);
		prep_new_huge_page(h, page, page_to_nid(page));
	}
 }
@@ -1455,15 +1501,15 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 
 #endif /* CONFIG_SYSCTL */
 
-int hugetlb_report_meminfo(char *buf)
+void hugetlb_report_meminfo(struct seq_file *m)
 {
	struct hstate *h = &default_hstate;
-	return sprintf(buf,
+	seq_printf(m,
		"HugePages_Total:   %5lu\n"
		"HugePages_Free:    %5lu\n"
		"HugePages_Rsvd:    %5lu\n"
		"HugePages_Surp:    %5lu\n"
-		"Hugepagesize:   %5lu kB\n",
+		"Hugepagesize:   %8lu kB\n",
		h->nr_huge_pages,
		h->free_huge_pages,
		h->resv_huge_pages,
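[note] The meminfo hook now writes into a seq_file instead of formatting into a caller-supplied buffer, so the byte-count return value goes away. A minimal, hypothetical sketch of the caller side, condensed from what the seq_file-converted /proc/meminfo show routine would look like:

	/* Hypothetical, condensed caller: hand the seq_file straight through. */
	static int meminfo_proc_show(struct seq_file *m, void *v)
	{
		hugetlb_report_meminfo(m);	/* appends the HugePages_* block */
		return 0;
	}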
@@ -1747,11 +1793,10 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
  * from other VMAs and let the children be SIGKILLed if they are faulting the
  * same region.
  */
-int unmap_ref_private(struct mm_struct *mm,
-					struct vm_area_struct *vma,
-					struct page *page,
-					unsigned long address)
+static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+				struct page *page, unsigned long address)
 {
+	struct hstate *h = hstate_vma(vma);
	struct vm_area_struct *iter_vma;
	struct address_space *mapping;
	struct prio_tree_iter iter;
@@ -1761,7 +1806,7 @@ int unmap_ref_private(struct mm_struct *mm,
	 * vm_pgoff is in PAGE_SIZE units, hence the different calculation
	 * from page cache lookup which is in HPAGE_SIZE units.
	 */
-	address = address & huge_page_mask(hstate_vma(vma));
+	address = address & huge_page_mask(h);
	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
		+ (vma->vm_pgoff >> PAGE_SHIFT);
	mapping = (struct address_space *)page_private(page);
@@ -1780,7 +1825,7 @@ int unmap_ref_private(struct mm_struct *mm,
		 */
		if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
			unmap_hugepage_range(iter_vma,
-				address, address + HPAGE_SIZE,
+				address, address + huge_page_size(h),
				page);
	}
 
@@ -2008,7 +2053,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
	entry = huge_ptep_get(ptep);
	if (huge_pte_none(entry)) {
		ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
-		goto out_unlock;
+		goto out_mutex;
	}
 
	ret = 0;
@@ -2024,7 +2069,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
	if (write_access && !pte_write(entry)) {
		if (vma_needs_reservation(h, vma, address) < 0) {
			ret = VM_FAULT_OOM;
-			goto out_unlock;
+			goto out_mutex;
		}
 
		if (!(vma->vm_flags & VM_SHARED))
@@ -2034,10 +2079,23 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
	spin_lock(&mm->page_table_lock);
	/* Check for a racing update before calling hugetlb_cow */
-	if (likely(pte_same(entry, huge_ptep_get(ptep))))
-		if (write_access && !pte_write(entry))
+	if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
+		goto out_page_table_lock;
+
+
+	if (write_access) {
+		if (!pte_write(entry)) {
			ret = hugetlb_cow(mm, vma, address, ptep, entry,
							pagecache_page);
+			goto out_page_table_lock;
+		}
+		entry = pte_mkdirty(entry);
+	}
+	entry = pte_mkyoung(entry);
+	if (huge_ptep_set_access_flags(vma, address, ptep, entry, write_access))
+		update_mmu_cache(vma, address, entry);
+
+out_page_table_lock:
	spin_unlock(&mm->page_table_lock);
 
	if (pagecache_page) {
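[note] The restructured tail now mirrors the normal-page fault path: a read fault on a present PTE only needs pte_mkyoung(), a write fault on an already-writable PTE additionally needs pte_mkdirty(), and only a write fault on a read-only PTE drops into hugetlb_cow(). huge_ptep_set_access_flags() is a per-architecture hook with the signature used above; the following is only a plausible generic-style body, assumed here for illustration (real architectures differ, notably in how they flush the TLB):

	/* Assumed sketch of the arch hook; not any particular arch's code. */
	int huge_ptep_set_access_flags(struct vm_area_struct *vma,
				       unsigned long addr, pte_t *ptep,
				       pte_t pte, int dirty)
	{
		int changed = !pte_same(huge_ptep_get(ptep), pte);

		if (changed) {
			set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
			flush_tlb_range(vma, addr, addr + HPAGE_SIZE);
		}
		return changed;
	}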
@@ -2045,7 +2103,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
		put_page(pagecache_page);
	}
 
-out_unlock:
+out_mutex:
	mutex_unlock(&hugetlb_instantiation_mutex);
 
	return ret;
@@ -2060,6 +2118,14 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
	return NULL;
 }
 
+static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
+{
+	if (!ptep || write || shared)
+		return 0;
+	else
+		return huge_pte_none(huge_ptep_get(ptep));
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, int *length, int i,
@@ -2069,6 +2135,8 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
	unsigned long vaddr = *position;
	int remainder = *length;
	struct hstate *h = hstate_vma(vma);
+	int zeropage_ok = 0;
+	int shared = vma->vm_flags & VM_SHARED;
 
	spin_lock(&mm->page_table_lock);
	while (vaddr < vma->vm_end && remainder) {
@@ -2081,8 +2149,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
		 * first, for the page indexing below to work.
		 */
		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+		if (huge_zeropage_ok(pte, write, shared))
+			zeropage_ok = 1;
 
-		if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
+		if (!pte ||
+		    (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
		    (write && !pte_write(huge_ptep_get(pte)))) {
			int ret;
 
@@ -2102,8 +2173,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
		page = pte_page(huge_ptep_get(pte));
 same_page:
		if (pages) {
-			get_page(page);
-			pages[i] = page + pfn_offset;
+			if (zeropage_ok)
+				pages[i] = ZERO_PAGE(0);
+			else
+				pages[i] = mem_map_offset(page, pfn_offset);
+			get_page(pages[i]);
		}
 
		if (vmas)
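[note] Net effect of the follow_hugetlb_page() changes: a read-only get_user_pages() on an untouched MAP_PRIVATE hugetlb range now hands back the shared zero page for each subpage instead of faulting in real huge pages, which is what core dumps of sparse hugetlb mappings want. A hypothetical caller-side illustration (peek_hugetlb_page() is invented for this sketch; the get_user_pages() signature is the 2.6.28-era one):

	/* Hypothetical illustration of the new GUP behavior. */
	static int peek_hugetlb_page(struct mm_struct *mm, unsigned long vaddr)
	{
		struct page *page;
		int got;

		down_read(&mm->mmap_sem);
		got = get_user_pages(current, mm, vaddr, 1, 0 /* !write */,
				     0 /* !force */, &page, NULL);
		up_read(&mm->mmap_sem);
		if (got != 1)
			return -EFAULT;
		/* For an unpopulated private mapping: page == ZERO_PAGE(0). */
		put_page(page);
		return 0;
	}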