Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--  mm/hugetlb.c  124
1 file changed, 99 insertions, 25 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 67a71191136e..6058b53dcb89 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mm.h>
+#include <linux/seq_file.h>
 #include <linux/sysctl.h>
 #include <linux/highmem.h>
 #include <linux/mmu_notifier.h>
@@ -262,7 +263,7 @@ struct resv_map {
 	struct list_head regions;
 };
 
-struct resv_map *resv_map_alloc(void)
+static struct resv_map *resv_map_alloc(void)
 {
 	struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
 	if (!resv_map)
@@ -274,7 +275,7 @@ struct resv_map *resv_map_alloc(void)
 	return resv_map;
 }
 
-void resv_map_release(struct kref *ref)
+static void resv_map_release(struct kref *ref)
 {
 	struct resv_map *resv_map = container_of(ref, struct resv_map, refs);
 
@@ -289,7 +290,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
 	if (!(vma->vm_flags & VM_SHARED))
 		return (struct resv_map *)(get_vma_private_data(vma) &
 							~HPAGE_RESV_MASK);
-	return 0;
+	return NULL;
 }
 
 static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
@@ -353,11 +354,26 @@ static int vma_has_reserves(struct vm_area_struct *vma)
 	return 0;
 }
 
+static void clear_gigantic_page(struct page *page,
+			unsigned long addr, unsigned long sz)
+{
+	int i;
+	struct page *p = page;
+
+	might_sleep();
+	for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) {
+		cond_resched();
+		clear_user_highpage(p, addr + i * PAGE_SIZE);
+	}
+}
 static void clear_huge_page(struct page *page,
 			unsigned long addr, unsigned long sz)
 {
 	int i;
 
+	if (unlikely(sz > MAX_ORDER_NR_PAGES))
+		return clear_gigantic_page(page, addr, sz);
+
 	might_sleep();
 	for (i = 0; i < sz/PAGE_SIZE; i++) {
 		cond_resched();
@@ -365,12 +381,32 @@ static void clear_huge_page(struct page *page,
 	}
 }
 
+static void copy_gigantic_page(struct page *dst, struct page *src,
+			   unsigned long addr, struct vm_area_struct *vma)
+{
+	int i;
+	struct hstate *h = hstate_vma(vma);
+	struct page *dst_base = dst;
+	struct page *src_base = src;
+	might_sleep();
+	for (i = 0; i < pages_per_huge_page(h); ) {
+		cond_resched();
+		copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
+
+		i++;
+		dst = mem_map_next(dst, dst_base, i);
+		src = mem_map_next(src, src_base, i);
+	}
+}
 static void copy_huge_page(struct page *dst, struct page *src,
 			   unsigned long addr, struct vm_area_struct *vma)
 {
 	int i;
 	struct hstate *h = hstate_vma(vma);
 
+	if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES))
+		return copy_gigantic_page(dst, src, addr, vma);
+
 	might_sleep();
 	for (i = 0; i < pages_per_huge_page(h); i++) {
 		cond_resched();
@@ -455,6 +491,8 @@ static void update_and_free_page(struct hstate *h, struct page *page)
 {
 	int i;
 
+	VM_BUG_ON(h->order >= MAX_ORDER);
+
 	h->nr_huge_pages--;
 	h->nr_huge_pages_node[page_to_nid(page)]--;
 	for (i = 0; i < pages_per_huge_page(h); i++) {
@@ -969,6 +1007,14 @@ found:
 	return 1;
 }
 
+static void prep_compound_huge_page(struct page *page, int order)
+{
+	if (unlikely(order > (MAX_ORDER - 1)))
+		prep_compound_gigantic_page(page, order);
+	else
+		prep_compound_page(page, order);
+}
+
 /* Put bootmem huge pages into the standard lists after mem_map is up */
 static void __init gather_bootmem_prealloc(void)
 {
@@ -979,7 +1025,7 @@ static void __init gather_bootmem_prealloc(void)
 		struct hstate *h = m->hstate;
 		__ClearPageReserved(page);
 		WARN_ON(page_count(page) != 1);
-		prep_compound_page(page, h->order);
+		prep_compound_huge_page(page, h->order);
 		prep_new_huge_page(h, page, page_to_nid(page));
 	}
 }
@@ -1455,15 +1501,15 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 
 #endif /* CONFIG_SYSCTL */
 
-int hugetlb_report_meminfo(char *buf)
+void hugetlb_report_meminfo(struct seq_file *m)
 {
 	struct hstate *h = &default_hstate;
-	return sprintf(buf,
+	seq_printf(m,
 			"HugePages_Total: %5lu\n"
 			"HugePages_Free:  %5lu\n"
 			"HugePages_Rsvd:  %5lu\n"
 			"HugePages_Surp:  %5lu\n"
-			"Hugepagesize:    %5lu kB\n",
+			"Hugepagesize:    %8lu kB\n",
 			h->nr_huge_pages,
 			h->free_huge_pages,
 			h->resv_huge_pages,
@@ -1747,11 +1793,10 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
  * from other VMAs and let the children be SIGKILLed if they are faulting the
  * same region.
  */
-int unmap_ref_private(struct mm_struct *mm,
-					struct vm_area_struct *vma,
-					struct page *page,
-					unsigned long address)
+static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+				struct page *page, unsigned long address)
 {
+	struct hstate *h = hstate_vma(vma);
 	struct vm_area_struct *iter_vma;
 	struct address_space *mapping;
 	struct prio_tree_iter iter;
@@ -1761,7 +1806,7 @@ int unmap_ref_private(struct mm_struct *mm,
 	 * vm_pgoff is in PAGE_SIZE units, hence the different calculation
 	 * from page cache lookup which is in HPAGE_SIZE units.
 	 */
-	address = address & huge_page_mask(hstate_vma(vma));
+	address = address & huge_page_mask(h);
 	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
 		+ (vma->vm_pgoff >> PAGE_SHIFT);
 	mapping = (struct address_space *)page_private(page);
@@ -1780,7 +1825,7 @@ int unmap_ref_private(struct mm_struct *mm,
 		 */
 		if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
 			unmap_hugepage_range(iter_vma,
-				address, address + HPAGE_SIZE,
+				address, address + huge_page_size(h),
 				page);
 	}
 
@@ -2008,7 +2053,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	entry = huge_ptep_get(ptep);
 	if (huge_pte_none(entry)) {
 		ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
-		goto out_unlock;
+		goto out_mutex;
 	}
 
 	ret = 0;
@@ -2024,7 +2069,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (write_access && !pte_write(entry)) {
 		if (vma_needs_reservation(h, vma, address) < 0) {
 			ret = VM_FAULT_OOM;
-			goto out_unlock;
+			goto out_mutex;
 		}
 
 		if (!(vma->vm_flags & VM_SHARED))
@@ -2034,10 +2079,23 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	spin_lock(&mm->page_table_lock);
 	/* Check for a racing update before calling hugetlb_cow */
-	if (likely(pte_same(entry, huge_ptep_get(ptep))))
-		if (write_access && !pte_write(entry))
+	if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
+		goto out_page_table_lock;
+
+
+	if (write_access) {
+		if (!pte_write(entry)) {
 			ret = hugetlb_cow(mm, vma, address, ptep, entry,
 							pagecache_page);
+			goto out_page_table_lock;
+		}
+		entry = pte_mkdirty(entry);
+	}
+	entry = pte_mkyoung(entry);
+	if (huge_ptep_set_access_flags(vma, address, ptep, entry, write_access))
+		update_mmu_cache(vma, address, entry);
+
+out_page_table_lock:
 	spin_unlock(&mm->page_table_lock);
 
 	if (pagecache_page) {
@@ -2045,7 +2103,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		put_page(pagecache_page);
 	}
 
-out_unlock:
+out_mutex:
 	mutex_unlock(&hugetlb_instantiation_mutex);
 
 	return ret;
@@ -2060,6 +2118,14 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
 	return NULL;
 }
 
+static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
+{
+	if (!ptep || write || shared)
+		return 0;
+	else
+		return huge_pte_none(huge_ptep_get(ptep));
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			struct page **pages, struct vm_area_struct **vmas,
 			unsigned long *position, int *length, int i,
@@ -2069,6 +2135,8 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long vaddr = *position;
 	int remainder = *length;
 	struct hstate *h = hstate_vma(vma);
+	int zeropage_ok = 0;
+	int shared = vma->vm_flags & VM_SHARED;
 
 	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
@@ -2081,8 +2149,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * first, for the page indexing below to work.
 		 */
 		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+		if (huge_zeropage_ok(pte, write, shared))
+			zeropage_ok = 1;
 
-		if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
+		if (!pte ||
+		    (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
 		    (write && !pte_write(huge_ptep_get(pte)))) {
 			int ret;
 
@@ -2102,8 +2173,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page = pte_page(huge_ptep_get(pte));
 same_page:
 		if (pages) {
-			get_page(page);
-			pages[i] = page + pfn_offset;
+			if (zeropage_ok)
+				pages[i] = ZERO_PAGE(0);
+			else
+				pages[i] = mem_map_offset(page, pfn_offset);
+			get_page(pages[i]);
 		}
 
 		if (vmas)