-rw-r--r--   fs/hugetlbfs/inode.c      8
-rw-r--r--   include/linux/hugetlb.h   9
-rw-r--r--   kernel/fork.c             9
-rw-r--r--   mm/hugetlb.c            158
4 files changed, 140 insertions, 44 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index aeabf80f81a5..1576bbecd084 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -103,9 +103,9 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	ret = -ENOMEM;
 	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 
-	if (vma->vm_flags & VM_MAYSHARE &&
-	    hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
-				  len >> HPAGE_SHIFT))
+	if (hugetlb_reserve_pages(inode,
+			vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
+			len >> HPAGE_SHIFT, vma))
 		goto out;
 
 	ret = 0;
@@ -942,7 +942,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
 		goto out_dentry;
 
 	error = -ENOMEM;
-	if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT))
+	if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT, NULL))
 		goto out_inode;
 
 	d_instantiate(dentry, inode);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index a79e80b689d8..185b14c9f021 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -17,6 +17,7 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 	return vma->vm_flags & VM_HUGETLB;
 }
 
+void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
@@ -30,7 +31,8 @@ int hugetlb_report_node_meminfo(int, char *);
 unsigned long hugetlb_total_pages(void);
 int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, int write_access);
-int hugetlb_reserve_pages(struct inode *inode, long from, long to);
+int hugetlb_reserve_pages(struct inode *inode, long from, long to,
+						struct vm_area_struct *vma);
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 
 extern unsigned long max_huge_pages;
@@ -58,6 +60,11 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 {
 	return 0;
 }
+
+static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+{
+}
+
 static inline unsigned long hugetlb_total_pages(void)
 {
 	return 0;
diff --git a/kernel/fork.c b/kernel/fork.c
index adefc1131f27..552c8d8e77ad 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -33,6 +33,7 @@
 #include <linux/cpu.h>
 #include <linux/cgroup.h>
 #include <linux/security.h>
+#include <linux/hugetlb.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
@@ -307,6 +308,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		}
 
 		/*
+		 * Clear hugetlb-related page reserves for children. This only
+		 * affects MAP_PRIVATE mappings. Faults generated by the child
+		 * are not guaranteed to succeed, even if read-only
+		 */
+		if (is_vm_hugetlb_page(tmp))
+			reset_vma_resv_huge_pages(tmp);
+
+		/*
 		 * Link in the new vma and copy the page table entries.
 		 */
 		*pprev = tmp;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a4dbba8965f3..0af500db3632 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -40,6 +40,69 @@ static int hugetlb_next_nid;
  */
 static DEFINE_SPINLOCK(hugetlb_lock);
 
+/*
+ * These helpers are used to track how many pages are reserved for
+ * faults in a MAP_PRIVATE mapping. Only the process that called mmap()
+ * is guaranteed to have their future faults succeed.
+ *
+ * With the exception of reset_vma_resv_huge_pages() which is called at fork(),
+ * the reserve counters are updated with the hugetlb_lock held. It is safe
+ * to reset the VMA at fork() time as it is not in use yet and there is no
+ * chance of the global counters getting corrupted as a result of the values.
+ */
+static unsigned long vma_resv_huge_pages(struct vm_area_struct *vma)
+{
+	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	if (!(vma->vm_flags & VM_SHARED))
+		return (unsigned long)vma->vm_private_data;
+	return 0;
+}
+
+static void set_vma_resv_huge_pages(struct vm_area_struct *vma,
+						unsigned long reserve)
+{
+	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	VM_BUG_ON(vma->vm_flags & VM_SHARED);
+
+	vma->vm_private_data = (void *)reserve;
+}
+
+/* Decrement the reserved pages in the hugepage pool by one */
+static void decrement_hugepage_resv_vma(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_SHARED) {
+		/* Shared mappings always use reserves */
+		resv_huge_pages--;
+	} else {
+		/*
+		 * Only the process that called mmap() has reserves for
+		 * private mappings.
+		 */
+		if (vma_resv_huge_pages(vma)) {
+			unsigned long reserve = vma_resv_huge_pages(vma) - 1;
+			resv_huge_pages--;
+			vma->vm_private_data = (void *)reserve;
+		}
+	}
+}
+
+void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+{
+	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	if (!(vma->vm_flags & VM_SHARED))
+		vma->vm_private_data = (void *)0;
+}
+
+/* Returns true if the VMA has associated reserve pages */
+static int vma_has_private_reserves(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_SHARED)
+		return 0;
+	if (!vma_resv_huge_pages(vma))
+		return 0;
+	return 1;
+}
+
 static void clear_huge_page(struct page *page, unsigned long addr)
 {
 	int i;
@@ -101,6 +164,15 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	struct zone *zone;
 	struct zoneref *z;
 
+	/*
+	 * A child process with MAP_PRIVATE mappings created by its parent
+	 * has no page reserves. This check ensures that reservations are
+	 * not "stolen". The child may still get SIGKILLed.
+	 */
+	if (!vma_has_private_reserves(vma) &&
+			free_huge_pages - resv_huge_pages == 0)
+		return NULL;
+
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
 		nid = zone_to_nid(zone);
@@ -111,8 +183,8 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 			list_del(&page->lru);
 			free_huge_pages--;
 			free_huge_pages_node[nid]--;
-			if (vma && vma->vm_flags & VM_MAYSHARE)
-				resv_huge_pages--;
+			decrement_hugepage_resv_vma(vma);
+
 			break;
 		}
 	}
@@ -461,55 +533,40 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
 	}
 }
 
-
-static struct page *alloc_huge_page_shared(struct vm_area_struct *vma,
-						unsigned long addr)
+static struct page *alloc_huge_page(struct vm_area_struct *vma,
+				unsigned long addr)
 {
 	struct page *page;
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct inode *inode = mapping->host;
+	unsigned int chg = 0;
+
+	/*
+	 * Processes that did not create the mapping will have no reserves and
+	 * will not have accounted against quota. Check that the quota can be
+	 * made before satisfying the allocation
+	 */
+	if (!vma_has_private_reserves(vma)) {
+		chg = 1;
+		if (hugetlb_get_quota(inode->i_mapping, chg))
+			return ERR_PTR(-ENOSPC);
+	}
 
 	spin_lock(&hugetlb_lock);
 	page = dequeue_huge_page_vma(vma, addr);
 	spin_unlock(&hugetlb_lock);
-	return page ? page : ERR_PTR(-VM_FAULT_OOM);
-}
 
-static struct page *alloc_huge_page_private(struct vm_area_struct *vma,
-						unsigned long addr)
-{
-	struct page *page = NULL;
-
-	if (hugetlb_get_quota(vma->vm_file->f_mapping, 1))
-		return ERR_PTR(-VM_FAULT_SIGBUS);
-
-	spin_lock(&hugetlb_lock);
-	if (free_huge_pages > resv_huge_pages)
-		page = dequeue_huge_page_vma(vma, addr);
-	spin_unlock(&hugetlb_lock);
 	if (!page) {
 		page = alloc_buddy_huge_page(vma, addr);
 		if (!page) {
-			hugetlb_put_quota(vma->vm_file->f_mapping, 1);
+			hugetlb_put_quota(inode->i_mapping, chg);
 			return ERR_PTR(-VM_FAULT_OOM);
 		}
 	}
-	return page;
-}
 
-static struct page *alloc_huge_page(struct vm_area_struct *vma,
-					unsigned long addr)
-{
-	struct page *page;
-	struct address_space *mapping = vma->vm_file->f_mapping;
-
-	if (vma->vm_flags & VM_MAYSHARE)
-		page = alloc_huge_page_shared(vma, addr);
-	else
-		page = alloc_huge_page_private(vma, addr);
+	set_page_refcounted(page);
+	set_page_private(page, (unsigned long) mapping);
 
-	if (!IS_ERR(page)) {
-		set_page_refcounted(page);
-		set_page_private(page, (unsigned long) mapping);
-	}
 	return page;
 }
 
@@ -757,6 +814,13 @@ out:
 	return ret;
 }
 
+static void hugetlb_vm_op_close(struct vm_area_struct *vma)
+{
+	unsigned long reserve = vma_resv_huge_pages(vma);
+	if (reserve)
+		hugetlb_acct_memory(-reserve);
+}
+
 /*
  * We cannot handle pagefaults against hugetlb pages at all. They cause
  * handle_mm_fault() to try to instantiate regular-sized pages in the
@@ -771,6 +835,7 @@ static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 struct vm_operations_struct hugetlb_vm_ops = {
 	.fault = hugetlb_vm_op_fault,
+	.close = hugetlb_vm_op_close,
 };
 
 static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
@@ -1289,11 +1354,25 @@ static long region_truncate(struct list_head *head, long end)
 	return chg;
 }
 
-int hugetlb_reserve_pages(struct inode *inode, long from, long to)
+int hugetlb_reserve_pages(struct inode *inode,
+					long from, long to,
+					struct vm_area_struct *vma)
 {
 	long ret, chg;
 
-	chg = region_chg(&inode->i_mapping->private_list, from, to);
+	/*
+	 * Shared mappings base their reservation on the number of pages that
+	 * are already allocated on behalf of the file. Private mappings need
+	 * to reserve the full area even if read-only as mprotect() may be
+	 * called to make the mapping read-write. Assume !vma is a shm mapping
+	 */
+	if (!vma || vma->vm_flags & VM_SHARED)
+		chg = region_chg(&inode->i_mapping->private_list, from, to);
+	else {
+		chg = to - from;
+		set_vma_resv_huge_pages(vma, chg);
+	}
+
 	if (chg < 0)
 		return chg;
 
@@ -1304,7 +1383,8 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
 		hugetlb_put_quota(inode->i_mapping, chg);
 		return ret;
 	}
-	region_add(&inode->i_mapping->private_list, from, to);
+	if (!vma || vma->vm_flags & VM_SHARED)
+		region_add(&inode->i_mapping->private_list, from, to);
 	return 0;
 }
 