aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andi Kleen <ak@suse.de> 2008-07-24 00:27:41 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-07-24 13:47:17 -0400
commit a5516438959d90b071ff0a484ce4f3f523dc3152 (patch)
tree e356ba9364c76b93c176b4d4a262b7aca3ee8f91
parent b7ba30c679ed1eb7ed3ed8f281f6493282042bd4 (diff)
hugetlb: modular state for hugetlb page size
The goal of this patchset is to support multiple hugetlb page sizes. This is achieved by introducing a new struct hstate structure, which encapsulates the important hugetlb state and constants (e.g. huge page size, number of huge pages currently allocated, etc).

The hstate structure is then passed around the code which requires these fields, so that code will do the right thing regardless of the exact hstate it is operating on.

This patch adds the hstate structure, with a single global instance of it (default_hstate), and does the basic work of converting hugetlb to use the hstate.

Future patches will add more hstate structures to allow for different hugetlbfs mounts to have different page sizes.

[akpm@linux-foundation.org: coding-style fixes]
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- arch/ia64/mm/hugetlbpage.c 7
-rw-r--r-- arch/powerpc/mm/hugetlbpage.c 3
-rw-r--r-- arch/s390/mm/hugetlbpage.c 3
-rw-r--r-- arch/sh/mm/hugetlbpage.c 3
-rw-r--r-- arch/sparc64/mm/hugetlbpage.c 5
-rw-r--r-- arch/x86/mm/hugetlbpage.c 5
-rw-r--r-- fs/hugetlbfs/inode.c 52
-rw-r--r-- include/asm-ia64/hugetlb.h 3
-rw-r--r-- include/asm-powerpc/hugetlb.h 3
-rw-r--r-- include/asm-s390/hugetlb.h 3
-rw-r--r-- include/asm-sh/hugetlb.h 3
-rw-r--r-- include/asm-sparc/hugetlb.h 3
-rw-r--r-- include/asm-x86/hugetlb.h 8
-rw-r--r-- include/linux/hugetlb.h 88
-rw-r--r-- ipc/shm.c 3
-rw-r--r-- mm/hugetlb.c 368
-rw-r--r-- mm/memory.c 2
-rw-r--r-- mm/mempolicy.c 9
-rw-r--r-- mm/mmap.c 3
19 files changed, 356 insertions, 218 deletions
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index cd49e2860eef..6170f097d255 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -24,7 +24,7 @@
24unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT; 24unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT;
25 25
26pte_t * 26pte_t *
27huge_pte_alloc (struct mm_struct *mm, unsigned long addr) 27huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
28{ 28{
29 unsigned long taddr = htlbpage_to_page(addr); 29 unsigned long taddr = htlbpage_to_page(addr);
30 pgd_t *pgd; 30 pgd_t *pgd;
@@ -75,7 +75,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
75 * Don't actually need to do any preparation, but need to make sure 75 * Don't actually need to do any preparation, but need to make sure
76 * the address is in the right region. 76 * the address is in the right region.
77 */ 77 */
78int prepare_hugepage_range(unsigned long addr, unsigned long len) 78int prepare_hugepage_range(struct file *file,
79 unsigned long addr, unsigned long len)
79{ 80{
80 if (len & ~HPAGE_MASK) 81 if (len & ~HPAGE_MASK)
81 return -EINVAL; 82 return -EINVAL;
@@ -149,7 +150,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u
149 150
150 /* Handle MAP_FIXED */ 151 /* Handle MAP_FIXED */
151 if (flags & MAP_FIXED) { 152 if (flags & MAP_FIXED) {
152 if (prepare_hugepage_range(addr, len)) 153 if (prepare_hugepage_range(file, addr, len))
153 return -EINVAL; 154 return -EINVAL;
154 return addr; 155 return addr;
155 } 156 }
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1a96cc891cf5..c94dc71af989 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -128,7 +128,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
128 return NULL; 128 return NULL;
129} 129}
130 130
131pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 131pte_t *huge_pte_alloc(struct mm_struct *mm,
132 unsigned long addr, unsigned long sz)
132{ 133{
133 pgd_t *pg; 134 pgd_t *pg;
134 pud_t *pu; 135 pud_t *pu;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index f4b6124fdb75..9162dc84f77f 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -72,7 +72,8 @@ void arch_release_hugepage(struct page *page)
72 page[1].index = 0; 72 page[1].index = 0;
73} 73}
74 74
75pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 75pte_t *huge_pte_alloc(struct mm_struct *mm,
76 unsigned long addr, unsigned long sz)
76{ 77{
77 pgd_t *pgdp; 78 pgd_t *pgdp;
78 pud_t *pudp; 79 pud_t *pudp;
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index ae8c321d6e2a..2f9dbe0ef4ac 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -22,7 +22,8 @@
22#include <asm/tlbflush.h> 22#include <asm/tlbflush.h>
23#include <asm/cacheflush.h> 23#include <asm/cacheflush.h>
24 24
25pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 25pte_t *huge_pte_alloc(struct mm_struct *mm,
26 unsigned long addr, unsigned long sz)
26{ 27{
27 pgd_t *pgd; 28 pgd_t *pgd;
28 pud_t *pud; 29 pud_t *pud;
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index ebefd2a14375..1307b23f6a76 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -175,7 +175,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
175 return -ENOMEM; 175 return -ENOMEM;
176 176
177 if (flags & MAP_FIXED) { 177 if (flags & MAP_FIXED) {
178 if (prepare_hugepage_range(addr, len)) 178 if (prepare_hugepage_range(file, addr, len))
179 return -EINVAL; 179 return -EINVAL;
180 return addr; 180 return addr;
181 } 181 }
@@ -195,7 +195,8 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
195 pgoff, flags); 195 pgoff, flags);
196} 196}
197 197
198pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 198pte_t *huge_pte_alloc(struct mm_struct *mm,
199 unsigned long addr, unsigned long sz)
199{ 200{
200 pgd_t *pgd; 201 pgd_t *pgd;
201 pud_t *pud; 202 pud_t *pud;
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 0b3d567e686d..52476fde8996 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -124,7 +124,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
124 return 1; 124 return 1;
125} 125}
126 126
127pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 127pte_t *huge_pte_alloc(struct mm_struct *mm,
128 unsigned long addr, unsigned long sz)
128{ 129{
129 pgd_t *pgd; 130 pgd_t *pgd;
130 pud_t *pud; 131 pud_t *pud;
@@ -368,7 +369,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
368 return -ENOMEM; 369 return -ENOMEM;
369 370
370 if (flags & MAP_FIXED) { 371 if (flags & MAP_FIXED) {
371 if (prepare_hugepage_range(addr, len)) 372 if (prepare_hugepage_range(file, addr, len))
372 return -EINVAL; 373 return -EINVAL;
373 return addr; 374 return addr;
374 } 375 }
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 428eff5b73f3..516c581b5371 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -80,6 +80,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
80 struct inode *inode = file->f_path.dentry->d_inode; 80 struct inode *inode = file->f_path.dentry->d_inode;
81 loff_t len, vma_len; 81 loff_t len, vma_len;
82 int ret; 82 int ret;
83 struct hstate *h = hstate_file(file);
83 84
84 /* 85 /*
85 * vma address alignment (but not the pgoff alignment) has 86 * vma address alignment (but not the pgoff alignment) has
@@ -92,7 +93,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
92 vma->vm_flags |= VM_HUGETLB | VM_RESERVED; 93 vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
93 vma->vm_ops = &hugetlb_vm_ops; 94 vma->vm_ops = &hugetlb_vm_ops;
94 95
95 if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT)) 96 if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT))
96 return -EINVAL; 97 return -EINVAL;
97 98
98 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 99 vma_len = (loff_t)(vma->vm_end - vma->vm_start);
@@ -104,8 +105,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
104 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 105 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
105 106
106 if (hugetlb_reserve_pages(inode, 107 if (hugetlb_reserve_pages(inode,
107 vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), 108 vma->vm_pgoff >> huge_page_order(h),
108 len >> HPAGE_SHIFT, vma)) 109 len >> huge_page_shift(h), vma))
109 goto out; 110 goto out;
110 111
111 ret = 0; 112 ret = 0;
@@ -130,20 +131,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
130 struct mm_struct *mm = current->mm; 131 struct mm_struct *mm = current->mm;
131 struct vm_area_struct *vma; 132 struct vm_area_struct *vma;
132 unsigned long start_addr; 133 unsigned long start_addr;
134 struct hstate *h = hstate_file(file);
133 135
134 if (len & ~HPAGE_MASK) 136 if (len & ~huge_page_mask(h))
135 return -EINVAL; 137 return -EINVAL;
136 if (len > TASK_SIZE) 138 if (len > TASK_SIZE)
137 return -ENOMEM; 139 return -ENOMEM;
138 140
139 if (flags & MAP_FIXED) { 141 if (flags & MAP_FIXED) {
140 if (prepare_hugepage_range(addr, len)) 142 if (prepare_hugepage_range(file, addr, len))
141 return -EINVAL; 143 return -EINVAL;
142 return addr; 144 return addr;
143 } 145 }
144 146
145 if (addr) { 147 if (addr) {
146 addr = ALIGN(addr, HPAGE_SIZE); 148 addr = ALIGN(addr, huge_page_size(h));
147 vma = find_vma(mm, addr); 149 vma = find_vma(mm, addr);
148 if (TASK_SIZE - len >= addr && 150 if (TASK_SIZE - len >= addr &&
149 (!vma || addr + len <= vma->vm_start)) 151 (!vma || addr + len <= vma->vm_start))
@@ -156,7 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
156 start_addr = TASK_UNMAPPED_BASE; 158 start_addr = TASK_UNMAPPED_BASE;
157 159
158full_search: 160full_search:
159 addr = ALIGN(start_addr, HPAGE_SIZE); 161 addr = ALIGN(start_addr, huge_page_size(h));
160 162
161 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 163 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
162 /* At this point: (!vma || addr < vma->vm_end). */ 164 /* At this point: (!vma || addr < vma->vm_end). */
@@ -174,7 +176,7 @@ full_search:
174 176
175 if (!vma || addr + len <= vma->vm_start) 177 if (!vma || addr + len <= vma->vm_start)
176 return addr; 178 return addr;
177 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 179 addr = ALIGN(vma->vm_end, huge_page_size(h));
178 } 180 }
179} 181}
180#endif 182#endif
@@ -225,10 +227,11 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
225static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, 227static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
226 size_t len, loff_t *ppos) 228 size_t len, loff_t *ppos)
227{ 229{
230 struct hstate *h = hstate_file(filp);
228 struct address_space *mapping = filp->f_mapping; 231 struct address_space *mapping = filp->f_mapping;
229 struct inode *inode = mapping->host; 232 struct inode *inode = mapping->host;
230 unsigned long index = *ppos >> HPAGE_SHIFT; 233 unsigned long index = *ppos >> huge_page_shift(h);
231 unsigned long offset = *ppos & ~HPAGE_MASK; 234 unsigned long offset = *ppos & ~huge_page_mask(h);
232 unsigned long end_index; 235 unsigned long end_index;
233 loff_t isize; 236 loff_t isize;
234 ssize_t retval = 0; 237 ssize_t retval = 0;
@@ -243,17 +246,17 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
243 if (!isize) 246 if (!isize)
244 goto out; 247 goto out;
245 248
246 end_index = (isize - 1) >> HPAGE_SHIFT; 249 end_index = (isize - 1) >> huge_page_shift(h);
247 for (;;) { 250 for (;;) {
248 struct page *page; 251 struct page *page;
249 int nr, ret; 252 unsigned long nr, ret;
250 253
251 /* nr is the maximum number of bytes to copy from this page */ 254 /* nr is the maximum number of bytes to copy from this page */
252 nr = HPAGE_SIZE; 255 nr = huge_page_size(h);
253 if (index >= end_index) { 256 if (index >= end_index) {
254 if (index > end_index) 257 if (index > end_index)
255 goto out; 258 goto out;
256 nr = ((isize - 1) & ~HPAGE_MASK) + 1; 259 nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
257 if (nr <= offset) { 260 if (nr <= offset) {
258 goto out; 261 goto out;
259 } 262 }
@@ -287,8 +290,8 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
287 offset += ret; 290 offset += ret;
288 retval += ret; 291 retval += ret;
289 len -= ret; 292 len -= ret;
290 index += offset >> HPAGE_SHIFT; 293 index += offset >> huge_page_shift(h);
291 offset &= ~HPAGE_MASK; 294 offset &= ~huge_page_mask(h);
292 295
293 if (page) 296 if (page)
294 page_cache_release(page); 297 page_cache_release(page);
@@ -298,7 +301,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
298 break; 301 break;
299 } 302 }
300out: 303out:
301 *ppos = ((loff_t)index << HPAGE_SHIFT) + offset; 304 *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
302 mutex_unlock(&inode->i_mutex); 305 mutex_unlock(&inode->i_mutex);
303 return retval; 306 return retval;
304} 307}
@@ -339,8 +342,9 @@ static void truncate_huge_page(struct page *page)
339 342
340static void truncate_hugepages(struct inode *inode, loff_t lstart) 343static void truncate_hugepages(struct inode *inode, loff_t lstart)
341{ 344{
345 struct hstate *h = hstate_inode(inode);
342 struct address_space *mapping = &inode->i_data; 346 struct address_space *mapping = &inode->i_data;
343 const pgoff_t start = lstart >> HPAGE_SHIFT; 347 const pgoff_t start = lstart >> huge_page_shift(h);
344 struct pagevec pvec; 348 struct pagevec pvec;
345 pgoff_t next; 349 pgoff_t next;
346 int i, freed = 0; 350 int i, freed = 0;
@@ -449,8 +453,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
449{ 453{
450 pgoff_t pgoff; 454 pgoff_t pgoff;
451 struct address_space *mapping = inode->i_mapping; 455 struct address_space *mapping = inode->i_mapping;
456 struct hstate *h = hstate_inode(inode);
452 457
453 BUG_ON(offset & ~HPAGE_MASK); 458 BUG_ON(offset & ~huge_page_mask(h));
454 pgoff = offset >> PAGE_SHIFT; 459 pgoff = offset >> PAGE_SHIFT;
455 460
456 i_size_write(inode, offset); 461 i_size_write(inode, offset);
@@ -465,6 +470,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
465static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) 470static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
466{ 471{
467 struct inode *inode = dentry->d_inode; 472 struct inode *inode = dentry->d_inode;
473 struct hstate *h = hstate_inode(inode);
468 int error; 474 int error;
469 unsigned int ia_valid = attr->ia_valid; 475 unsigned int ia_valid = attr->ia_valid;
470 476
@@ -476,7 +482,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
476 482
477 if (ia_valid & ATTR_SIZE) { 483 if (ia_valid & ATTR_SIZE) {
478 error = -EINVAL; 484 error = -EINVAL;
479 if (!(attr->ia_size & ~HPAGE_MASK)) 485 if (!(attr->ia_size & ~huge_page_mask(h)))
480 error = hugetlb_vmtruncate(inode, attr->ia_size); 486 error = hugetlb_vmtruncate(inode, attr->ia_size);
481 if (error) 487 if (error)
482 goto out; 488 goto out;
@@ -610,9 +616,10 @@ static int hugetlbfs_set_page_dirty(struct page *page)
610static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) 616static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
611{ 617{
612 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); 618 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
619 struct hstate *h = hstate_inode(dentry->d_inode);
613 620
614 buf->f_type = HUGETLBFS_MAGIC; 621 buf->f_type = HUGETLBFS_MAGIC;
615 buf->f_bsize = HPAGE_SIZE; 622 buf->f_bsize = huge_page_size(h);
616 if (sbinfo) { 623 if (sbinfo) {
617 spin_lock(&sbinfo->stat_lock); 624 spin_lock(&sbinfo->stat_lock);
618 /* If no limits set, just report 0 for max/free/used 625 /* If no limits set, just report 0 for max/free/used
@@ -942,7 +949,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
942 goto out_dentry; 949 goto out_dentry;
943 950
944 error = -ENOMEM; 951 error = -ENOMEM;
945 if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT, NULL)) 952 if (hugetlb_reserve_pages(inode, 0,
953 size >> huge_page_shift(hstate_inode(inode)), NULL))
946 goto out_inode; 954 goto out_inode;
947 955
948 d_instantiate(dentry, inode); 956 d_instantiate(dentry, inode);
diff --git a/include/asm-ia64/hugetlb.h b/include/asm-ia64/hugetlb.h
index e9d1e5e2382d..da55c63728e0 100644
--- a/include/asm-ia64/hugetlb.h
+++ b/include/asm-ia64/hugetlb.h
@@ -8,7 +8,8 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
8 unsigned long end, unsigned long floor, 8 unsigned long end, unsigned long floor,
9 unsigned long ceiling); 9 unsigned long ceiling);
10 10
11int prepare_hugepage_range(unsigned long addr, unsigned long len); 11int prepare_hugepage_range(struct file *file,
12 unsigned long addr, unsigned long len);
12 13
13static inline int is_hugepage_only_range(struct mm_struct *mm, 14static inline int is_hugepage_only_range(struct mm_struct *mm,
14 unsigned long addr, 15 unsigned long addr,
diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h
index 0a37aa5ecaa5..ca37c4af27b1 100644
--- a/include/asm-powerpc/hugetlb.h
+++ b/include/asm-powerpc/hugetlb.h
@@ -21,7 +21,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
21 * If the arch doesn't supply something else, assume that hugepage 21 * If the arch doesn't supply something else, assume that hugepage
22 * size aligned regions are ok without further preparation. 22 * size aligned regions are ok without further preparation.
23 */ 23 */
24static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) 24static inline int prepare_hugepage_range(struct file *file,
25 unsigned long addr, unsigned long len)
25{ 26{
26 if (len & ~HPAGE_MASK) 27 if (len & ~HPAGE_MASK)
27 return -EINVAL; 28 return -EINVAL;
diff --git a/include/asm-s390/hugetlb.h b/include/asm-s390/hugetlb.h
index 600a776f8f75..670a1d1745d2 100644
--- a/include/asm-s390/hugetlb.h
+++ b/include/asm-s390/hugetlb.h
@@ -22,7 +22,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
22 * If the arch doesn't supply something else, assume that hugepage 22 * If the arch doesn't supply something else, assume that hugepage
23 * size aligned regions are ok without further preparation. 23 * size aligned regions are ok without further preparation.
24 */ 24 */
25static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) 25static inline int prepare_hugepage_range(struct file *file,
26 unsigned long addr, unsigned long len)
26{ 27{
27 if (len & ~HPAGE_MASK) 28 if (len & ~HPAGE_MASK)
28 return -EINVAL; 29 return -EINVAL;
diff --git a/include/asm-sh/hugetlb.h b/include/asm-sh/hugetlb.h
index fb30018938c7..967068fb79ac 100644
--- a/include/asm-sh/hugetlb.h
+++ b/include/asm-sh/hugetlb.h
@@ -14,7 +14,8 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
14 * If the arch doesn't supply something else, assume that hugepage 14 * If the arch doesn't supply something else, assume that hugepage
15 * size aligned regions are ok without further preparation. 15 * size aligned regions are ok without further preparation.
16 */ 16 */
17static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) 17static inline int prepare_hugepage_range(struct file *file,
18 unsigned long addr, unsigned long len)
18{ 19{
19 if (len & ~HPAGE_MASK) 20 if (len & ~HPAGE_MASK)
20 return -EINVAL; 21 return -EINVAL;
diff --git a/include/asm-sparc/hugetlb.h b/include/asm-sparc/hugetlb.h
index aeb92374ca3d..177061064ee6 100644
--- a/include/asm-sparc/hugetlb.h
+++ b/include/asm-sparc/hugetlb.h
@@ -22,7 +22,8 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
22 * If the arch doesn't supply something else, assume that hugepage 22 * If the arch doesn't supply something else, assume that hugepage
23 * size aligned regions are ok without further preparation. 23 * size aligned regions are ok without further preparation.
24 */ 24 */
25static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) 25static inline int prepare_hugepage_range(struct file *file,
26 unsigned long addr, unsigned long len)
26{ 27{
27 if (len & ~HPAGE_MASK) 28 if (len & ~HPAGE_MASK)
28 return -EINVAL; 29 return -EINVAL;
diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h
index 7eed6e0883bf..439a9acc132d 100644
--- a/include/asm-x86/hugetlb.h
+++ b/include/asm-x86/hugetlb.h
@@ -14,11 +14,13 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
14 * If the arch doesn't supply something else, assume that hugepage 14 * If the arch doesn't supply something else, assume that hugepage
15 * size aligned regions are ok without further preparation. 15 * size aligned regions are ok without further preparation.
16 */ 16 */
17static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) 17static inline int prepare_hugepage_range(struct file *file,
18 unsigned long addr, unsigned long len)
18{ 19{
19 if (len & ~HPAGE_MASK) 20 struct hstate *h = hstate_file(file);
21 if (len & ~huge_page_mask(h))
20 return -EINVAL; 22 return -EINVAL;
21 if (addr & ~HPAGE_MASK) 23 if (addr & ~huge_page_mask(h))
22 return -EINVAL; 24 return -EINVAL;
23 return 0; 25 return 0;
24} 26}
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index abbc187193a1..ad2271e11f9b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -8,7 +8,6 @@
8#include <linux/mempolicy.h> 8#include <linux/mempolicy.h>
9#include <linux/shm.h> 9#include <linux/shm.h>
10#include <asm/tlbflush.h> 10#include <asm/tlbflush.h>
11#include <asm/hugetlb.h>
12 11
13struct ctl_table; 12struct ctl_table;
14 13
@@ -45,7 +44,8 @@ extern int sysctl_hugetlb_shm_group;
45 44
46/* arch callbacks */ 45/* arch callbacks */
47 46
48pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr); 47pte_t *huge_pte_alloc(struct mm_struct *mm,
48 unsigned long addr, unsigned long sz);
49pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr); 49pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
50int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); 50int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
51struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, 51struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
@@ -80,7 +80,7 @@ static inline unsigned long hugetlb_total_pages(void)
80#define hugetlb_report_meminfo(buf) 0 80#define hugetlb_report_meminfo(buf) 0
81#define hugetlb_report_node_meminfo(n, buf) 0 81#define hugetlb_report_node_meminfo(n, buf) 0
82#define follow_huge_pmd(mm, addr, pmd, write) NULL 82#define follow_huge_pmd(mm, addr, pmd, write) NULL
83#define prepare_hugepage_range(addr,len) (-EINVAL) 83#define prepare_hugepage_range(file, addr, len) (-EINVAL)
84#define pmd_huge(x) 0 84#define pmd_huge(x) 0
85#define is_hugepage_only_range(mm, addr, len) 0 85#define is_hugepage_only_range(mm, addr, len) 0
86#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) 86#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
@@ -134,8 +134,6 @@ struct file *hugetlb_file_setup(const char *name, size_t);
134int hugetlb_get_quota(struct address_space *mapping, long delta); 134int hugetlb_get_quota(struct address_space *mapping, long delta);
135void hugetlb_put_quota(struct address_space *mapping, long delta); 135void hugetlb_put_quota(struct address_space *mapping, long delta);
136 136
137#define BLOCKS_PER_HUGEPAGE (HPAGE_SIZE / 512)
138
139static inline int is_file_hugepages(struct file *file) 137static inline int is_file_hugepages(struct file *file)
140{ 138{
141 if (file->f_op == &hugetlbfs_file_operations) 139 if (file->f_op == &hugetlbfs_file_operations)
@@ -164,4 +162,84 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
164 unsigned long flags); 162 unsigned long flags);
165#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ 163#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
166 164
165#ifdef CONFIG_HUGETLB_PAGE
166
167/* Defines one hugetlb page size */
168struct hstate {
169 int hugetlb_next_nid;
170 unsigned int order;
171 unsigned long mask;
172 unsigned long max_huge_pages;
173 unsigned long nr_huge_pages;
174 unsigned long free_huge_pages;
175 unsigned long resv_huge_pages;
176 unsigned long surplus_huge_pages;
177 unsigned long nr_overcommit_huge_pages;
178 struct list_head hugepage_freelists[MAX_NUMNODES];
179 unsigned int nr_huge_pages_node[MAX_NUMNODES];
180 unsigned int free_huge_pages_node[MAX_NUMNODES];
181 unsigned int surplus_huge_pages_node[MAX_NUMNODES];
182};
183
184extern struct hstate default_hstate;
185
186static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
187{
188 return &default_hstate;
189}
190
191static inline struct hstate *hstate_file(struct file *f)
192{
193 return &default_hstate;
194}
195
196static inline struct hstate *hstate_inode(struct inode *i)
197{
198 return &default_hstate;
199}
200
201static inline unsigned long huge_page_size(struct hstate *h)
202{
203 return (unsigned long)PAGE_SIZE << h->order;
204}
205
206static inline unsigned long huge_page_mask(struct hstate *h)
207{
208 return h->mask;
209}
210
211static inline unsigned int huge_page_order(struct hstate *h)
212{
213 return h->order;
214}
215
216static inline unsigned huge_page_shift(struct hstate *h)
217{
218 return h->order + PAGE_SHIFT;
219}
220
221static inline unsigned int pages_per_huge_page(struct hstate *h)
222{
223 return 1 << h->order;
224}
225
226static inline unsigned int blocks_per_huge_page(struct hstate *h)
227{
228 return huge_page_size(h) / 512;
229}
230
231#include <asm/hugetlb.h>
232
233#else
234struct hstate {};
235#define hstate_file(f) NULL
236#define hstate_vma(v) NULL
237#define hstate_inode(i) NULL
238#define huge_page_size(h) PAGE_SIZE
239#define huge_page_mask(h) PAGE_MASK
240#define huge_page_order(h) 0
241#define huge_page_shift(h) PAGE_SHIFT
242#define pages_per_huge_page(h) 1
243#endif
244
167#endif /* _LINUX_HUGETLB_H */ 245#endif /* _LINUX_HUGETLB_H */
diff --git a/ipc/shm.c b/ipc/shm.c
index 790240cd067f..a726aebce7d7 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -577,7 +577,8 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
577 577
578 if (is_file_hugepages(shp->shm_file)) { 578 if (is_file_hugepages(shp->shm_file)) {
579 struct address_space *mapping = inode->i_mapping; 579 struct address_space *mapping = inode->i_mapping;
580 *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages; 580 struct hstate *h = hstate_file(shp->shm_file);
581 *rss += pages_per_huge_page(h) * mapping->nrpages;
581 } else { 582 } else {
582 struct shmem_inode_info *info = SHMEM_I(inode); 583 struct shmem_inode_info *info = SHMEM_I(inode);
583 spin_lock(&info->lock); 584 spin_lock(&info->lock);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 32dff4290c66..0d8153e25f09 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -22,18 +22,12 @@
22#include "internal.h" 22#include "internal.h"
23 23
24const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; 24const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
25static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
26static unsigned long surplus_huge_pages;
27static unsigned long nr_overcommit_huge_pages;
28unsigned long max_huge_pages; 25unsigned long max_huge_pages;
29unsigned long sysctl_overcommit_huge_pages; 26unsigned long sysctl_overcommit_huge_pages;
30static struct list_head hugepage_freelists[MAX_NUMNODES];
31static unsigned int nr_huge_pages_node[MAX_NUMNODES];
32static unsigned int free_huge_pages_node[MAX_NUMNODES];
33static unsigned int surplus_huge_pages_node[MAX_NUMNODES];
34static gfp_t htlb_alloc_mask = GFP_HIGHUSER; 27static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
35unsigned long hugepages_treat_as_movable; 28unsigned long hugepages_treat_as_movable;
36static int hugetlb_next_nid; 29
30struct hstate default_hstate;
37 31
38/* 32/*
39 * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages 33 * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
@@ -203,11 +197,11 @@ static long region_count(struct list_head *head, long f, long t)
203 * Convert the address within this vma to the page offset within 197 * Convert the address within this vma to the page offset within
204 * the mapping, in pagecache page units; huge pages here. 198 * the mapping, in pagecache page units; huge pages here.
205 */ 199 */
206static pgoff_t vma_hugecache_offset(struct vm_area_struct *vma, 200static pgoff_t vma_hugecache_offset(struct hstate *h,
207 unsigned long address) 201 struct vm_area_struct *vma, unsigned long address)
208{ 202{
209 return ((address - vma->vm_start) >> HPAGE_SHIFT) + 203 return ((address - vma->vm_start) >> huge_page_shift(h)) +
210 (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); 204 (vma->vm_pgoff >> huge_page_order(h));
211} 205}
212 206
213/* 207/*
@@ -309,20 +303,21 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
309} 303}
310 304
311/* Decrement the reserved pages in the hugepage pool by one */ 305/* Decrement the reserved pages in the hugepage pool by one */
312static void decrement_hugepage_resv_vma(struct vm_area_struct *vma) 306static void decrement_hugepage_resv_vma(struct hstate *h,
307 struct vm_area_struct *vma)
313{ 308{
314 if (vma->vm_flags & VM_NORESERVE) 309 if (vma->vm_flags & VM_NORESERVE)
315 return; 310 return;
316 311
317 if (vma->vm_flags & VM_SHARED) { 312 if (vma->vm_flags & VM_SHARED) {
318 /* Shared mappings always use reserves */ 313 /* Shared mappings always use reserves */
319 resv_huge_pages--; 314 h->resv_huge_pages--;
320 } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { 315 } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
321 /* 316 /*
322 * Only the process that called mmap() has reserves for 317 * Only the process that called mmap() has reserves for
323 * private mappings. 318 * private mappings.
324 */ 319 */
325 resv_huge_pages--; 320 h->resv_huge_pages--;
326 } 321 }
327} 322}
328 323
@@ -344,12 +339,13 @@ static int vma_has_private_reserves(struct vm_area_struct *vma)
344 return 1; 339 return 1;
345} 340}
346 341
347static void clear_huge_page(struct page *page, unsigned long addr) 342static void clear_huge_page(struct page *page,
343 unsigned long addr, unsigned long sz)
348{ 344{
349 int i; 345 int i;
350 346
351 might_sleep(); 347 might_sleep();
352 for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) { 348 for (i = 0; i < sz/PAGE_SIZE; i++) {
353 cond_resched(); 349 cond_resched();
354 clear_user_highpage(page + i, addr + i * PAGE_SIZE); 350 clear_user_highpage(page + i, addr + i * PAGE_SIZE);
355 } 351 }
@@ -359,41 +355,43 @@ static void copy_huge_page(struct page *dst, struct page *src,
359 unsigned long addr, struct vm_area_struct *vma) 355 unsigned long addr, struct vm_area_struct *vma)
360{ 356{
361 int i; 357 int i;
358 struct hstate *h = hstate_vma(vma);
362 359
363 might_sleep(); 360 might_sleep();
364 for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) { 361 for (i = 0; i < pages_per_huge_page(h); i++) {
365 cond_resched(); 362 cond_resched();
366 copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma); 363 copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
367 } 364 }
368} 365}
369 366
370static void enqueue_huge_page(struct page *page) 367static void enqueue_huge_page(struct hstate *h, struct page *page)
371{ 368{
372 int nid = page_to_nid(page); 369 int nid = page_to_nid(page);
373 list_add(&page->lru, &hugepage_freelists[nid]); 370 list_add(&page->lru, &h->hugepage_freelists[nid]);
374 free_huge_pages++; 371 h->free_huge_pages++;
375 free_huge_pages_node[nid]++; 372 h->free_huge_pages_node[nid]++;
376} 373}
377 374
378static struct page *dequeue_huge_page(void) 375static struct page *dequeue_huge_page(struct hstate *h)
379{ 376{
380 int nid; 377 int nid;
381 struct page *page = NULL; 378 struct page *page = NULL;
382 379
383 for (nid = 0; nid < MAX_NUMNODES; ++nid) { 380 for (nid = 0; nid < MAX_NUMNODES; ++nid) {
384 if (!list_empty(&hugepage_freelists[nid])) { 381 if (!list_empty(&h->hugepage_freelists[nid])) {
385 page = list_entry(hugepage_freelists[nid].next, 382 page = list_entry(h->hugepage_freelists[nid].next,
386 struct page, lru); 383 struct page, lru);
387 list_del(&page->lru); 384 list_del(&page->lru);
388 free_huge_pages--; 385 h->free_huge_pages--;
389 free_huge_pages_node[nid]--; 386 h->free_huge_pages_node[nid]--;
390 break; 387 break;
391 } 388 }
392 } 389 }
393 return page; 390 return page;
394} 391}
395 392
396static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma, 393static struct page *dequeue_huge_page_vma(struct hstate *h,
394 struct vm_area_struct *vma,
397 unsigned long address, int avoid_reserve) 395 unsigned long address, int avoid_reserve)
398{ 396{
399 int nid; 397 int nid;
@@ -411,26 +409,26 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
411 * not "stolen". The child may still get SIGKILLed 409 * not "stolen". The child may still get SIGKILLed
412 */ 410 */
413 if (!vma_has_private_reserves(vma) && 411 if (!vma_has_private_reserves(vma) &&
414 free_huge_pages - resv_huge_pages == 0) 412 h->free_huge_pages - h->resv_huge_pages == 0)
415 return NULL; 413 return NULL;
416 414
417 /* If reserves cannot be used, ensure enough pages are in the pool */ 415 /* If reserves cannot be used, ensure enough pages are in the pool */
418 if (avoid_reserve && free_huge_pages - resv_huge_pages == 0) 416 if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
419 return NULL; 417 return NULL;
420 418
421 for_each_zone_zonelist_nodemask(zone, z, zonelist, 419 for_each_zone_zonelist_nodemask(zone, z, zonelist,
422 MAX_NR_ZONES - 1, nodemask) { 420 MAX_NR_ZONES - 1, nodemask) {
423 nid = zone_to_nid(zone); 421 nid = zone_to_nid(zone);
424 if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) && 422 if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
425 !list_empty(&hugepage_freelists[nid])) { 423 !list_empty(&h->hugepage_freelists[nid])) {
426 page = list_entry(hugepage_freelists[nid].next, 424 page = list_entry(h->hugepage_freelists[nid].next,
427 struct page, lru); 425 struct page, lru);
428 list_del(&page->lru); 426 list_del(&page->lru);
429 free_huge_pages--; 427 h->free_huge_pages--;
430 free_huge_pages_node[nid]--; 428 h->free_huge_pages_node[nid]--;
431 429
432 if (!avoid_reserve) 430 if (!avoid_reserve)
433 decrement_hugepage_resv_vma(vma); 431 decrement_hugepage_resv_vma(h, vma);
434 432
435 break; 433 break;
436 } 434 }
@@ -439,12 +437,13 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
439 return page; 437 return page;
440} 438}
441 439
442static void update_and_free_page(struct page *page) 440static void update_and_free_page(struct hstate *h, struct page *page)
443{ 441{
444 int i; 442 int i;
445 nr_huge_pages--; 443
446 nr_huge_pages_node[page_to_nid(page)]--; 444 h->nr_huge_pages--;
447 for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) { 445 h->nr_huge_pages_node[page_to_nid(page)]--;
446 for (i = 0; i < pages_per_huge_page(h); i++) {
448 page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | 447 page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
449 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | 448 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
450 1 << PG_private | 1<< PG_writeback); 449 1 << PG_private | 1<< PG_writeback);
@@ -452,11 +451,16 @@ static void update_and_free_page(struct page *page)
452 set_compound_page_dtor(page, NULL); 451 set_compound_page_dtor(page, NULL);
453 set_page_refcounted(page); 452 set_page_refcounted(page);
454 arch_release_hugepage(page); 453 arch_release_hugepage(page);
455 __free_pages(page, HUGETLB_PAGE_ORDER); 454 __free_pages(page, huge_page_order(h));
456} 455}
457 456
458static void free_huge_page(struct page *page) 457static void free_huge_page(struct page *page)
459{ 458{
459 /*
460 * Can't pass hstate in here because it is called from the
461 * compound page destructor.
462 */
463 struct hstate *h = &default_hstate;
460 int nid = page_to_nid(page); 464 int nid = page_to_nid(page);
461 struct address_space *mapping; 465 struct address_space *mapping;
462 466
@@ -466,12 +470,12 @@ static void free_huge_page(struct page *page)
466 INIT_LIST_HEAD(&page->lru); 470 INIT_LIST_HEAD(&page->lru);
467 471
468 spin_lock(&hugetlb_lock); 472 spin_lock(&hugetlb_lock);
469 if (surplus_huge_pages_node[nid]) { 473 if (h->surplus_huge_pages_node[nid]) {
470 update_and_free_page(page); 474 update_and_free_page(h, page);
471 surplus_huge_pages--; 475 h->surplus_huge_pages--;
472 surplus_huge_pages_node[nid]--; 476 h->surplus_huge_pages_node[nid]--;
473 } else { 477 } else {
474 enqueue_huge_page(page); 478 enqueue_huge_page(h, page);
475 } 479 }
476 spin_unlock(&hugetlb_lock); 480 spin_unlock(&hugetlb_lock);
477 if (mapping) 481 if (mapping)
@@ -483,7 +487,7 @@ static void free_huge_page(struct page *page)
483 * balanced by operating on them in a round-robin fashion. 487 * balanced by operating on them in a round-robin fashion.
484 * Returns 1 if an adjustment was made. 488 * Returns 1 if an adjustment was made.
485 */ 489 */
486static int adjust_pool_surplus(int delta) 490static int adjust_pool_surplus(struct hstate *h, int delta)
487{ 491{
488 static int prev_nid; 492 static int prev_nid;
489 int nid = prev_nid; 493 int nid = prev_nid;
@@ -496,15 +500,15 @@ static int adjust_pool_surplus(int delta)
496 nid = first_node(node_online_map); 500 nid = first_node(node_online_map);
497 501
498 /* To shrink on this node, there must be a surplus page */ 502 /* To shrink on this node, there must be a surplus page */
499 if (delta < 0 && !surplus_huge_pages_node[nid]) 503 if (delta < 0 && !h->surplus_huge_pages_node[nid])
500 continue; 504 continue;
501 /* Surplus cannot exceed the total number of pages */ 505 /* Surplus cannot exceed the total number of pages */
502 if (delta > 0 && surplus_huge_pages_node[nid] >= 506 if (delta > 0 && h->surplus_huge_pages_node[nid] >=
503 nr_huge_pages_node[nid]) 507 h->nr_huge_pages_node[nid])
504 continue; 508 continue;
505 509
506 surplus_huge_pages += delta; 510 h->surplus_huge_pages += delta;
507 surplus_huge_pages_node[nid] += delta; 511 h->surplus_huge_pages_node[nid] += delta;
508 ret = 1; 512 ret = 1;
509 break; 513 break;
510 } while (nid != prev_nid); 514 } while (nid != prev_nid);
@@ -513,46 +517,46 @@ static int adjust_pool_surplus(int delta)
513 return ret; 517 return ret;
514} 518}
515 519
516static void prep_new_huge_page(struct page *page, int nid) 520static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
517{ 521{
518 set_compound_page_dtor(page, free_huge_page); 522 set_compound_page_dtor(page, free_huge_page);
519 spin_lock(&hugetlb_lock); 523 spin_lock(&hugetlb_lock);
520 nr_huge_pages++; 524 h->nr_huge_pages++;
521 nr_huge_pages_node[nid]++; 525 h->nr_huge_pages_node[nid]++;
522 spin_unlock(&hugetlb_lock); 526 spin_unlock(&hugetlb_lock);
523 put_page(page); /* free it into the hugepage allocator */ 527 put_page(page); /* free it into the hugepage allocator */
524} 528}
525 529
526static struct page *alloc_fresh_huge_page_node(int nid) 530static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
527{ 531{
528 struct page *page; 532 struct page *page;
529 533
530 page = alloc_pages_node(nid, 534 page = alloc_pages_node(nid,
531 htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE| 535 htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
532 __GFP_REPEAT|__GFP_NOWARN, 536 __GFP_REPEAT|__GFP_NOWARN,
533 HUGETLB_PAGE_ORDER); 537 huge_page_order(h));
534 if (page) { 538 if (page) {
535 if (arch_prepare_hugepage(page)) { 539 if (arch_prepare_hugepage(page)) {
536 __free_pages(page, HUGETLB_PAGE_ORDER); 540 __free_pages(page, HUGETLB_PAGE_ORDER);
537 return NULL; 541 return NULL;
538 } 542 }
539 prep_new_huge_page(page, nid); 543 prep_new_huge_page(h, page, nid);
540 } 544 }
541 545
542 return page; 546 return page;
543} 547}
544 548
545static int alloc_fresh_huge_page(void) 549static int alloc_fresh_huge_page(struct hstate *h)
546{ 550{
547 struct page *page; 551 struct page *page;
548 int start_nid; 552 int start_nid;
549 int next_nid; 553 int next_nid;
550 int ret = 0; 554 int ret = 0;
551 555
552 start_nid = hugetlb_next_nid; 556 start_nid = h->hugetlb_next_nid;
553 557
554 do { 558 do {
555 page = alloc_fresh_huge_page_node(hugetlb_next_nid); 559 page = alloc_fresh_huge_page_node(h, h->hugetlb_next_nid);
556 if (page) 560 if (page)
557 ret = 1; 561 ret = 1;
558 /* 562 /*
@@ -566,11 +570,11 @@ static int alloc_fresh_huge_page(void)
566 * if we just successfully allocated a hugepage so that 570 * if we just successfully allocated a hugepage so that
567 * the next caller gets hugepages on the next node. 571 * the next caller gets hugepages on the next node.
568 */ 572 */
569 next_nid = next_node(hugetlb_next_nid, node_online_map); 573 next_nid = next_node(h->hugetlb_next_nid, node_online_map);
570 if (next_nid == MAX_NUMNODES) 574 if (next_nid == MAX_NUMNODES)
571 next_nid = first_node(node_online_map); 575 next_nid = first_node(node_online_map);
572 hugetlb_next_nid = next_nid; 576 h->hugetlb_next_nid = next_nid;
573 } while (!page && hugetlb_next_nid != start_nid); 577 } while (!page && h->hugetlb_next_nid != start_nid);
574 578
575 if (ret) 579 if (ret)
576 count_vm_event(HTLB_BUDDY_PGALLOC); 580 count_vm_event(HTLB_BUDDY_PGALLOC);
@@ -580,8 +584,8 @@ static int alloc_fresh_huge_page(void)
580 return ret; 584 return ret;
581} 585}
582 586
583static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma, 587static struct page *alloc_buddy_huge_page(struct hstate *h,
584 unsigned long address) 588 struct vm_area_struct *vma, unsigned long address)
585{ 589{
586 struct page *page; 590 struct page *page;
587 unsigned int nid; 591 unsigned int nid;
@@ -610,18 +614,18 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
610 * per-node value is checked there. 614 * per-node value is checked there.
611 */ 615 */
612 spin_lock(&hugetlb_lock); 616 spin_lock(&hugetlb_lock);
613 if (surplus_huge_pages >= nr_overcommit_huge_pages) { 617 if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
614 spin_unlock(&hugetlb_lock); 618 spin_unlock(&hugetlb_lock);
615 return NULL; 619 return NULL;
616 } else { 620 } else {
617 nr_huge_pages++; 621 h->nr_huge_pages++;
618 surplus_huge_pages++; 622 h->surplus_huge_pages++;
619 } 623 }
620 spin_unlock(&hugetlb_lock); 624 spin_unlock(&hugetlb_lock);
621 625
622 page = alloc_pages(htlb_alloc_mask|__GFP_COMP| 626 page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
623 __GFP_REPEAT|__GFP_NOWARN, 627 __GFP_REPEAT|__GFP_NOWARN,
624 HUGETLB_PAGE_ORDER); 628 huge_page_order(h));
625 629
626 spin_lock(&hugetlb_lock); 630 spin_lock(&hugetlb_lock);
627 if (page) { 631 if (page) {
@@ -636,12 +640,12 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
636 /* 640 /*
637 * We incremented the global counters already 641 * We incremented the global counters already
638 */ 642 */
639 nr_huge_pages_node[nid]++; 643 h->nr_huge_pages_node[nid]++;
640 surplus_huge_pages_node[nid]++; 644 h->surplus_huge_pages_node[nid]++;
641 __count_vm_event(HTLB_BUDDY_PGALLOC); 645 __count_vm_event(HTLB_BUDDY_PGALLOC);
642 } else { 646 } else {
643 nr_huge_pages--; 647 h->nr_huge_pages--;
644 surplus_huge_pages--; 648 h->surplus_huge_pages--;
645 __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL); 649 __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
646 } 650 }
647 spin_unlock(&hugetlb_lock); 651 spin_unlock(&hugetlb_lock);
@@ -653,16 +657,16 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
653 * Increase the hugetlb pool such that it can accommodate a reservation 657 * Increase the hugetlb pool such that it can accommodate a reservation
654 * of size 'delta'. 658 * of size 'delta'.
655 */ 659 */
656static int gather_surplus_pages(int delta) 660static int gather_surplus_pages(struct hstate *h, int delta)
657{ 661{
658 struct list_head surplus_list; 662 struct list_head surplus_list;
659 struct page *page, *tmp; 663 struct page *page, *tmp;
660 int ret, i; 664 int ret, i;
661 int needed, allocated; 665 int needed, allocated;
662 666
663 needed = (resv_huge_pages + delta) - free_huge_pages; 667 needed = (h->resv_huge_pages + delta) - h->free_huge_pages;
664 if (needed <= 0) { 668 if (needed <= 0) {
665 resv_huge_pages += delta; 669 h->resv_huge_pages += delta;
666 return 0; 670 return 0;
667 } 671 }
668 672
@@ -673,7 +677,7 @@ static int gather_surplus_pages(int delta)
673retry: 677retry:
674 spin_unlock(&hugetlb_lock); 678 spin_unlock(&hugetlb_lock);
675 for (i = 0; i < needed; i++) { 679 for (i = 0; i < needed; i++) {
676 page = alloc_buddy_huge_page(NULL, 0); 680 page = alloc_buddy_huge_page(h, NULL, 0);
677 if (!page) { 681 if (!page) {
678 /* 682 /*
679 * We were not able to allocate enough pages to 683 * We were not able to allocate enough pages to
@@ -694,7 +698,8 @@ retry:
694 * because either resv_huge_pages or free_huge_pages may have changed. 698 * because either resv_huge_pages or free_huge_pages may have changed.
695 */ 699 */
696 spin_lock(&hugetlb_lock); 700 spin_lock(&hugetlb_lock);
697 needed = (resv_huge_pages + delta) - (free_huge_pages + allocated); 701 needed = (h->resv_huge_pages + delta) -
702 (h->free_huge_pages + allocated);
698 if (needed > 0) 703 if (needed > 0)
699 goto retry; 704 goto retry;
700 705
@@ -707,7 +712,7 @@ retry:
707 * before they are reserved. 712 * before they are reserved.
708 */ 713 */
709 needed += allocated; 714 needed += allocated;
710 resv_huge_pages += delta; 715 h->resv_huge_pages += delta;
711 ret = 0; 716 ret = 0;
712free: 717free:
713 /* Free the needed pages to the hugetlb pool */ 718 /* Free the needed pages to the hugetlb pool */
@@ -715,7 +720,7 @@ free:
715 if ((--needed) < 0) 720 if ((--needed) < 0)
716 break; 721 break;
717 list_del(&page->lru); 722 list_del(&page->lru);
718 enqueue_huge_page(page); 723 enqueue_huge_page(h, page);
719 } 724 }
720 725
721 /* Free unnecessary surplus pages to the buddy allocator */ 726 /* Free unnecessary surplus pages to the buddy allocator */
@@ -743,7 +748,8 @@ free:
743 * allocated to satisfy the reservation must be explicitly freed if they were 748 * allocated to satisfy the reservation must be explicitly freed if they were
744 * never used. 749 * never used.
745 */ 750 */
746static void return_unused_surplus_pages(unsigned long unused_resv_pages) 751static void return_unused_surplus_pages(struct hstate *h,
752 unsigned long unused_resv_pages)
747{ 753{
748 static int nid = -1; 754 static int nid = -1;
749 struct page *page; 755 struct page *page;
@@ -758,27 +764,27 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
758 unsigned long remaining_iterations = num_online_nodes(); 764 unsigned long remaining_iterations = num_online_nodes();
759 765
760 /* Uncommit the reservation */ 766 /* Uncommit the reservation */
761 resv_huge_pages -= unused_resv_pages; 767 h->resv_huge_pages -= unused_resv_pages;
762 768
763 nr_pages = min(unused_resv_pages, surplus_huge_pages); 769 nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
764 770
765 while (remaining_iterations-- && nr_pages) { 771 while (remaining_iterations-- && nr_pages) {
766 nid = next_node(nid, node_online_map); 772 nid = next_node(nid, node_online_map);
767 if (nid == MAX_NUMNODES) 773 if (nid == MAX_NUMNODES)
768 nid = first_node(node_online_map); 774 nid = first_node(node_online_map);
769 775
770 if (!surplus_huge_pages_node[nid]) 776 if (!h->surplus_huge_pages_node[nid])
771 continue; 777 continue;
772 778
773 if (!list_empty(&hugepage_freelists[nid])) { 779 if (!list_empty(&h->hugepage_freelists[nid])) {
774 page = list_entry(hugepage_freelists[nid].next, 780 page = list_entry(h->hugepage_freelists[nid].next,
775 struct page, lru); 781 struct page, lru);
776 list_del(&page->lru); 782 list_del(&page->lru);
777 update_and_free_page(page); 783 update_and_free_page(h, page);
778 free_huge_pages--; 784 h->free_huge_pages--;
779 free_huge_pages_node[nid]--; 785 h->free_huge_pages_node[nid]--;
780 surplus_huge_pages--; 786 h->surplus_huge_pages--;
781 surplus_huge_pages_node[nid]--; 787 h->surplus_huge_pages_node[nid]--;
782 nr_pages--; 788 nr_pages--;
783 remaining_iterations = num_online_nodes(); 789 remaining_iterations = num_online_nodes();
784 } 790 }
@@ -794,13 +800,14 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
794 * an instantiated the change should be committed via vma_commit_reservation. 800 * an instantiated the change should be committed via vma_commit_reservation.
795 * No action is required on failure. 801 * No action is required on failure.
796 */ 802 */
797static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr) 803static int vma_needs_reservation(struct hstate *h,
804 struct vm_area_struct *vma, unsigned long addr)
798{ 805{
799 struct address_space *mapping = vma->vm_file->f_mapping; 806 struct address_space *mapping = vma->vm_file->f_mapping;
800 struct inode *inode = mapping->host; 807 struct inode *inode = mapping->host;
801 808
802 if (vma->vm_flags & VM_SHARED) { 809 if (vma->vm_flags & VM_SHARED) {
803 pgoff_t idx = vma_hugecache_offset(vma, addr); 810 pgoff_t idx = vma_hugecache_offset(h, vma, addr);
804 return region_chg(&inode->i_mapping->private_list, 811 return region_chg(&inode->i_mapping->private_list,
805 idx, idx + 1); 812 idx, idx + 1);
806 813
@@ -809,7 +816,7 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
809 816
810 } else { 817 } else {
811 int err; 818 int err;
812 pgoff_t idx = vma_hugecache_offset(vma, addr); 819 pgoff_t idx = vma_hugecache_offset(h, vma, addr);
813 struct resv_map *reservations = vma_resv_map(vma); 820 struct resv_map *reservations = vma_resv_map(vma);
814 821
815 err = region_chg(&reservations->regions, idx, idx + 1); 822 err = region_chg(&reservations->regions, idx, idx + 1);
@@ -818,18 +825,18 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
818 return 0; 825 return 0;
819 } 826 }
820} 827}
821static void vma_commit_reservation(struct vm_area_struct *vma, 828static void vma_commit_reservation(struct hstate *h,
822 unsigned long addr) 829 struct vm_area_struct *vma, unsigned long addr)
823{ 830{
824 struct address_space *mapping = vma->vm_file->f_mapping; 831 struct address_space *mapping = vma->vm_file->f_mapping;
825 struct inode *inode = mapping->host; 832 struct inode *inode = mapping->host;
826 833
827 if (vma->vm_flags & VM_SHARED) { 834 if (vma->vm_flags & VM_SHARED) {
828 pgoff_t idx = vma_hugecache_offset(vma, addr); 835 pgoff_t idx = vma_hugecache_offset(h, vma, addr);
829 region_add(&inode->i_mapping->private_list, idx, idx + 1); 836 region_add(&inode->i_mapping->private_list, idx, idx + 1);
830 837
831 } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { 838 } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
832 pgoff_t idx = vma_hugecache_offset(vma, addr); 839 pgoff_t idx = vma_hugecache_offset(h, vma, addr);
833 struct resv_map *reservations = vma_resv_map(vma); 840 struct resv_map *reservations = vma_resv_map(vma);
834 841
835 /* Mark this page used in the map. */ 842 /* Mark this page used in the map. */
@@ -840,6 +847,7 @@ static void vma_commit_reservation(struct vm_area_struct *vma,
840static struct page *alloc_huge_page(struct vm_area_struct *vma, 847static struct page *alloc_huge_page(struct vm_area_struct *vma,
841 unsigned long addr, int avoid_reserve) 848 unsigned long addr, int avoid_reserve)
842{ 849{
850 struct hstate *h = hstate_vma(vma);
843 struct page *page; 851 struct page *page;
844 struct address_space *mapping = vma->vm_file->f_mapping; 852 struct address_space *mapping = vma->vm_file->f_mapping;
845 struct inode *inode = mapping->host; 853 struct inode *inode = mapping->host;
@@ -852,7 +860,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
852 * MAP_NORESERVE mappings may also need pages and quota allocated 860 * MAP_NORESERVE mappings may also need pages and quota allocated
853 * if no reserve mapping overlaps. 861 * if no reserve mapping overlaps.
854 */ 862 */
855 chg = vma_needs_reservation(vma, addr); 863 chg = vma_needs_reservation(h, vma, addr);
856 if (chg < 0) 864 if (chg < 0)
857 return ERR_PTR(chg); 865 return ERR_PTR(chg);
858 if (chg) 866 if (chg)
@@ -860,11 +868,11 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
860 return ERR_PTR(-ENOSPC); 868 return ERR_PTR(-ENOSPC);
861 869
862 spin_lock(&hugetlb_lock); 870 spin_lock(&hugetlb_lock);
863 page = dequeue_huge_page_vma(vma, addr, avoid_reserve); 871 page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve);
864 spin_unlock(&hugetlb_lock); 872 spin_unlock(&hugetlb_lock);
865 873
866 if (!page) { 874 if (!page) {
867 page = alloc_buddy_huge_page(vma, addr); 875 page = alloc_buddy_huge_page(h, vma, addr);
868 if (!page) { 876 if (!page) {
869 hugetlb_put_quota(inode->i_mapping, chg); 877 hugetlb_put_quota(inode->i_mapping, chg);
870 return ERR_PTR(-VM_FAULT_OOM); 878 return ERR_PTR(-VM_FAULT_OOM);
@@ -874,7 +882,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
874 set_page_refcounted(page); 882 set_page_refcounted(page);
875 set_page_private(page, (unsigned long) mapping); 883 set_page_private(page, (unsigned long) mapping);
876 884
877 vma_commit_reservation(vma, addr); 885 vma_commit_reservation(h, vma, addr);
878 886
879 return page; 887 return page;
880} 888}
@@ -882,21 +890,28 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
882static int __init hugetlb_init(void) 890static int __init hugetlb_init(void)
883{ 891{
884 unsigned long i; 892 unsigned long i;
893 struct hstate *h = &default_hstate;
885 894
886 if (HPAGE_SHIFT == 0) 895 if (HPAGE_SHIFT == 0)
887 return 0; 896 return 0;
888 897
898 if (!h->order) {
899 h->order = HPAGE_SHIFT - PAGE_SHIFT;
900 h->mask = HPAGE_MASK;
901 }
902
889 for (i = 0; i < MAX_NUMNODES; ++i) 903 for (i = 0; i < MAX_NUMNODES; ++i)
890 INIT_LIST_HEAD(&hugepage_freelists[i]); 904 INIT_LIST_HEAD(&h->hugepage_freelists[i]);
891 905
892 hugetlb_next_nid = first_node(node_online_map); 906 h->hugetlb_next_nid = first_node(node_online_map);
893 907
894 for (i = 0; i < max_huge_pages; ++i) { 908 for (i = 0; i < max_huge_pages; ++i) {
895 if (!alloc_fresh_huge_page()) 909 if (!alloc_fresh_huge_page(h))
896 break; 910 break;
897 } 911 }
898 max_huge_pages = free_huge_pages = nr_huge_pages = i; 912 max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
899 printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages); 913 printk(KERN_INFO "Total HugeTLB memory allocated, %ld\n",
914 h->free_huge_pages);
900 return 0; 915 return 0;
901} 916}
902module_init(hugetlb_init); 917module_init(hugetlb_init);
@@ -922,34 +937,36 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
922 937
923#ifdef CONFIG_SYSCTL 938#ifdef CONFIG_SYSCTL
924#ifdef CONFIG_HIGHMEM 939#ifdef CONFIG_HIGHMEM
925static void try_to_free_low(unsigned long count) 940static void try_to_free_low(struct hstate *h, unsigned long count)
926{ 941{
927 int i; 942 int i;
928 943
929 for (i = 0; i < MAX_NUMNODES; ++i) { 944 for (i = 0; i < MAX_NUMNODES; ++i) {
930 struct page *page, *next; 945 struct page *page, *next;
931 list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) { 946 struct list_head *freel = &h->hugepage_freelists[i];
932 if (count >= nr_huge_pages) 947 list_for_each_entry_safe(page, next, freel, lru) {
948 if (count >= h->nr_huge_pages)
933 return; 949 return;
934 if (PageHighMem(page)) 950 if (PageHighMem(page))
935 continue; 951 continue;
936 list_del(&page->lru); 952 list_del(&page->lru);
937 update_and_free_page(page); 953 update_and_free_page(page);
938 free_huge_pages--; 954 h->free_huge_pages--;
939 free_huge_pages_node[page_to_nid(page)]--; 955 h->free_huge_pages_node[page_to_nid(page)]--;
940 } 956 }
941 } 957 }
942} 958}
943#else 959#else
944static inline void try_to_free_low(unsigned long count) 960static inline void try_to_free_low(struct hstate *h, unsigned long count)
945{ 961{
946} 962}
947#endif 963#endif
948 964
949#define persistent_huge_pages (nr_huge_pages - surplus_huge_pages) 965#define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
950static unsigned long set_max_huge_pages(unsigned long count) 966static unsigned long set_max_huge_pages(unsigned long count)
951{ 967{
952 unsigned long min_count, ret; 968 unsigned long min_count, ret;
969 struct hstate *h = &default_hstate;
953 970
954 /* 971 /*
955 * Increase the pool size 972 * Increase the pool size
@@ -963,19 +980,19 @@ static unsigned long set_max_huge_pages(unsigned long count)
963 * within all the constraints specified by the sysctls. 980 * within all the constraints specified by the sysctls.
964 */ 981 */
965 spin_lock(&hugetlb_lock); 982 spin_lock(&hugetlb_lock);
966 while (surplus_huge_pages && count > persistent_huge_pages) { 983 while (h->surplus_huge_pages && count > persistent_huge_pages(h)) {
967 if (!adjust_pool_surplus(-1)) 984 if (!adjust_pool_surplus(h, -1))
968 break; 985 break;
969 } 986 }
970 987
971 while (count > persistent_huge_pages) { 988 while (count > persistent_huge_pages(h)) {
972 /* 989 /*
973 * If this allocation races such that we no longer need the 990 * If this allocation races such that we no longer need the
974 * page, free_huge_page will handle it by freeing the page 991 * page, free_huge_page will handle it by freeing the page
975 * and reducing the surplus. 992 * and reducing the surplus.
976 */ 993 */
977 spin_unlock(&hugetlb_lock); 994 spin_unlock(&hugetlb_lock);
978 ret = alloc_fresh_huge_page(); 995 ret = alloc_fresh_huge_page(h);
979 spin_lock(&hugetlb_lock); 996 spin_lock(&hugetlb_lock);
980 if (!ret) 997 if (!ret)
981 goto out; 998 goto out;
@@ -997,21 +1014,21 @@ static unsigned long set_max_huge_pages(unsigned long count)
997 * and won't grow the pool anywhere else. Not until one of the 1014 * and won't grow the pool anywhere else. Not until one of the
998 * sysctls are changed, or the surplus pages go out of use. 1015 * sysctls are changed, or the surplus pages go out of use.
999 */ 1016 */
1000 min_count = resv_huge_pages + nr_huge_pages - free_huge_pages; 1017 min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
1001 min_count = max(count, min_count); 1018 min_count = max(count, min_count);
1002 try_to_free_low(min_count); 1019 try_to_free_low(h, min_count);
1003 while (min_count < persistent_huge_pages) { 1020 while (min_count < persistent_huge_pages(h)) {
1004 struct page *page = dequeue_huge_page(); 1021 struct page *page = dequeue_huge_page(h);
1005 if (!page) 1022 if (!page)
1006 break; 1023 break;
1007 update_and_free_page(page); 1024 update_and_free_page(h, page);
1008 } 1025 }
1009 while (count < persistent_huge_pages) { 1026 while (count < persistent_huge_pages(h)) {
1010 if (!adjust_pool_surplus(1)) 1027 if (!adjust_pool_surplus(h, 1))
1011 break; 1028 break;
1012 } 1029 }
1013out: 1030out:
1014 ret = persistent_huge_pages; 1031 ret = persistent_huge_pages(h);
1015 spin_unlock(&hugetlb_lock); 1032 spin_unlock(&hugetlb_lock);
1016 return ret; 1033 return ret;
1017} 1034}
@@ -1041,9 +1058,10 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
1041 struct file *file, void __user *buffer, 1058 struct file *file, void __user *buffer,
1042 size_t *length, loff_t *ppos) 1059 size_t *length, loff_t *ppos)
1043{ 1060{
1061 struct hstate *h = &default_hstate;
1044 proc_doulongvec_minmax(table, write, file, buffer, length, ppos); 1062 proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
1045 spin_lock(&hugetlb_lock); 1063 spin_lock(&hugetlb_lock);
1046 nr_overcommit_huge_pages = sysctl_overcommit_huge_pages; 1064 h->nr_overcommit_huge_pages = sysctl_overcommit_huge_pages;
1047 spin_unlock(&hugetlb_lock); 1065 spin_unlock(&hugetlb_lock);
1048 return 0; 1066 return 0;
1049} 1067}
@@ -1052,37 +1070,40 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
1052 1070
1053int hugetlb_report_meminfo(char *buf) 1071int hugetlb_report_meminfo(char *buf)
1054{ 1072{
1073 struct hstate *h = &default_hstate;
1055 return sprintf(buf, 1074 return sprintf(buf,
1056 "HugePages_Total: %5lu\n" 1075 "HugePages_Total: %5lu\n"
1057 "HugePages_Free: %5lu\n" 1076 "HugePages_Free: %5lu\n"
1058 "HugePages_Rsvd: %5lu\n" 1077 "HugePages_Rsvd: %5lu\n"
1059 "HugePages_Surp: %5lu\n" 1078 "HugePages_Surp: %5lu\n"
1060 "Hugepagesize: %5lu kB\n", 1079 "Hugepagesize: %5lu kB\n",
1061 nr_huge_pages, 1080 h->nr_huge_pages,
1062 free_huge_pages, 1081 h->free_huge_pages,
1063 resv_huge_pages, 1082 h->resv_huge_pages,
1064 surplus_huge_pages, 1083 h->surplus_huge_pages,
1065 HPAGE_SIZE/1024); 1084 1UL << (huge_page_order(h) + PAGE_SHIFT - 10));
1066} 1085}
1067 1086
1068int hugetlb_report_node_meminfo(int nid, char *buf) 1087int hugetlb_report_node_meminfo(int nid, char *buf)
1069{ 1088{
1089 struct hstate *h = &default_hstate;
1070 return sprintf(buf, 1090 return sprintf(buf,
1071 "Node %d HugePages_Total: %5u\n" 1091 "Node %d HugePages_Total: %5u\n"
1072 "Node %d HugePages_Free: %5u\n" 1092 "Node %d HugePages_Free: %5u\n"
1073 "Node %d HugePages_Surp: %5u\n", 1093 "Node %d HugePages_Surp: %5u\n",
1074 nid, nr_huge_pages_node[nid], 1094 nid, h->nr_huge_pages_node[nid],
1075 nid, free_huge_pages_node[nid], 1095 nid, h->free_huge_pages_node[nid],
1076 nid, surplus_huge_pages_node[nid]); 1096 nid, h->surplus_huge_pages_node[nid]);
1077} 1097}
1078 1098
1079/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */ 1099/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
1080unsigned long hugetlb_total_pages(void) 1100unsigned long hugetlb_total_pages(void)
1081{ 1101{
1082 return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE); 1102 struct hstate *h = &default_hstate;
1103 return h->nr_huge_pages * pages_per_huge_page(h);
1083} 1104}
1084 1105
1085static int hugetlb_acct_memory(long delta) 1106static int hugetlb_acct_memory(struct hstate *h, long delta)
1086{ 1107{
1087 int ret = -ENOMEM; 1108 int ret = -ENOMEM;
1088 1109
@@ -1105,18 +1126,18 @@ static int hugetlb_acct_memory(long delta)
1105 * semantics that cpuset has. 1126 * semantics that cpuset has.
1106 */ 1127 */
1107 if (delta > 0) { 1128 if (delta > 0) {
1108 if (gather_surplus_pages(delta) < 0) 1129 if (gather_surplus_pages(h, delta) < 0)
1109 goto out; 1130 goto out;
1110 1131
1111 if (delta > cpuset_mems_nr(free_huge_pages_node)) { 1132 if (delta > cpuset_mems_nr(h->free_huge_pages_node)) {
1112 return_unused_surplus_pages(delta); 1133 return_unused_surplus_pages(h, delta);
1113 goto out; 1134 goto out;
1114 } 1135 }
1115 } 1136 }
1116 1137
1117 ret = 0; 1138 ret = 0;
1118 if (delta < 0) 1139 if (delta < 0)
1119 return_unused_surplus_pages((unsigned long) -delta); 1140 return_unused_surplus_pages(h, (unsigned long) -delta);
1120 1141
1121out: 1142out:
1122 spin_unlock(&hugetlb_lock); 1143 spin_unlock(&hugetlb_lock);
@@ -1141,14 +1162,15 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
1141 1162
1142static void hugetlb_vm_op_close(struct vm_area_struct *vma) 1163static void hugetlb_vm_op_close(struct vm_area_struct *vma)
1143{ 1164{
1165 struct hstate *h = hstate_vma(vma);
1144 struct resv_map *reservations = vma_resv_map(vma); 1166 struct resv_map *reservations = vma_resv_map(vma);
1145 unsigned long reserve; 1167 unsigned long reserve;
1146 unsigned long start; 1168 unsigned long start;
1147 unsigned long end; 1169 unsigned long end;
1148 1170
1149 if (reservations) { 1171 if (reservations) {
1150 start = vma_hugecache_offset(vma, vma->vm_start); 1172 start = vma_hugecache_offset(h, vma, vma->vm_start);
1151 end = vma_hugecache_offset(vma, vma->vm_end); 1173 end = vma_hugecache_offset(h, vma, vma->vm_end);
1152 1174
1153 reserve = (end - start) - 1175 reserve = (end - start) -
1154 region_count(&reservations->regions, start, end); 1176 region_count(&reservations->regions, start, end);
@@ -1156,7 +1178,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
1156 kref_put(&reservations->refs, resv_map_release); 1178 kref_put(&reservations->refs, resv_map_release);
1157 1179
1158 if (reserve) 1180 if (reserve)
1159 hugetlb_acct_memory(-reserve); 1181 hugetlb_acct_memory(h, -reserve);
1160 } 1182 }
1161} 1183}
1162 1184
@@ -1214,14 +1236,16 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
1214 struct page *ptepage; 1236 struct page *ptepage;
1215 unsigned long addr; 1237 unsigned long addr;
1216 int cow; 1238 int cow;
1239 struct hstate *h = hstate_vma(vma);
1240 unsigned long sz = huge_page_size(h);
1217 1241
1218 cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; 1242 cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
1219 1243
1220 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { 1244 for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
1221 src_pte = huge_pte_offset(src, addr); 1245 src_pte = huge_pte_offset(src, addr);
1222 if (!src_pte) 1246 if (!src_pte)
1223 continue; 1247 continue;
1224 dst_pte = huge_pte_alloc(dst, addr); 1248 dst_pte = huge_pte_alloc(dst, addr, sz);
1225 if (!dst_pte) 1249 if (!dst_pte)
1226 goto nomem; 1250 goto nomem;
1227 1251
@@ -1257,6 +1281,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
1257 pte_t pte; 1281 pte_t pte;
1258 struct page *page; 1282 struct page *page;
1259 struct page *tmp; 1283 struct page *tmp;
1284 struct hstate *h = hstate_vma(vma);
1285 unsigned long sz = huge_page_size(h);
1286
1260 /* 1287 /*
1261 * A page gathering list, protected by per file i_mmap_lock. The 1288 * A page gathering list, protected by per file i_mmap_lock. The
1262 * lock is used to avoid list corruption from multiple unmapping 1289 * lock is used to avoid list corruption from multiple unmapping
@@ -1265,11 +1292,11 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
1265 LIST_HEAD(page_list); 1292 LIST_HEAD(page_list);
1266 1293
1267 WARN_ON(!is_vm_hugetlb_page(vma)); 1294 WARN_ON(!is_vm_hugetlb_page(vma));
1268 BUG_ON(start & ~HPAGE_MASK); 1295 BUG_ON(start & ~huge_page_mask(h));
1269 BUG_ON(end & ~HPAGE_MASK); 1296 BUG_ON(end & ~huge_page_mask(h));
1270 1297
1271 spin_lock(&mm->page_table_lock); 1298 spin_lock(&mm->page_table_lock);
1272 for (address = start; address < end; address += HPAGE_SIZE) { 1299 for (address = start; address < end; address += sz) {
1273 ptep = huge_pte_offset(mm, address); 1300 ptep = huge_pte_offset(mm, address);
1274 if (!ptep) 1301 if (!ptep)
1275 continue; 1302 continue;
@@ -1383,6 +1410,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
1383 unsigned long address, pte_t *ptep, pte_t pte, 1410 unsigned long address, pte_t *ptep, pte_t pte,
1384 struct page *pagecache_page) 1411 struct page *pagecache_page)
1385{ 1412{
1413 struct hstate *h = hstate_vma(vma);
1386 struct page *old_page, *new_page; 1414 struct page *old_page, *new_page;
1387 int avoidcopy; 1415 int avoidcopy;
1388 int outside_reserve = 0; 1416 int outside_reserve = 0;
@@ -1443,7 +1471,7 @@ retry_avoidcopy:
1443 __SetPageUptodate(new_page); 1471 __SetPageUptodate(new_page);
1444 spin_lock(&mm->page_table_lock); 1472 spin_lock(&mm->page_table_lock);
1445 1473
1446 ptep = huge_pte_offset(mm, address & HPAGE_MASK); 1474 ptep = huge_pte_offset(mm, address & huge_page_mask(h));
1447 if (likely(pte_same(huge_ptep_get(ptep), pte))) { 1475 if (likely(pte_same(huge_ptep_get(ptep), pte))) {
1448 /* Break COW */ 1476 /* Break COW */
1449 huge_ptep_clear_flush(vma, address, ptep); 1477 huge_ptep_clear_flush(vma, address, ptep);
@@ -1458,14 +1486,14 @@ retry_avoidcopy:
1458} 1486}
1459 1487
1460/* Return the pagecache page at a given address within a VMA */ 1488/* Return the pagecache page at a given address within a VMA */
1461static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma, 1489static struct page *hugetlbfs_pagecache_page(struct hstate *h,
1462 unsigned long address) 1490 struct vm_area_struct *vma, unsigned long address)
1463{ 1491{
1464 struct address_space *mapping; 1492 struct address_space *mapping;
1465 pgoff_t idx; 1493 pgoff_t idx;
1466 1494
1467 mapping = vma->vm_file->f_mapping; 1495 mapping = vma->vm_file->f_mapping;
1468 idx = vma_hugecache_offset(vma, address); 1496 idx = vma_hugecache_offset(h, vma, address);
1469 1497
1470 return find_lock_page(mapping, idx); 1498 return find_lock_page(mapping, idx);
1471} 1499}
@@ -1473,6 +1501,7 @@ static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma,
1473static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, 1501static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
1474 unsigned long address, pte_t *ptep, int write_access) 1502 unsigned long address, pte_t *ptep, int write_access)
1475{ 1503{
1504 struct hstate *h = hstate_vma(vma);
1476 int ret = VM_FAULT_SIGBUS; 1505 int ret = VM_FAULT_SIGBUS;
1477 pgoff_t idx; 1506 pgoff_t idx;
1478 unsigned long size; 1507 unsigned long size;
@@ -1493,7 +1522,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
1493 } 1522 }
1494 1523
1495 mapping = vma->vm_file->f_mapping; 1524 mapping = vma->vm_file->f_mapping;
1496 idx = vma_hugecache_offset(vma, address); 1525 idx = vma_hugecache_offset(h, vma, address);
1497 1526
1498 /* 1527 /*
1499 * Use page lock to guard against racing truncation 1528 * Use page lock to guard against racing truncation
@@ -1502,7 +1531,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
1502retry: 1531retry:
1503 page = find_lock_page(mapping, idx); 1532 page = find_lock_page(mapping, idx);
1504 if (!page) { 1533 if (!page) {
1505 size = i_size_read(mapping->host) >> HPAGE_SHIFT; 1534 size = i_size_read(mapping->host) >> huge_page_shift(h);
1506 if (idx >= size) 1535 if (idx >= size)
1507 goto out; 1536 goto out;
1508 page = alloc_huge_page(vma, address, 0); 1537 page = alloc_huge_page(vma, address, 0);
@@ -1510,7 +1539,7 @@ retry:
1510 ret = -PTR_ERR(page); 1539 ret = -PTR_ERR(page);
1511 goto out; 1540 goto out;
1512 } 1541 }
1513 clear_huge_page(page, address); 1542 clear_huge_page(page, address, huge_page_size(h));
1514 __SetPageUptodate(page); 1543 __SetPageUptodate(page);
1515 1544
1516 if (vma->vm_flags & VM_SHARED) { 1545 if (vma->vm_flags & VM_SHARED) {
@@ -1526,14 +1555,14 @@ retry:
1526 } 1555 }
1527 1556
1528 spin_lock(&inode->i_lock); 1557 spin_lock(&inode->i_lock);
1529 inode->i_blocks += BLOCKS_PER_HUGEPAGE; 1558 inode->i_blocks += blocks_per_huge_page(h);
1530 spin_unlock(&inode->i_lock); 1559 spin_unlock(&inode->i_lock);
1531 } else 1560 } else
1532 lock_page(page); 1561 lock_page(page);
1533 } 1562 }
1534 1563
1535 spin_lock(&mm->page_table_lock); 1564 spin_lock(&mm->page_table_lock);
1536 size = i_size_read(mapping->host) >> HPAGE_SHIFT; 1565 size = i_size_read(mapping->host) >> huge_page_shift(h);
1537 if (idx >= size) 1566 if (idx >= size)
1538 goto backout; 1567 goto backout;
1539 1568
@@ -1569,8 +1598,9 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
1569 pte_t entry; 1598 pte_t entry;
1570 int ret; 1599 int ret;
1571 static DEFINE_MUTEX(hugetlb_instantiation_mutex); 1600 static DEFINE_MUTEX(hugetlb_instantiation_mutex);
1601 struct hstate *h = hstate_vma(vma);
1572 1602
1573 ptep = huge_pte_alloc(mm, address); 1603 ptep = huge_pte_alloc(mm, address, huge_page_size(h));
1574 if (!ptep) 1604 if (!ptep)
1575 return VM_FAULT_OOM; 1605 return VM_FAULT_OOM;
1576 1606
@@ -1594,7 +1624,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
1594 if (likely(pte_same(entry, huge_ptep_get(ptep)))) 1624 if (likely(pte_same(entry, huge_ptep_get(ptep))))
1595 if (write_access && !pte_write(entry)) { 1625 if (write_access && !pte_write(entry)) {
1596 struct page *page; 1626 struct page *page;
1597 page = hugetlbfs_pagecache_page(vma, address); 1627 page = hugetlbfs_pagecache_page(h, vma, address);
1598 ret = hugetlb_cow(mm, vma, address, ptep, entry, page); 1628 ret = hugetlb_cow(mm, vma, address, ptep, entry, page);
1599 if (page) { 1629 if (page) {
1600 unlock_page(page); 1630 unlock_page(page);
@@ -1615,6 +1645,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
1615 unsigned long pfn_offset; 1645 unsigned long pfn_offset;
1616 unsigned long vaddr = *position; 1646 unsigned long vaddr = *position;
1617 int remainder = *length; 1647 int remainder = *length;
1648 struct hstate *h = hstate_vma(vma);
1618 1649
1619 spin_lock(&mm->page_table_lock); 1650 spin_lock(&mm->page_table_lock);
1620 while (vaddr < vma->vm_end && remainder) { 1651 while (vaddr < vma->vm_end && remainder) {
@@ -1626,7 +1657,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
1626 * each hugepage. We have to make sure we get the 1657 * each hugepage. We have to make sure we get the
1627 * first, for the page indexing below to work. 1658 * first, for the page indexing below to work.
1628 */ 1659 */
1629 pte = huge_pte_offset(mm, vaddr & HPAGE_MASK); 1660 pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
1630 1661
1631 if (!pte || huge_pte_none(huge_ptep_get(pte)) || 1662 if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
1632 (write && !pte_write(huge_ptep_get(pte)))) { 1663 (write && !pte_write(huge_ptep_get(pte)))) {
@@ -1644,7 +1675,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
1644 break; 1675 break;
1645 } 1676 }
1646 1677
1647 pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT; 1678 pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
1648 page = pte_page(huge_ptep_get(pte)); 1679 page = pte_page(huge_ptep_get(pte));
1649same_page: 1680same_page:
1650 if (pages) { 1681 if (pages) {
@@ -1660,7 +1691,7 @@ same_page:
1660 --remainder; 1691 --remainder;
1661 ++i; 1692 ++i;
1662 if (vaddr < vma->vm_end && remainder && 1693 if (vaddr < vma->vm_end && remainder &&
1663 pfn_offset < HPAGE_SIZE/PAGE_SIZE) { 1694 pfn_offset < pages_per_huge_page(h)) {
1664 /* 1695 /*
1665 * We use pfn_offset to avoid touching the pageframes 1696 * We use pfn_offset to avoid touching the pageframes
1666 * of this compound page. 1697 * of this compound page.
@@ -1682,13 +1713,14 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
1682 unsigned long start = address; 1713 unsigned long start = address;
1683 pte_t *ptep; 1714 pte_t *ptep;
1684 pte_t pte; 1715 pte_t pte;
1716 struct hstate *h = hstate_vma(vma);
1685 1717
1686 BUG_ON(address >= end); 1718 BUG_ON(address >= end);
1687 flush_cache_range(vma, address, end); 1719 flush_cache_range(vma, address, end);
1688 1720
1689 spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); 1721 spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
1690 spin_lock(&mm->page_table_lock); 1722 spin_lock(&mm->page_table_lock);
1691 for (; address < end; address += HPAGE_SIZE) { 1723 for (; address < end; address += huge_page_size(h)) {
1692 ptep = huge_pte_offset(mm, address); 1724 ptep = huge_pte_offset(mm, address);
1693 if (!ptep) 1725 if (!ptep)
1694 continue; 1726 continue;
@@ -1711,6 +1743,7 @@ int hugetlb_reserve_pages(struct inode *inode,
1711 struct vm_area_struct *vma) 1743 struct vm_area_struct *vma)
1712{ 1744{
1713 long ret, chg; 1745 long ret, chg;
1746 struct hstate *h = hstate_inode(inode);
1714 1747
1715 if (vma && vma->vm_flags & VM_NORESERVE) 1748 if (vma && vma->vm_flags & VM_NORESERVE)
1716 return 0; 1749 return 0;
@@ -1739,7 +1772,7 @@ int hugetlb_reserve_pages(struct inode *inode,
1739 1772
1740 if (hugetlb_get_quota(inode->i_mapping, chg)) 1773 if (hugetlb_get_quota(inode->i_mapping, chg))
1741 return -ENOSPC; 1774 return -ENOSPC;
1742 ret = hugetlb_acct_memory(chg); 1775 ret = hugetlb_acct_memory(h, chg);
1743 if (ret < 0) { 1776 if (ret < 0) {
1744 hugetlb_put_quota(inode->i_mapping, chg); 1777 hugetlb_put_quota(inode->i_mapping, chg);
1745 return ret; 1778 return ret;
@@ -1751,12 +1784,13 @@ int hugetlb_reserve_pages(struct inode *inode,
1751 1784
1752void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) 1785void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
1753{ 1786{
1787 struct hstate *h = hstate_inode(inode);
1754 long chg = region_truncate(&inode->i_mapping->private_list, offset); 1788 long chg = region_truncate(&inode->i_mapping->private_list, offset);
1755 1789
1756 spin_lock(&inode->i_lock); 1790 spin_lock(&inode->i_lock);
1757 inode->i_blocks -= BLOCKS_PER_HUGEPAGE * freed; 1791 inode->i_blocks -= blocks_per_huge_page(h) * freed;
1758 spin_unlock(&inode->i_lock); 1792 spin_unlock(&inode->i_lock);
1759 1793
1760 hugetlb_put_quota(inode->i_mapping, (chg - freed)); 1794 hugetlb_put_quota(inode->i_mapping, (chg - freed));
1761 hugetlb_acct_memory(-(chg - freed)); 1795 hugetlb_acct_memory(h, -(chg - freed));
1762} 1796}
diff --git a/mm/memory.c b/mm/memory.c
index 72932489a082..c1c1d6d8c22b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -903,7 +903,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
903 if (unlikely(is_vm_hugetlb_page(vma))) { 903 if (unlikely(is_vm_hugetlb_page(vma))) {
904 unmap_hugepage_range(vma, start, end, NULL); 904 unmap_hugepage_range(vma, start, end, NULL);
905 zap_work -= (end - start) / 905 zap_work -= (end - start) /
906 (HPAGE_SIZE / PAGE_SIZE); 906 pages_per_huge_page(hstate_vma(vma));
907 start = end; 907 start = end;
908 } else 908 } else
909 start = unmap_page_range(*tlbp, vma, 909 start = unmap_page_range(*tlbp, vma,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c94e58b192c3..e550bec20582 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1481,7 +1481,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
1481 1481
1482 if (unlikely((*mpol)->mode == MPOL_INTERLEAVE)) { 1482 if (unlikely((*mpol)->mode == MPOL_INTERLEAVE)) {
1483 zl = node_zonelist(interleave_nid(*mpol, vma, addr, 1483 zl = node_zonelist(interleave_nid(*mpol, vma, addr,
1484 HPAGE_SHIFT), gfp_flags); 1484 huge_page_shift(hstate_vma(vma))), gfp_flags);
1485 } else { 1485 } else {
1486 zl = policy_zonelist(gfp_flags, *mpol); 1486 zl = policy_zonelist(gfp_flags, *mpol);
1487 if ((*mpol)->mode == MPOL_BIND) 1487 if ((*mpol)->mode == MPOL_BIND)
@@ -2220,9 +2220,12 @@ static void check_huge_range(struct vm_area_struct *vma,
2220{ 2220{
2221 unsigned long addr; 2221 unsigned long addr;
2222 struct page *page; 2222 struct page *page;
2223 struct hstate *h = hstate_vma(vma);
2224 unsigned long sz = huge_page_size(h);
2223 2225
2224 for (addr = start; addr < end; addr += HPAGE_SIZE) { 2226 for (addr = start; addr < end; addr += sz) {
2225 pte_t *ptep = huge_pte_offset(vma->vm_mm, addr & HPAGE_MASK); 2227 pte_t *ptep = huge_pte_offset(vma->vm_mm,
2228 addr & huge_page_mask(h));
2226 pte_t pte; 2229 pte_t pte;
2227 2230
2228 if (!ptep) 2231 if (!ptep)
diff --git a/mm/mmap.c b/mm/mmap.c
index 57d3b6097deb..5e0cc99e9cd5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1812,7 +1812,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
1812 struct mempolicy *pol; 1812 struct mempolicy *pol;
1813 struct vm_area_struct *new; 1813 struct vm_area_struct *new;
1814 1814
1815 if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK)) 1815 if (is_vm_hugetlb_page(vma) && (addr &
1816 ~(huge_page_mask(hstate_vma(vma)))))
1816 return -EINVAL; 1817 return -EINVAL;
1817 1818
1818 if (mm->map_count >= sysctl_max_map_count) 1819 if (mm->map_count >= sysctl_max_map_count)