-rw-r--r--	arch/ia64/mm/hugetlbpage.c	|   7
-rw-r--r--	arch/powerpc/mm/hugetlbpage.c	|   3
-rw-r--r--	arch/s390/mm/hugetlbpage.c	|   3
-rw-r--r--	arch/sh/mm/hugetlbpage.c	|   3
-rw-r--r--	arch/sparc64/mm/hugetlbpage.c	|   5
-rw-r--r--	arch/x86/mm/hugetlbpage.c	|   5
-rw-r--r--	fs/hugetlbfs/inode.c	|  52
-rw-r--r--	include/asm-ia64/hugetlb.h	|   3
-rw-r--r--	include/asm-powerpc/hugetlb.h	|   3
-rw-r--r--	include/asm-s390/hugetlb.h	|   3
-rw-r--r--	include/asm-sh/hugetlb.h	|   3
-rw-r--r--	include/asm-sparc/hugetlb.h	|   3
-rw-r--r--	include/asm-x86/hugetlb.h	|   8
-rw-r--r--	include/linux/hugetlb.h	|  88
-rw-r--r--	ipc/shm.c	|   3
-rw-r--r--	mm/hugetlb.c	| 368
-rw-r--r--	mm/memory.c	|   2
-rw-r--r--	mm/mempolicy.c	|   9
-rw-r--r--	mm/mmap.c	|   3
19 files changed, 356 insertions(+), 218 deletions(-)
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index cd49e2860eef..6170f097d255 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -24,7 +24,7 @@
 unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT;
 
 pte_t *
-huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
+huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
 	unsigned long taddr = htlbpage_to_page(addr);
 	pgd_t *pgd;
@@ -75,7 +75,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
  * Don't actually need to do any preparation, but need to make sure
  * the address is in the right region.
  */
-int prepare_hugepage_range(unsigned long addr, unsigned long len)
+int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
@@ -149,7 +150,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u
 
 	/* Handle MAP_FIXED */
 	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len))
+		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
 		return addr;
 	}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1a96cc891cf5..c94dc71af989 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -128,7 +128,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return NULL;
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pg;
 	pud_t *pu;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index f4b6124fdb75..9162dc84f77f 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -72,7 +72,8 @@ void arch_release_hugepage(struct page *page)
 	page[1].index = 0;
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgdp;
 	pud_t *pudp;
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index ae8c321d6e2a..2f9dbe0ef4ac 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -22,7 +22,8 @@
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index ebefd2a14375..1307b23f6a76 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -175,7 +175,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len))
+		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
 		return addr;
 	}
@@ -195,7 +195,8 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 							pgoff, flags);
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 0b3d567e686d..52476fde8996 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -124,7 +124,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 1;
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -368,7 +369,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len))
+		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
 		return addr;
 	}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 428eff5b73f3..516c581b5371 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -80,6 +80,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	struct inode *inode = file->f_path.dentry->d_inode;
 	loff_t len, vma_len;
 	int ret;
+	struct hstate *h = hstate_file(file);
 
 	/*
 	 * vma address alignment (but not the pgoff alignment) has
@@ -92,7 +93,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
 	vma->vm_ops = &hugetlb_vm_ops;
 
-	if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT))
+	if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT))
 		return -EINVAL;
 
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
@@ -104,8 +105,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 
 	if (hugetlb_reserve_pages(inode,
-			vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
-			len >> HPAGE_SHIFT, vma))
+				vma->vm_pgoff >> huge_page_order(h),
+				len >> huge_page_shift(h), vma))
 		goto out;
 
 	ret = 0;
@@ -130,20 +131,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long start_addr;
+	struct hstate *h = hstate_file(file);
 
-	if (len & ~HPAGE_MASK)
+	if (len & ~huge_page_mask(h))
 		return -EINVAL;
 	if (len > TASK_SIZE)
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len))
+		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
 		return addr;
 	}
 
 	if (addr) {
-		addr = ALIGN(addr, HPAGE_SIZE);
+		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
 		    (!vma || addr + len <= vma->vm_start))
@@ -156,7 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	start_addr = TASK_UNMAPPED_BASE;
 
 full_search:
-	addr = ALIGN(start_addr, HPAGE_SIZE);
+	addr = ALIGN(start_addr, huge_page_size(h));
 
 	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
 		/* At this point:  (!vma || addr < vma->vm_end). */
@@ -174,7 +176,7 @@ full_search:
 
 		if (!vma || addr + len <= vma->vm_start)
 			return addr;
-		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
+		addr = ALIGN(vma->vm_end, huge_page_size(h));
 	}
 }
 #endif
@@ -225,10 +227,11 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
 static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 			size_t len, loff_t *ppos)
 {
+	struct hstate *h = hstate_file(filp);
 	struct address_space *mapping = filp->f_mapping;
 	struct inode *inode = mapping->host;
-	unsigned long index = *ppos >> HPAGE_SHIFT;
-	unsigned long offset = *ppos & ~HPAGE_MASK;
+	unsigned long index = *ppos >> huge_page_shift(h);
+	unsigned long offset = *ppos & ~huge_page_mask(h);
 	unsigned long end_index;
 	loff_t isize;
 	ssize_t retval = 0;
@@ -243,17 +246,17 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 	if (!isize)
 		goto out;
 
-	end_index = (isize - 1) >> HPAGE_SHIFT;
+	end_index = (isize - 1) >> huge_page_shift(h);
 	for (;;) {
 		struct page *page;
-		int nr, ret;
+		unsigned long nr, ret;
 
 		/* nr is the maximum number of bytes to copy from this page */
-		nr = HPAGE_SIZE;
+		nr = huge_page_size(h);
 		if (index >= end_index) {
 			if (index > end_index)
 				goto out;
-			nr = ((isize - 1) & ~HPAGE_MASK) + 1;
+			nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
 			if (nr <= offset) {
 				goto out;
 			}
@@ -287,8 +290,8 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 		offset += ret;
 		retval += ret;
 		len -= ret;
-		index += offset >> HPAGE_SHIFT;
-		offset &= ~HPAGE_MASK;
+		index += offset >> huge_page_shift(h);
+		offset &= ~huge_page_mask(h);
 
 		if (page)
 			page_cache_release(page);
@@ -298,7 +301,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 		break;
 	}
 out:
-	*ppos = ((loff_t)index << HPAGE_SHIFT) + offset;
+	*ppos = ((loff_t)index << huge_page_shift(h)) + offset;
 	mutex_unlock(&inode->i_mutex);
 	return retval;
 }
@@ -339,8 +342,9 @@ static void truncate_huge_page(struct page *page)
 
 static void truncate_hugepages(struct inode *inode, loff_t lstart)
 {
+	struct hstate *h = hstate_inode(inode);
 	struct address_space *mapping = &inode->i_data;
-	const pgoff_t start = lstart >> HPAGE_SHIFT;
+	const pgoff_t start = lstart >> huge_page_shift(h);
 	struct pagevec pvec;
 	pgoff_t next;
 	int i, freed = 0;
@@ -449,8 +453,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 {
 	pgoff_t pgoff;
 	struct address_space *mapping = inode->i_mapping;
+	struct hstate *h = hstate_inode(inode);
 
-	BUG_ON(offset & ~HPAGE_MASK);
+	BUG_ON(offset & ~huge_page_mask(h));
 	pgoff = offset >> PAGE_SHIFT;
 
 	i_size_write(inode, offset);
@@ -465,6 +470,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
+	struct hstate *h = hstate_inode(inode);
 	int error;
 	unsigned int ia_valid = attr->ia_valid;
 
@@ -476,7 +482,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	if (ia_valid & ATTR_SIZE) {
 		error = -EINVAL;
-		if (!(attr->ia_size & ~HPAGE_MASK))
+		if (!(attr->ia_size & ~huge_page_mask(h)))
 			error = hugetlb_vmtruncate(inode, attr->ia_size);
 		if (error)
 			goto out;
@@ -610,9 +616,10 @@ static int hugetlbfs_set_page_dirty(struct page *page)
 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
+	struct hstate *h = hstate_inode(dentry->d_inode);
 
 	buf->f_type = HUGETLBFS_MAGIC;
-	buf->f_bsize = HPAGE_SIZE;
+	buf->f_bsize = huge_page_size(h);
 	if (sbinfo) {
 		spin_lock(&sbinfo->stat_lock);
 		/* If no limits set, just report 0 for max/free/used
@@ -942,7 +949,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
 		goto out_dentry;
 
 	error = -ENOMEM;
-	if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT, NULL))
+	if (hugetlb_reserve_pages(inode, 0,
+			size >> huge_page_shift(hstate_inode(inode)), NULL))
 		goto out_inode;
 
 	d_instantiate(dentry, inode);
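In hugetlbfs_file_mmap() and hugetlb_file_setup() above, the fixed HPAGE_* constants become per-hstate arithmetic: a pgoff counted in base pages shifted right by huge_page_order(h) gives a huge-page index, and a byte length shifted right by huge_page_shift(h) gives a huge-page count. A standalone C sketch of that unit conversion, not part of the patch; the 4 KB base page and 2 MB huge page are assumed for illustration:

#include <assert.h>

#define PAGE_SHIFT 12UL                 /* assumed 4 KB base page */
#define HUGE_ORDER 9UL                  /* assumed 2 MB huge page, order 9 */
#define HUGE_SHIFT (PAGE_SHIFT + HUGE_ORDER)

int main(void)
{
	/* A mapping that starts 6 MB into the file: pgoff is in base-page units. */
	unsigned long pgoff = (6UL << 20) >> PAGE_SHIFT;   /* 1536 base pages */
	unsigned long len   = 8UL << 20;                   /* 8 MB mapping */

	/* vma->vm_pgoff >> huge_page_order(h): index of the first huge page */
	assert((pgoff >> HUGE_ORDER) == 3);

	/* len >> huge_page_shift(h): number of huge pages to reserve */
	assert((len >> HUGE_SHIFT) == 4);
	return 0;
}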
diff --git a/include/asm-ia64/hugetlb.h b/include/asm-ia64/hugetlb.h
index e9d1e5e2382d..da55c63728e0 100644
--- a/include/asm-ia64/hugetlb.h
+++ b/include/asm-ia64/hugetlb.h
@@ -8,7 +8,8 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 			    unsigned long end, unsigned long floor,
 			    unsigned long ceiling);
 
-int prepare_hugepage_range(unsigned long addr, unsigned long len);
+int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len);
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr,
diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h
index 0a37aa5ecaa5..ca37c4af27b1 100644
--- a/include/asm-powerpc/hugetlb.h
+++ b/include/asm-powerpc/hugetlb.h
@@ -21,7 +21,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
diff --git a/include/asm-s390/hugetlb.h b/include/asm-s390/hugetlb.h
index 600a776f8f75..670a1d1745d2 100644
--- a/include/asm-s390/hugetlb.h
+++ b/include/asm-s390/hugetlb.h
@@ -22,7 +22,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
diff --git a/include/asm-sh/hugetlb.h b/include/asm-sh/hugetlb.h
index fb30018938c7..967068fb79ac 100644
--- a/include/asm-sh/hugetlb.h
+++ b/include/asm-sh/hugetlb.h
@@ -14,7 +14,8 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
diff --git a/include/asm-sparc/hugetlb.h b/include/asm-sparc/hugetlb.h
index aeb92374ca3d..177061064ee6 100644
--- a/include/asm-sparc/hugetlb.h
+++ b/include/asm-sparc/hugetlb.h
@@ -22,7 +22,8 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h
index 7eed6e0883bf..439a9acc132d 100644
--- a/include/asm-x86/hugetlb.h
+++ b/include/asm-x86/hugetlb.h
@@ -14,11 +14,13 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
-	if (len & ~HPAGE_MASK)
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
 		return -EINVAL;
-	if (addr & ~HPAGE_MASK)
+	if (addr & ~huge_page_mask(h))
 		return -EINVAL;
 	return 0;
 }
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index abbc187193a1..ad2271e11f9b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -8,7 +8,6 @@
 #include <linux/mempolicy.h>
 #include <linux/shm.h>
 #include <asm/tlbflush.h>
-#include <asm/hugetlb.h>
 
 struct ctl_table;
 
@@ -45,7 +44,8 @@ extern int sysctl_hugetlb_shm_group;
 
 /* arch callbacks */
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr);
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz);
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
@@ -80,7 +80,7 @@ static inline unsigned long hugetlb_total_pages(void)
 #define hugetlb_report_meminfo(buf)		0
 #define hugetlb_report_node_meminfo(n, buf)	0
 #define follow_huge_pmd(mm, addr, pmd, write)	NULL
-#define prepare_hugepage_range(addr,len)	(-EINVAL)
+#define prepare_hugepage_range(file, addr, len)	(-EINVAL)
 #define pmd_huge(x)	0
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
@@ -134,8 +134,6 @@ struct file *hugetlb_file_setup(const char *name, size_t);
 int hugetlb_get_quota(struct address_space *mapping, long delta);
 void hugetlb_put_quota(struct address_space *mapping, long delta);
 
-#define BLOCKS_PER_HUGEPAGE	(HPAGE_SIZE / 512)
-
 static inline int is_file_hugepages(struct file *file)
 {
 	if (file->f_op == &hugetlbfs_file_operations)
@@ -164,4 +162,84 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long flags);
 #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
 
+#ifdef CONFIG_HUGETLB_PAGE
+
+/* Defines one hugetlb page size */
+struct hstate {
+	int hugetlb_next_nid;
+	unsigned int order;
+	unsigned long mask;
+	unsigned long max_huge_pages;
+	unsigned long nr_huge_pages;
+	unsigned long free_huge_pages;
+	unsigned long resv_huge_pages;
+	unsigned long surplus_huge_pages;
+	unsigned long nr_overcommit_huge_pages;
+	struct list_head hugepage_freelists[MAX_NUMNODES];
+	unsigned int nr_huge_pages_node[MAX_NUMNODES];
+	unsigned int free_huge_pages_node[MAX_NUMNODES];
+	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
+};
+
+extern struct hstate default_hstate;
+
+static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
+{
+	return &default_hstate;
+}
+
+static inline struct hstate *hstate_file(struct file *f)
+{
+	return &default_hstate;
+}
+
+static inline struct hstate *hstate_inode(struct inode *i)
+{
+	return &default_hstate;
+}
+
+static inline unsigned long huge_page_size(struct hstate *h)
+{
+	return (unsigned long)PAGE_SIZE << h->order;
+}
+
+static inline unsigned long huge_page_mask(struct hstate *h)
+{
+	return h->mask;
+}
+
+static inline unsigned int huge_page_order(struct hstate *h)
+{
+	return h->order;
+}
+
+static inline unsigned huge_page_shift(struct hstate *h)
+{
+	return h->order + PAGE_SHIFT;
+}
+
+static inline unsigned int pages_per_huge_page(struct hstate *h)
+{
+	return 1 << h->order;
+}
+
+static inline unsigned int blocks_per_huge_page(struct hstate *h)
+{
+	return huge_page_size(h) / 512;
+}
+
+#include <asm/hugetlb.h>
+
+#else
+struct hstate {};
+#define hstate_file(f) NULL
+#define hstate_vma(v) NULL
+#define hstate_inode(i) NULL
+#define huge_page_size(h) PAGE_SIZE
+#define huge_page_mask(h) PAGE_MASK
+#define huge_page_order(h) 0
+#define huge_page_shift(h) PAGE_SHIFT
+#define pages_per_huge_page(h) 1
+#endif
+
 #endif /* _LINUX_HUGETLB_H */
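The accessors added to include/linux/hugetlb.h above are plain arithmetic on the hstate order. A minimal user-space C sketch, not part of the patch, that mirrors those relationships; it assumes a 4 KB base page and the common order-9 (2 MB) huge page, and only the helper names are taken from the header:

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12UL                      /* assumed 4 KB base page */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

struct hstate { unsigned int order; unsigned long mask; };

static unsigned long huge_page_size(struct hstate *h)       { return PAGE_SIZE << h->order; }
static unsigned long huge_page_shift(struct hstate *h)      { return h->order + PAGE_SHIFT; }
static unsigned long pages_per_huge_page(struct hstate *h)  { return 1UL << h->order; }
static unsigned long blocks_per_huge_page(struct hstate *h) { return huge_page_size(h) / 512; }

int main(void)
{
	/* order 9 with 4 KB base pages gives a 2 MB huge page */
	struct hstate h = { .order = 9, .mask = ~((PAGE_SIZE << 9) - 1) };

	assert(huge_page_size(&h) == (2UL << 20));       /* 2 MB */
	assert(huge_page_shift(&h) == 21);               /* log2 of 2 MB */
	assert(pages_per_huge_page(&h) == 512);          /* base pages per huge page */
	assert(blocks_per_huge_page(&h) == 4096);        /* 512-byte blocks, replaces BLOCKS_PER_HUGEPAGE */
	assert((huge_page_size(&h) & ~h.mask) == 0);     /* mask strips the in-page offset */

	printf("order=%u size=%lu shift=%lu\n",
	       h.order, huge_page_size(&h), huge_page_shift(&h));
	return 0;
}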
diff --git a/ipc/shm.c b/ipc/shm.c
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -577,7 +577,8 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 
 		if (is_file_hugepages(shp->shm_file)) {
 			struct address_space *mapping = inode->i_mapping;
-			*rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages;
+			struct hstate *h = hstate_file(shp->shm_file);
+			*rss += pages_per_huge_page(h) * mapping->nrpages;
 		} else {
 			struct shmem_inode_info *info = SHMEM_I(inode);
 			spin_lock(&info->lock);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 32dff4290c66..0d8153e25f09 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -22,18 +22,12 @@ | |||
22 | #include "internal.h" | 22 | #include "internal.h" |
23 | 23 | ||
24 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; | 24 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; |
25 | static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages; | ||
26 | static unsigned long surplus_huge_pages; | ||
27 | static unsigned long nr_overcommit_huge_pages; | ||
28 | unsigned long max_huge_pages; | 25 | unsigned long max_huge_pages; |
29 | unsigned long sysctl_overcommit_huge_pages; | 26 | unsigned long sysctl_overcommit_huge_pages; |
30 | static struct list_head hugepage_freelists[MAX_NUMNODES]; | ||
31 | static unsigned int nr_huge_pages_node[MAX_NUMNODES]; | ||
32 | static unsigned int free_huge_pages_node[MAX_NUMNODES]; | ||
33 | static unsigned int surplus_huge_pages_node[MAX_NUMNODES]; | ||
34 | static gfp_t htlb_alloc_mask = GFP_HIGHUSER; | 27 | static gfp_t htlb_alloc_mask = GFP_HIGHUSER; |
35 | unsigned long hugepages_treat_as_movable; | 28 | unsigned long hugepages_treat_as_movable; |
36 | static int hugetlb_next_nid; | 29 | |
30 | struct hstate default_hstate; | ||
37 | 31 | ||
38 | /* | 32 | /* |
39 | * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages | 33 | * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages |
@@ -203,11 +197,11 @@ static long region_count(struct list_head *head, long f, long t) | |||
203 | * Convert the address within this vma to the page offset within | 197 | * Convert the address within this vma to the page offset within |
204 | * the mapping, in pagecache page units; huge pages here. | 198 | * the mapping, in pagecache page units; huge pages here. |
205 | */ | 199 | */ |
206 | static pgoff_t vma_hugecache_offset(struct vm_area_struct *vma, | 200 | static pgoff_t vma_hugecache_offset(struct hstate *h, |
207 | unsigned long address) | 201 | struct vm_area_struct *vma, unsigned long address) |
208 | { | 202 | { |
209 | return ((address - vma->vm_start) >> HPAGE_SHIFT) + | 203 | return ((address - vma->vm_start) >> huge_page_shift(h)) + |
210 | (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); | 204 | (vma->vm_pgoff >> huge_page_order(h)); |
211 | } | 205 | } |
212 | 206 | ||
213 | /* | 207 | /* |
@@ -309,20 +303,21 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag) | |||
309 | } | 303 | } |
310 | 304 | ||
311 | /* Decrement the reserved pages in the hugepage pool by one */ | 305 | /* Decrement the reserved pages in the hugepage pool by one */ |
312 | static void decrement_hugepage_resv_vma(struct vm_area_struct *vma) | 306 | static void decrement_hugepage_resv_vma(struct hstate *h, |
307 | struct vm_area_struct *vma) | ||
313 | { | 308 | { |
314 | if (vma->vm_flags & VM_NORESERVE) | 309 | if (vma->vm_flags & VM_NORESERVE) |
315 | return; | 310 | return; |
316 | 311 | ||
317 | if (vma->vm_flags & VM_SHARED) { | 312 | if (vma->vm_flags & VM_SHARED) { |
318 | /* Shared mappings always use reserves */ | 313 | /* Shared mappings always use reserves */ |
319 | resv_huge_pages--; | 314 | h->resv_huge_pages--; |
320 | } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { | 315 | } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { |
321 | /* | 316 | /* |
322 | * Only the process that called mmap() has reserves for | 317 | * Only the process that called mmap() has reserves for |
323 | * private mappings. | 318 | * private mappings. |
324 | */ | 319 | */ |
325 | resv_huge_pages--; | 320 | h->resv_huge_pages--; |
326 | } | 321 | } |
327 | } | 322 | } |
328 | 323 | ||
@@ -344,12 +339,13 @@ static int vma_has_private_reserves(struct vm_area_struct *vma) | |||
344 | return 1; | 339 | return 1; |
345 | } | 340 | } |
346 | 341 | ||
347 | static void clear_huge_page(struct page *page, unsigned long addr) | 342 | static void clear_huge_page(struct page *page, |
343 | unsigned long addr, unsigned long sz) | ||
348 | { | 344 | { |
349 | int i; | 345 | int i; |
350 | 346 | ||
351 | might_sleep(); | 347 | might_sleep(); |
352 | for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) { | 348 | for (i = 0; i < sz/PAGE_SIZE; i++) { |
353 | cond_resched(); | 349 | cond_resched(); |
354 | clear_user_highpage(page + i, addr + i * PAGE_SIZE); | 350 | clear_user_highpage(page + i, addr + i * PAGE_SIZE); |
355 | } | 351 | } |
@@ -359,41 +355,43 @@ static void copy_huge_page(struct page *dst, struct page *src, | |||
359 | unsigned long addr, struct vm_area_struct *vma) | 355 | unsigned long addr, struct vm_area_struct *vma) |
360 | { | 356 | { |
361 | int i; | 357 | int i; |
358 | struct hstate *h = hstate_vma(vma); | ||
362 | 359 | ||
363 | might_sleep(); | 360 | might_sleep(); |
364 | for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) { | 361 | for (i = 0; i < pages_per_huge_page(h); i++) { |
365 | cond_resched(); | 362 | cond_resched(); |
366 | copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma); | 363 | copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma); |
367 | } | 364 | } |
368 | } | 365 | } |
369 | 366 | ||
370 | static void enqueue_huge_page(struct page *page) | 367 | static void enqueue_huge_page(struct hstate *h, struct page *page) |
371 | { | 368 | { |
372 | int nid = page_to_nid(page); | 369 | int nid = page_to_nid(page); |
373 | list_add(&page->lru, &hugepage_freelists[nid]); | 370 | list_add(&page->lru, &h->hugepage_freelists[nid]); |
374 | free_huge_pages++; | 371 | h->free_huge_pages++; |
375 | free_huge_pages_node[nid]++; | 372 | h->free_huge_pages_node[nid]++; |
376 | } | 373 | } |
377 | 374 | ||
378 | static struct page *dequeue_huge_page(void) | 375 | static struct page *dequeue_huge_page(struct hstate *h) |
379 | { | 376 | { |
380 | int nid; | 377 | int nid; |
381 | struct page *page = NULL; | 378 | struct page *page = NULL; |
382 | 379 | ||
383 | for (nid = 0; nid < MAX_NUMNODES; ++nid) { | 380 | for (nid = 0; nid < MAX_NUMNODES; ++nid) { |
384 | if (!list_empty(&hugepage_freelists[nid])) { | 381 | if (!list_empty(&h->hugepage_freelists[nid])) { |
385 | page = list_entry(hugepage_freelists[nid].next, | 382 | page = list_entry(h->hugepage_freelists[nid].next, |
386 | struct page, lru); | 383 | struct page, lru); |
387 | list_del(&page->lru); | 384 | list_del(&page->lru); |
388 | free_huge_pages--; | 385 | h->free_huge_pages--; |
389 | free_huge_pages_node[nid]--; | 386 | h->free_huge_pages_node[nid]--; |
390 | break; | 387 | break; |
391 | } | 388 | } |
392 | } | 389 | } |
393 | return page; | 390 | return page; |
394 | } | 391 | } |
395 | 392 | ||
396 | static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma, | 393 | static struct page *dequeue_huge_page_vma(struct hstate *h, |
394 | struct vm_area_struct *vma, | ||
397 | unsigned long address, int avoid_reserve) | 395 | unsigned long address, int avoid_reserve) |
398 | { | 396 | { |
399 | int nid; | 397 | int nid; |
@@ -411,26 +409,26 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma, | |||
411 | * not "stolen". The child may still get SIGKILLed | 409 | * not "stolen". The child may still get SIGKILLed |
412 | */ | 410 | */ |
413 | if (!vma_has_private_reserves(vma) && | 411 | if (!vma_has_private_reserves(vma) && |
414 | free_huge_pages - resv_huge_pages == 0) | 412 | h->free_huge_pages - h->resv_huge_pages == 0) |
415 | return NULL; | 413 | return NULL; |
416 | 414 | ||
417 | /* If reserves cannot be used, ensure enough pages are in the pool */ | 415 | /* If reserves cannot be used, ensure enough pages are in the pool */ |
418 | if (avoid_reserve && free_huge_pages - resv_huge_pages == 0) | 416 | if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0) |
419 | return NULL; | 417 | return NULL; |
420 | 418 | ||
421 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | 419 | for_each_zone_zonelist_nodemask(zone, z, zonelist, |
422 | MAX_NR_ZONES - 1, nodemask) { | 420 | MAX_NR_ZONES - 1, nodemask) { |
423 | nid = zone_to_nid(zone); | 421 | nid = zone_to_nid(zone); |
424 | if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) && | 422 | if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) && |
425 | !list_empty(&hugepage_freelists[nid])) { | 423 | !list_empty(&h->hugepage_freelists[nid])) { |
426 | page = list_entry(hugepage_freelists[nid].next, | 424 | page = list_entry(h->hugepage_freelists[nid].next, |
427 | struct page, lru); | 425 | struct page, lru); |
428 | list_del(&page->lru); | 426 | list_del(&page->lru); |
429 | free_huge_pages--; | 427 | h->free_huge_pages--; |
430 | free_huge_pages_node[nid]--; | 428 | h->free_huge_pages_node[nid]--; |
431 | 429 | ||
432 | if (!avoid_reserve) | 430 | if (!avoid_reserve) |
433 | decrement_hugepage_resv_vma(vma); | 431 | decrement_hugepage_resv_vma(h, vma); |
434 | 432 | ||
435 | break; | 433 | break; |
436 | } | 434 | } |
@@ -439,12 +437,13 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma, | |||
439 | return page; | 437 | return page; |
440 | } | 438 | } |
441 | 439 | ||
442 | static void update_and_free_page(struct page *page) | 440 | static void update_and_free_page(struct hstate *h, struct page *page) |
443 | { | 441 | { |
444 | int i; | 442 | int i; |
445 | nr_huge_pages--; | 443 | |
446 | nr_huge_pages_node[page_to_nid(page)]--; | 444 | h->nr_huge_pages--; |
447 | for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) { | 445 | h->nr_huge_pages_node[page_to_nid(page)]--; |
446 | for (i = 0; i < pages_per_huge_page(h); i++) { | ||
448 | page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | | 447 | page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | |
449 | 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | | 448 | 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | |
450 | 1 << PG_private | 1<< PG_writeback); | 449 | 1 << PG_private | 1<< PG_writeback); |
@@ -452,11 +451,16 @@ static void update_and_free_page(struct page *page) | |||
452 | set_compound_page_dtor(page, NULL); | 451 | set_compound_page_dtor(page, NULL); |
453 | set_page_refcounted(page); | 452 | set_page_refcounted(page); |
454 | arch_release_hugepage(page); | 453 | arch_release_hugepage(page); |
455 | __free_pages(page, HUGETLB_PAGE_ORDER); | 454 | __free_pages(page, huge_page_order(h)); |
456 | } | 455 | } |
457 | 456 | ||
458 | static void free_huge_page(struct page *page) | 457 | static void free_huge_page(struct page *page) |
459 | { | 458 | { |
459 | /* | ||
460 | * Can't pass hstate in here because it is called from the | ||
461 | * compound page destructor. | ||
462 | */ | ||
463 | struct hstate *h = &default_hstate; | ||
460 | int nid = page_to_nid(page); | 464 | int nid = page_to_nid(page); |
461 | struct address_space *mapping; | 465 | struct address_space *mapping; |
462 | 466 | ||
@@ -466,12 +470,12 @@ static void free_huge_page(struct page *page) | |||
466 | INIT_LIST_HEAD(&page->lru); | 470 | INIT_LIST_HEAD(&page->lru); |
467 | 471 | ||
468 | spin_lock(&hugetlb_lock); | 472 | spin_lock(&hugetlb_lock); |
469 | if (surplus_huge_pages_node[nid]) { | 473 | if (h->surplus_huge_pages_node[nid]) { |
470 | update_and_free_page(page); | 474 | update_and_free_page(h, page); |
471 | surplus_huge_pages--; | 475 | h->surplus_huge_pages--; |
472 | surplus_huge_pages_node[nid]--; | 476 | h->surplus_huge_pages_node[nid]--; |
473 | } else { | 477 | } else { |
474 | enqueue_huge_page(page); | 478 | enqueue_huge_page(h, page); |
475 | } | 479 | } |
476 | spin_unlock(&hugetlb_lock); | 480 | spin_unlock(&hugetlb_lock); |
477 | if (mapping) | 481 | if (mapping) |
@@ -483,7 +487,7 @@ static void free_huge_page(struct page *page) | |||
483 | * balanced by operating on them in a round-robin fashion. | 487 | * balanced by operating on them in a round-robin fashion. |
484 | * Returns 1 if an adjustment was made. | 488 | * Returns 1 if an adjustment was made. |
485 | */ | 489 | */ |
486 | static int adjust_pool_surplus(int delta) | 490 | static int adjust_pool_surplus(struct hstate *h, int delta) |
487 | { | 491 | { |
488 | static int prev_nid; | 492 | static int prev_nid; |
489 | int nid = prev_nid; | 493 | int nid = prev_nid; |
@@ -496,15 +500,15 @@ static int adjust_pool_surplus(int delta) | |||
496 | nid = first_node(node_online_map); | 500 | nid = first_node(node_online_map); |
497 | 501 | ||
498 | /* To shrink on this node, there must be a surplus page */ | 502 | /* To shrink on this node, there must be a surplus page */ |
499 | if (delta < 0 && !surplus_huge_pages_node[nid]) | 503 | if (delta < 0 && !h->surplus_huge_pages_node[nid]) |
500 | continue; | 504 | continue; |
501 | /* Surplus cannot exceed the total number of pages */ | 505 | /* Surplus cannot exceed the total number of pages */ |
502 | if (delta > 0 && surplus_huge_pages_node[nid] >= | 506 | if (delta > 0 && h->surplus_huge_pages_node[nid] >= |
503 | nr_huge_pages_node[nid]) | 507 | h->nr_huge_pages_node[nid]) |
504 | continue; | 508 | continue; |
505 | 509 | ||
506 | surplus_huge_pages += delta; | 510 | h->surplus_huge_pages += delta; |
507 | surplus_huge_pages_node[nid] += delta; | 511 | h->surplus_huge_pages_node[nid] += delta; |
508 | ret = 1; | 512 | ret = 1; |
509 | break; | 513 | break; |
510 | } while (nid != prev_nid); | 514 | } while (nid != prev_nid); |
@@ -513,46 +517,46 @@ static int adjust_pool_surplus(int delta) | |||
513 | return ret; | 517 | return ret; |
514 | } | 518 | } |
515 | 519 | ||
516 | static void prep_new_huge_page(struct page *page, int nid) | 520 | static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) |
517 | { | 521 | { |
518 | set_compound_page_dtor(page, free_huge_page); | 522 | set_compound_page_dtor(page, free_huge_page); |
519 | spin_lock(&hugetlb_lock); | 523 | spin_lock(&hugetlb_lock); |
520 | nr_huge_pages++; | 524 | h->nr_huge_pages++; |
521 | nr_huge_pages_node[nid]++; | 525 | h->nr_huge_pages_node[nid]++; |
522 | spin_unlock(&hugetlb_lock); | 526 | spin_unlock(&hugetlb_lock); |
523 | put_page(page); /* free it into the hugepage allocator */ | 527 | put_page(page); /* free it into the hugepage allocator */ |
524 | } | 528 | } |
525 | 529 | ||
526 | static struct page *alloc_fresh_huge_page_node(int nid) | 530 | static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) |
527 | { | 531 | { |
528 | struct page *page; | 532 | struct page *page; |
529 | 533 | ||
530 | page = alloc_pages_node(nid, | 534 | page = alloc_pages_node(nid, |
531 | htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE| | 535 | htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE| |
532 | __GFP_REPEAT|__GFP_NOWARN, | 536 | __GFP_REPEAT|__GFP_NOWARN, |
533 | HUGETLB_PAGE_ORDER); | 537 | huge_page_order(h)); |
534 | if (page) { | 538 | if (page) { |
535 | if (arch_prepare_hugepage(page)) { | 539 | if (arch_prepare_hugepage(page)) { |
536 | __free_pages(page, HUGETLB_PAGE_ORDER); | 540 | __free_pages(page, HUGETLB_PAGE_ORDER); |
537 | return NULL; | 541 | return NULL; |
538 | } | 542 | } |
539 | prep_new_huge_page(page, nid); | 543 | prep_new_huge_page(h, page, nid); |
540 | } | 544 | } |
541 | 545 | ||
542 | return page; | 546 | return page; |
543 | } | 547 | } |
544 | 548 | ||
545 | static int alloc_fresh_huge_page(void) | 549 | static int alloc_fresh_huge_page(struct hstate *h) |
546 | { | 550 | { |
547 | struct page *page; | 551 | struct page *page; |
548 | int start_nid; | 552 | int start_nid; |
549 | int next_nid; | 553 | int next_nid; |
550 | int ret = 0; | 554 | int ret = 0; |
551 | 555 | ||
552 | start_nid = hugetlb_next_nid; | 556 | start_nid = h->hugetlb_next_nid; |
553 | 557 | ||
554 | do { | 558 | do { |
555 | page = alloc_fresh_huge_page_node(hugetlb_next_nid); | 559 | page = alloc_fresh_huge_page_node(h, h->hugetlb_next_nid); |
556 | if (page) | 560 | if (page) |
557 | ret = 1; | 561 | ret = 1; |
558 | /* | 562 | /* |
@@ -566,11 +570,11 @@ static int alloc_fresh_huge_page(void) | |||
566 | * if we just successfully allocated a hugepage so that | 570 | * if we just successfully allocated a hugepage so that |
567 | * the next caller gets hugepages on the next node. | 571 | * the next caller gets hugepages on the next node. |
568 | */ | 572 | */ |
569 | next_nid = next_node(hugetlb_next_nid, node_online_map); | 573 | next_nid = next_node(h->hugetlb_next_nid, node_online_map); |
570 | if (next_nid == MAX_NUMNODES) | 574 | if (next_nid == MAX_NUMNODES) |
571 | next_nid = first_node(node_online_map); | 575 | next_nid = first_node(node_online_map); |
572 | hugetlb_next_nid = next_nid; | 576 | h->hugetlb_next_nid = next_nid; |
573 | } while (!page && hugetlb_next_nid != start_nid); | 577 | } while (!page && h->hugetlb_next_nid != start_nid); |
574 | 578 | ||
575 | if (ret) | 579 | if (ret) |
576 | count_vm_event(HTLB_BUDDY_PGALLOC); | 580 | count_vm_event(HTLB_BUDDY_PGALLOC); |
@@ -580,8 +584,8 @@ static int alloc_fresh_huge_page(void) | |||
580 | return ret; | 584 | return ret; |
581 | } | 585 | } |
582 | 586 | ||
583 | static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma, | 587 | static struct page *alloc_buddy_huge_page(struct hstate *h, |
584 | unsigned long address) | 588 | struct vm_area_struct *vma, unsigned long address) |
585 | { | 589 | { |
586 | struct page *page; | 590 | struct page *page; |
587 | unsigned int nid; | 591 | unsigned int nid; |
@@ -610,18 +614,18 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma, | |||
610 | * per-node value is checked there. | 614 | * per-node value is checked there. |
611 | */ | 615 | */ |
612 | spin_lock(&hugetlb_lock); | 616 | spin_lock(&hugetlb_lock); |
613 | if (surplus_huge_pages >= nr_overcommit_huge_pages) { | 617 | if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) { |
614 | spin_unlock(&hugetlb_lock); | 618 | spin_unlock(&hugetlb_lock); |
615 | return NULL; | 619 | return NULL; |
616 | } else { | 620 | } else { |
617 | nr_huge_pages++; | 621 | h->nr_huge_pages++; |
618 | surplus_huge_pages++; | 622 | h->surplus_huge_pages++; |
619 | } | 623 | } |
620 | spin_unlock(&hugetlb_lock); | 624 | spin_unlock(&hugetlb_lock); |
621 | 625 | ||
622 | page = alloc_pages(htlb_alloc_mask|__GFP_COMP| | 626 | page = alloc_pages(htlb_alloc_mask|__GFP_COMP| |
623 | __GFP_REPEAT|__GFP_NOWARN, | 627 | __GFP_REPEAT|__GFP_NOWARN, |
624 | HUGETLB_PAGE_ORDER); | 628 | huge_page_order(h)); |
625 | 629 | ||
626 | spin_lock(&hugetlb_lock); | 630 | spin_lock(&hugetlb_lock); |
627 | if (page) { | 631 | if (page) { |
@@ -636,12 +640,12 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma, | |||
636 | /* | 640 | /* |
637 | * We incremented the global counters already | 641 | * We incremented the global counters already |
638 | */ | 642 | */ |
639 | nr_huge_pages_node[nid]++; | 643 | h->nr_huge_pages_node[nid]++; |
640 | surplus_huge_pages_node[nid]++; | 644 | h->surplus_huge_pages_node[nid]++; |
641 | __count_vm_event(HTLB_BUDDY_PGALLOC); | 645 | __count_vm_event(HTLB_BUDDY_PGALLOC); |
642 | } else { | 646 | } else { |
643 | nr_huge_pages--; | 647 | h->nr_huge_pages--; |
644 | surplus_huge_pages--; | 648 | h->surplus_huge_pages--; |
645 | __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL); | 649 | __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL); |
646 | } | 650 | } |
647 | spin_unlock(&hugetlb_lock); | 651 | spin_unlock(&hugetlb_lock); |
@@ -653,16 +657,16 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma, | |||
653 | * Increase the hugetlb pool such that it can accomodate a reservation | 657 | * Increase the hugetlb pool such that it can accomodate a reservation |
654 | * of size 'delta'. | 658 | * of size 'delta'. |
655 | */ | 659 | */ |
656 | static int gather_surplus_pages(int delta) | 660 | static int gather_surplus_pages(struct hstate *h, int delta) |
657 | { | 661 | { |
658 | struct list_head surplus_list; | 662 | struct list_head surplus_list; |
659 | struct page *page, *tmp; | 663 | struct page *page, *tmp; |
660 | int ret, i; | 664 | int ret, i; |
661 | int needed, allocated; | 665 | int needed, allocated; |
662 | 666 | ||
663 | needed = (resv_huge_pages + delta) - free_huge_pages; | 667 | needed = (h->resv_huge_pages + delta) - h->free_huge_pages; |
664 | if (needed <= 0) { | 668 | if (needed <= 0) { |
665 | resv_huge_pages += delta; | 669 | h->resv_huge_pages += delta; |
666 | return 0; | 670 | return 0; |
667 | } | 671 | } |
668 | 672 | ||
@@ -673,7 +677,7 @@ static int gather_surplus_pages(int delta) | |||
673 | retry: | 677 | retry: |
674 | spin_unlock(&hugetlb_lock); | 678 | spin_unlock(&hugetlb_lock); |
675 | for (i = 0; i < needed; i++) { | 679 | for (i = 0; i < needed; i++) { |
676 | page = alloc_buddy_huge_page(NULL, 0); | 680 | page = alloc_buddy_huge_page(h, NULL, 0); |
677 | if (!page) { | 681 | if (!page) { |
678 | /* | 682 | /* |
679 | * We were not able to allocate enough pages to | 683 | * We were not able to allocate enough pages to |
@@ -694,7 +698,8 @@ retry: | |||
694 | * because either resv_huge_pages or free_huge_pages may have changed. | 698 | * because either resv_huge_pages or free_huge_pages may have changed. |
695 | */ | 699 | */ |
696 | spin_lock(&hugetlb_lock); | 700 | spin_lock(&hugetlb_lock); |
697 | needed = (resv_huge_pages + delta) - (free_huge_pages + allocated); | 701 | needed = (h->resv_huge_pages + delta) - |
702 | (h->free_huge_pages + allocated); | ||
698 | if (needed > 0) | 703 | if (needed > 0) |
699 | goto retry; | 704 | goto retry; |
700 | 705 | ||
@@ -707,7 +712,7 @@ retry: | |||
707 | * before they are reserved. | 712 | * before they are reserved. |
708 | */ | 713 | */ |
709 | needed += allocated; | 714 | needed += allocated; |
710 | resv_huge_pages += delta; | 715 | h->resv_huge_pages += delta; |
711 | ret = 0; | 716 | ret = 0; |
712 | free: | 717 | free: |
713 | /* Free the needed pages to the hugetlb pool */ | 718 | /* Free the needed pages to the hugetlb pool */ |
@@ -715,7 +720,7 @@ free: | |||
715 | if ((--needed) < 0) | 720 | if ((--needed) < 0) |
716 | break; | 721 | break; |
717 | list_del(&page->lru); | 722 | list_del(&page->lru); |
718 | enqueue_huge_page(page); | 723 | enqueue_huge_page(h, page); |
719 | } | 724 | } |
720 | 725 | ||
721 | /* Free unnecessary surplus pages to the buddy allocator */ | 726 | /* Free unnecessary surplus pages to the buddy allocator */ |
@@ -743,7 +748,8 @@ free: | |||
743 | * allocated to satisfy the reservation must be explicitly freed if they were | 748 | * allocated to satisfy the reservation must be explicitly freed if they were |
744 | * never used. | 749 | * never used. |
745 | */ | 750 | */ |
746 | static void return_unused_surplus_pages(unsigned long unused_resv_pages) | 751 | static void return_unused_surplus_pages(struct hstate *h, |
752 | unsigned long unused_resv_pages) | ||
747 | { | 753 | { |
748 | static int nid = -1; | 754 | static int nid = -1; |
749 | struct page *page; | 755 | struct page *page; |
@@ -758,27 +764,27 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages) | |||
758 | unsigned long remaining_iterations = num_online_nodes(); | 764 | unsigned long remaining_iterations = num_online_nodes(); |
759 | 765 | ||
760 | /* Uncommit the reservation */ | 766 | /* Uncommit the reservation */ |
761 | resv_huge_pages -= unused_resv_pages; | 767 | h->resv_huge_pages -= unused_resv_pages; |
762 | 768 | ||
763 | nr_pages = min(unused_resv_pages, surplus_huge_pages); | 769 | nr_pages = min(unused_resv_pages, h->surplus_huge_pages); |
764 | 770 | ||
765 | while (remaining_iterations-- && nr_pages) { | 771 | while (remaining_iterations-- && nr_pages) { |
766 | nid = next_node(nid, node_online_map); | 772 | nid = next_node(nid, node_online_map); |
767 | if (nid == MAX_NUMNODES) | 773 | if (nid == MAX_NUMNODES) |
768 | nid = first_node(node_online_map); | 774 | nid = first_node(node_online_map); |
769 | 775 | ||
770 | if (!surplus_huge_pages_node[nid]) | 776 | if (!h->surplus_huge_pages_node[nid]) |
771 | continue; | 777 | continue; |
772 | 778 | ||
773 | if (!list_empty(&hugepage_freelists[nid])) { | 779 | if (!list_empty(&h->hugepage_freelists[nid])) { |
774 | page = list_entry(hugepage_freelists[nid].next, | 780 | page = list_entry(h->hugepage_freelists[nid].next, |
775 | struct page, lru); | 781 | struct page, lru); |
776 | list_del(&page->lru); | 782 | list_del(&page->lru); |
777 | update_and_free_page(page); | 783 | update_and_free_page(h, page); |
778 | free_huge_pages--; | 784 | h->free_huge_pages--; |
779 | free_huge_pages_node[nid]--; | 785 | h->free_huge_pages_node[nid]--; |
780 | surplus_huge_pages--; | 786 | h->surplus_huge_pages--; |
781 | surplus_huge_pages_node[nid]--; | 787 | h->surplus_huge_pages_node[nid]--; |
782 | nr_pages--; | 788 | nr_pages--; |
783 | remaining_iterations = num_online_nodes(); | 789 | remaining_iterations = num_online_nodes(); |
784 | } | 790 | } |
@@ -794,13 +800,14 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages) | |||
794 | * an instantiated the change should be committed via vma_commit_reservation. | 800 | * an instantiated the change should be committed via vma_commit_reservation. |
795 | * No action is required on failure. | 801 | * No action is required on failure. |
796 | */ | 802 | */ |
797 | static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr) | 803 | static int vma_needs_reservation(struct hstate *h, |
804 | struct vm_area_struct *vma, unsigned long addr) | ||
798 | { | 805 | { |
799 | struct address_space *mapping = vma->vm_file->f_mapping; | 806 | struct address_space *mapping = vma->vm_file->f_mapping; |
800 | struct inode *inode = mapping->host; | 807 | struct inode *inode = mapping->host; |
801 | 808 | ||
802 | if (vma->vm_flags & VM_SHARED) { | 809 | if (vma->vm_flags & VM_SHARED) { |
803 | pgoff_t idx = vma_hugecache_offset(vma, addr); | 810 | pgoff_t idx = vma_hugecache_offset(h, vma, addr); |
804 | return region_chg(&inode->i_mapping->private_list, | 811 | return region_chg(&inode->i_mapping->private_list, |
805 | idx, idx + 1); | 812 | idx, idx + 1); |
806 | 813 | ||
@@ -809,7 +816,7 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr) | |||
809 | 816 | ||
810 | } else { | 817 | } else { |
811 | int err; | 818 | int err; |
812 | pgoff_t idx = vma_hugecache_offset(vma, addr); | 819 | pgoff_t idx = vma_hugecache_offset(h, vma, addr); |
813 | struct resv_map *reservations = vma_resv_map(vma); | 820 | struct resv_map *reservations = vma_resv_map(vma); |
814 | 821 | ||
815 | err = region_chg(&reservations->regions, idx, idx + 1); | 822 | err = region_chg(&reservations->regions, idx, idx + 1); |
@@ -818,18 +825,18 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr) | |||
818 | return 0; | 825 | return 0; |
819 | } | 826 | } |
820 | } | 827 | } |
821 | static void vma_commit_reservation(struct vm_area_struct *vma, | 828 | static void vma_commit_reservation(struct hstate *h, |
822 | unsigned long addr) | 829 | struct vm_area_struct *vma, unsigned long addr) |
823 | { | 830 | { |
824 | struct address_space *mapping = vma->vm_file->f_mapping; | 831 | struct address_space *mapping = vma->vm_file->f_mapping; |
825 | struct inode *inode = mapping->host; | 832 | struct inode *inode = mapping->host; |
826 | 833 | ||
827 | if (vma->vm_flags & VM_SHARED) { | 834 | if (vma->vm_flags & VM_SHARED) { |
828 | pgoff_t idx = vma_hugecache_offset(vma, addr); | 835 | pgoff_t idx = vma_hugecache_offset(h, vma, addr); |
829 | region_add(&inode->i_mapping->private_list, idx, idx + 1); | 836 | region_add(&inode->i_mapping->private_list, idx, idx + 1); |
830 | 837 | ||
831 | } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { | 838 | } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { |
832 | pgoff_t idx = vma_hugecache_offset(vma, addr); | 839 | pgoff_t idx = vma_hugecache_offset(h, vma, addr); |
833 | struct resv_map *reservations = vma_resv_map(vma); | 840 | struct resv_map *reservations = vma_resv_map(vma); |
834 | 841 | ||
835 | /* Mark this page used in the map. */ | 842 | /* Mark this page used in the map. */ |
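For reference, vma_hugecache_offset(), which gains the hstate argument in the hunks above, turns a user address into a page-cache index measured in huge pages. It is introduced earlier in this patch; where that hunk is not visible, it presumably reduces to something like the sketch below (kernel types assumed, not a verbatim quote):

	static pgoff_t vma_hugecache_offset(struct hstate *h,
				struct vm_area_struct *vma, unsigned long address)
	{
		/* offset of the address within the VMA, in huge pages, plus the
		 * mapping's file offset rescaled from base pages to huge pages */
		return ((address - vma->vm_start) >> huge_page_shift(h)) +
				(vma->vm_pgoff >> huge_page_order(h));
	}

Threading h through here is what lets region_chg()/region_add() account reservations in units of the mapping's actual huge page size instead of the single compile-time HPAGE_SHIFT.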
@@ -840,6 +847,7 @@ static void vma_commit_reservation(struct vm_area_struct *vma, | |||
840 | static struct page *alloc_huge_page(struct vm_area_struct *vma, | 847 | static struct page *alloc_huge_page(struct vm_area_struct *vma, |
841 | unsigned long addr, int avoid_reserve) | 848 | unsigned long addr, int avoid_reserve) |
842 | { | 849 | { |
850 | struct hstate *h = hstate_vma(vma); | ||
843 | struct page *page; | 851 | struct page *page; |
844 | struct address_space *mapping = vma->vm_file->f_mapping; | 852 | struct address_space *mapping = vma->vm_file->f_mapping; |
845 | struct inode *inode = mapping->host; | 853 | struct inode *inode = mapping->host; |
@@ -852,7 +860,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, | |||
852 | * MAP_NORESERVE mappings may also need pages and quota allocated | 860 | * MAP_NORESERVE mappings may also need pages and quota allocated |
853 | * if no reserve mapping overlaps. | 861 | * if no reserve mapping overlaps. |
854 | */ | 862 | */ |
855 | chg = vma_needs_reservation(vma, addr); | 863 | chg = vma_needs_reservation(h, vma, addr); |
856 | if (chg < 0) | 864 | if (chg < 0) |
857 | return ERR_PTR(chg); | 865 | return ERR_PTR(chg); |
858 | if (chg) | 866 | if (chg) |
@@ -860,11 +868,11 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, | |||
860 | return ERR_PTR(-ENOSPC); | 868 | return ERR_PTR(-ENOSPC); |
861 | 869 | ||
862 | spin_lock(&hugetlb_lock); | 870 | spin_lock(&hugetlb_lock); |
863 | page = dequeue_huge_page_vma(vma, addr, avoid_reserve); | 871 | page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve); |
864 | spin_unlock(&hugetlb_lock); | 872 | spin_unlock(&hugetlb_lock); |
865 | 873 | ||
866 | if (!page) { | 874 | if (!page) { |
867 | page = alloc_buddy_huge_page(vma, addr); | 875 | page = alloc_buddy_huge_page(h, vma, addr); |
868 | if (!page) { | 876 | if (!page) { |
869 | hugetlb_put_quota(inode->i_mapping, chg); | 877 | hugetlb_put_quota(inode->i_mapping, chg); |
870 | return ERR_PTR(-VM_FAULT_OOM); | 878 | return ERR_PTR(-VM_FAULT_OOM); |
@@ -874,7 +882,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, | |||
874 | set_page_refcounted(page); | 882 | set_page_refcounted(page); |
875 | set_page_private(page, (unsigned long) mapping); | 883 | set_page_private(page, (unsigned long) mapping); |
876 | 884 | ||
877 | vma_commit_reservation(vma, addr); | 885 | vma_commit_reservation(h, vma, addr); |
878 | 886 | ||
879 | return page; | 887 | return page; |
880 | } | 888 | } |
@@ -882,21 +890,28 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, | |||
882 | static int __init hugetlb_init(void) | 890 | static int __init hugetlb_init(void) |
883 | { | 891 | { |
884 | unsigned long i; | 892 | unsigned long i; |
893 | struct hstate *h = &default_hstate; | ||
885 | 894 | ||
886 | if (HPAGE_SHIFT == 0) | 895 | if (HPAGE_SHIFT == 0) |
887 | return 0; | 896 | return 0; |
888 | 897 | ||
898 | if (!h->order) { | ||
899 | h->order = HPAGE_SHIFT - PAGE_SHIFT; | ||
900 | h->mask = HPAGE_MASK; | ||
901 | } | ||
902 | |||
889 | for (i = 0; i < MAX_NUMNODES; ++i) | 903 | for (i = 0; i < MAX_NUMNODES; ++i) |
890 | INIT_LIST_HEAD(&hugepage_freelists[i]); | 904 | INIT_LIST_HEAD(&h->hugepage_freelists[i]); |
891 | 905 | ||
892 | hugetlb_next_nid = first_node(node_online_map); | 906 | h->hugetlb_next_nid = first_node(node_online_map); |
893 | 907 | ||
894 | for (i = 0; i < max_huge_pages; ++i) { | 908 | for (i = 0; i < max_huge_pages; ++i) { |
895 | if (!alloc_fresh_huge_page()) | 909 | if (!alloc_fresh_huge_page(h)) |
896 | break; | 910 | break; |
897 | } | 911 | } |
898 | max_huge_pages = free_huge_pages = nr_huge_pages = i; | 912 | max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i; |
899 | printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages); | 913 | printk(KERN_INFO "Total HugeTLB memory allocated, %ld\n", |
914 | h->free_huge_pages); | ||
900 | return 0; | 915 | return 0; |
901 | } | 916 | } |
902 | module_init(hugetlb_init); | 917 | module_init(hugetlb_init); |
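Most of the churn in this file is the old global counters moving into the new per-page-size struct hstate (added to include/linux/hugetlb.h by this series; that header is not shown in this part of the diff). A rough sketch of the state and accessors the call sites above rely on follows; the field names are taken from the h-> accesses in these hunks, while the types and helper bodies are assumptions:

	/* sketch only, not the verbatim header */
	struct hstate {
		unsigned int order;		/* huge page size = PAGE_SIZE << order */
		unsigned long mask;		/* returned by huge_page_mask(h)       */
		unsigned long nr_huge_pages, free_huge_pages;
		unsigned long resv_huge_pages, surplus_huge_pages;
		unsigned long nr_overcommit_huge_pages;
		int hugetlb_next_nid;
		struct list_head hugepage_freelists[MAX_NUMNODES];
		unsigned int nr_huge_pages_node[MAX_NUMNODES];
		unsigned int free_huge_pages_node[MAX_NUMNODES];
		unsigned int surplus_huge_pages_node[MAX_NUMNODES];
	};

	/* derived helpers assumed by the conversions above */
	static inline unsigned long huge_page_size(struct hstate *h)
	{
		return (unsigned long)PAGE_SIZE << h->order;
	}
	static inline unsigned long huge_page_mask(struct hstate *h)  { return h->mask; }
	static inline unsigned int huge_page_order(struct hstate *h)  { return h->order; }
	static inline unsigned int huge_page_shift(struct hstate *h)  { return h->order + PAGE_SHIFT; }
	static inline unsigned int pages_per_huge_page(struct hstate *h) { return 1 << h->order; }
	static inline unsigned int blocks_per_huge_page(struct hstate *h)
	{
		return huge_page_size(h) / 512;	/* i_blocks is in 512-byte units */
	}

With default_hstate carrying what used to be the single global pool, hugetlb_init() only has to fill in order and mask from HPAGE_SHIFT before the rest of the code runs unchanged.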
@@ -922,34 +937,36 @@ static unsigned int cpuset_mems_nr(unsigned int *array) | |||
922 | 937 | ||
923 | #ifdef CONFIG_SYSCTL | 938 | #ifdef CONFIG_SYSCTL |
924 | #ifdef CONFIG_HIGHMEM | 939 | #ifdef CONFIG_HIGHMEM |
925 | static void try_to_free_low(unsigned long count) | 940 | static void try_to_free_low(struct hstate *h, unsigned long count) |
926 | { | 941 | { |
927 | int i; | 942 | int i; |
928 | 943 | ||
929 | for (i = 0; i < MAX_NUMNODES; ++i) { | 944 | for (i = 0; i < MAX_NUMNODES; ++i) { |
930 | struct page *page, *next; | 945 | struct page *page, *next; |
931 | list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) { | 946 | struct list_head *freel = &h->hugepage_freelists[i]; |
932 | if (count >= nr_huge_pages) | 947 | list_for_each_entry_safe(page, next, freel, lru) { |
948 | if (count >= h->nr_huge_pages) | ||
933 | return; | 949 | return; |
934 | if (PageHighMem(page)) | 950 | if (PageHighMem(page)) |
935 | continue; | 951 | continue; |
936 | list_del(&page->lru); | 952 | list_del(&page->lru); |
937 | update_and_free_page(page); | 953 | update_and_free_page(h, page); |
938 | free_huge_pages--; | 954 | h->free_huge_pages--; |
939 | free_huge_pages_node[page_to_nid(page)]--; | 955 | h->free_huge_pages_node[page_to_nid(page)]--; |
940 | } | 956 | } |
941 | } | 957 | } |
942 | } | 958 | } |
943 | #else | 959 | #else |
944 | static inline void try_to_free_low(unsigned long count) | 960 | static inline void try_to_free_low(struct hstate *h, unsigned long count) |
945 | { | 961 | { |
946 | } | 962 | } |
947 | #endif | 963 | #endif |
948 | 964 | ||
949 | #define persistent_huge_pages (nr_huge_pages - surplus_huge_pages) | 965 | #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages) |
950 | static unsigned long set_max_huge_pages(unsigned long count) | 966 | static unsigned long set_max_huge_pages(unsigned long count) |
951 | { | 967 | { |
952 | unsigned long min_count, ret; | 968 | unsigned long min_count, ret; |
969 | struct hstate *h = &default_hstate; | ||
953 | 970 | ||
954 | /* | 971 | /* |
955 | * Increase the pool size | 972 | * Increase the pool size |
@@ -963,19 +980,19 @@ static unsigned long set_max_huge_pages(unsigned long count) | |||
963 | * within all the constraints specified by the sysctls. | 980 | * within all the constraints specified by the sysctls. |
964 | */ | 981 | */ |
965 | spin_lock(&hugetlb_lock); | 982 | spin_lock(&hugetlb_lock); |
966 | while (surplus_huge_pages && count > persistent_huge_pages) { | 983 | while (h->surplus_huge_pages && count > persistent_huge_pages(h)) { |
967 | if (!adjust_pool_surplus(-1)) | 984 | if (!adjust_pool_surplus(h, -1)) |
968 | break; | 985 | break; |
969 | } | 986 | } |
970 | 987 | ||
971 | while (count > persistent_huge_pages) { | 988 | while (count > persistent_huge_pages(h)) { |
972 | /* | 989 | /* |
973 | * If this allocation races such that we no longer need the | 990 | * If this allocation races such that we no longer need the |
974 | * page, free_huge_page will handle it by freeing the page | 991 | * page, free_huge_page will handle it by freeing the page |
975 | * and reducing the surplus. | 992 | * and reducing the surplus. |
976 | */ | 993 | */ |
977 | spin_unlock(&hugetlb_lock); | 994 | spin_unlock(&hugetlb_lock); |
978 | ret = alloc_fresh_huge_page(); | 995 | ret = alloc_fresh_huge_page(h); |
979 | spin_lock(&hugetlb_lock); | 996 | spin_lock(&hugetlb_lock); |
980 | if (!ret) | 997 | if (!ret) |
981 | goto out; | 998 | goto out; |
@@ -997,21 +1014,21 @@ static unsigned long set_max_huge_pages(unsigned long count) | |||
997 | * and won't grow the pool anywhere else. Not until one of the | 1014 | * and won't grow the pool anywhere else. Not until one of the |
998 | * sysctls are changed, or the surplus pages go out of use. | 1015 | * sysctls are changed, or the surplus pages go out of use. |
999 | */ | 1016 | */ |
1000 | min_count = resv_huge_pages + nr_huge_pages - free_huge_pages; | 1017 | min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages; |
1001 | min_count = max(count, min_count); | 1018 | min_count = max(count, min_count); |
1002 | try_to_free_low(min_count); | 1019 | try_to_free_low(h, min_count); |
1003 | while (min_count < persistent_huge_pages) { | 1020 | while (min_count < persistent_huge_pages(h)) { |
1004 | struct page *page = dequeue_huge_page(); | 1021 | struct page *page = dequeue_huge_page(h); |
1005 | if (!page) | 1022 | if (!page) |
1006 | break; | 1023 | break; |
1007 | update_and_free_page(page); | 1024 | update_and_free_page(h, page); |
1008 | } | 1025 | } |
1009 | while (count < persistent_huge_pages) { | 1026 | while (count < persistent_huge_pages(h)) { |
1010 | if (!adjust_pool_surplus(1)) | 1027 | if (!adjust_pool_surplus(h, 1)) |
1011 | break; | 1028 | break; |
1012 | } | 1029 | } |
1013 | out: | 1030 | out: |
1014 | ret = persistent_huge_pages; | 1031 | ret = persistent_huge_pages(h); |
1015 | spin_unlock(&hugetlb_lock); | 1032 | spin_unlock(&hugetlb_lock); |
1016 | return ret; | 1033 | return ret; |
1017 | } | 1034 | } |
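The shrink side of set_max_huge_pages() never releases pages that are in use or already promised to a reservation. A tiny standalone check of that bookkeeping, using made-up pool numbers rather than anything from this patch:

	#include <stdio.h>

	int main(void)
	{
		unsigned long nr_huge_pages = 10;      /* pages in the pool             */
		unsigned long free_huge_pages = 6;     /* of which currently unused     */
		unsigned long surplus_huge_pages = 2;  /* overcommitted, not persistent */
		unsigned long resv_huge_pages = 1;     /* reserved but not yet faulted  */
		unsigned long count = 0;               /* admin writes 0 to the sysctl  */

		/* persistent_huge_pages(h): pool pages that are not surplus */
		unsigned long persistent = nr_huge_pages - surplus_huge_pages;               /* 8 */

		/* cannot shrink below in-use pages plus outstanding reservations */
		unsigned long min_count = resv_huge_pages + nr_huge_pages - free_huge_pages; /* 5 */
		if (count > min_count)
			min_count = count;

		printf("persistent=%lu min_count=%lu -> at most %lu pages freed\n",
		       persistent, min_count, persistent - min_count);                      /* 3 */
		return 0;
	}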
@@ -1041,9 +1058,10 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, | |||
1041 | struct file *file, void __user *buffer, | 1058 | struct file *file, void __user *buffer, |
1042 | size_t *length, loff_t *ppos) | 1059 | size_t *length, loff_t *ppos) |
1043 | { | 1060 | { |
1061 | struct hstate *h = &default_hstate; | ||
1044 | proc_doulongvec_minmax(table, write, file, buffer, length, ppos); | 1062 | proc_doulongvec_minmax(table, write, file, buffer, length, ppos); |
1045 | spin_lock(&hugetlb_lock); | 1063 | spin_lock(&hugetlb_lock); |
1046 | nr_overcommit_huge_pages = sysctl_overcommit_huge_pages; | 1064 | h->nr_overcommit_huge_pages = sysctl_overcommit_huge_pages; |
1047 | spin_unlock(&hugetlb_lock); | 1065 | spin_unlock(&hugetlb_lock); |
1048 | return 0; | 1066 | return 0; |
1049 | } | 1067 | } |
@@ -1052,37 +1070,40 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, | |||
1052 | 1070 | ||
1053 | int hugetlb_report_meminfo(char *buf) | 1071 | int hugetlb_report_meminfo(char *buf) |
1054 | { | 1072 | { |
1073 | struct hstate *h = &default_hstate; | ||
1055 | return sprintf(buf, | 1074 | return sprintf(buf, |
1056 | "HugePages_Total: %5lu\n" | 1075 | "HugePages_Total: %5lu\n" |
1057 | "HugePages_Free: %5lu\n" | 1076 | "HugePages_Free: %5lu\n" |
1058 | "HugePages_Rsvd: %5lu\n" | 1077 | "HugePages_Rsvd: %5lu\n" |
1059 | "HugePages_Surp: %5lu\n" | 1078 | "HugePages_Surp: %5lu\n" |
1060 | "Hugepagesize: %5lu kB\n", | 1079 | "Hugepagesize: %5lu kB\n", |
1061 | nr_huge_pages, | 1080 | h->nr_huge_pages, |
1062 | free_huge_pages, | 1081 | h->free_huge_pages, |
1063 | resv_huge_pages, | 1082 | h->resv_huge_pages, |
1064 | surplus_huge_pages, | 1083 | h->surplus_huge_pages, |
1065 | HPAGE_SIZE/1024); | 1084 | 1UL << (huge_page_order(h) + PAGE_SHIFT - 10)); |
1066 | } | 1085 | } |
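The Hugepagesize line now derives kilobytes from the page order rather than dividing HPAGE_SIZE by 1024. With x86-style values (order 9, PAGE_SHIFT 12, chosen purely for illustration) the expression checks out:

	#include <stdio.h>

	int main(void)
	{
		unsigned int order = 9;		/* assumed: 2 MB huge pages */
		unsigned int page_shift = 12;	/* assumed: 4 kB base pages */

		/* 1UL << (huge_page_order(h) + PAGE_SHIFT - 10) from the sprintf above */
		unsigned long kb = 1UL << (order + page_shift - 10);
		printf("Hugepagesize: %5lu kB\n", kb);	/* prints 2048 */
		return 0;
	}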
1067 | 1086 | ||
1068 | int hugetlb_report_node_meminfo(int nid, char *buf) | 1087 | int hugetlb_report_node_meminfo(int nid, char *buf) |
1069 | { | 1088 | { |
1089 | struct hstate *h = &default_hstate; | ||
1070 | return sprintf(buf, | 1090 | return sprintf(buf, |
1071 | "Node %d HugePages_Total: %5u\n" | 1091 | "Node %d HugePages_Total: %5u\n" |
1072 | "Node %d HugePages_Free: %5u\n" | 1092 | "Node %d HugePages_Free: %5u\n" |
1073 | "Node %d HugePages_Surp: %5u\n", | 1093 | "Node %d HugePages_Surp: %5u\n", |
1074 | nid, nr_huge_pages_node[nid], | 1094 | nid, h->nr_huge_pages_node[nid], |
1075 | nid, free_huge_pages_node[nid], | 1095 | nid, h->free_huge_pages_node[nid], |
1076 | nid, surplus_huge_pages_node[nid]); | 1096 | nid, h->surplus_huge_pages_node[nid]); |
1077 | } | 1097 | } |
1078 | 1098 | ||
1079 | /* Return the number of pages of memory we physically have, in PAGE_SIZE units. */ | 1099 | /* Return the number of pages of memory we physically have, in PAGE_SIZE units. */ |
1080 | unsigned long hugetlb_total_pages(void) | 1100 | unsigned long hugetlb_total_pages(void) |
1081 | { | 1101 | { |
1082 | return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE); | 1102 | struct hstate *h = &default_hstate; |
1103 | return h->nr_huge_pages * pages_per_huge_page(h); | ||
1083 | } | 1104 | } |
1084 | 1105 | ||
1085 | static int hugetlb_acct_memory(long delta) | 1106 | static int hugetlb_acct_memory(struct hstate *h, long delta) |
1086 | { | 1107 | { |
1087 | int ret = -ENOMEM; | 1108 | int ret = -ENOMEM; |
1088 | 1109 | ||
@@ -1105,18 +1126,18 @@ static int hugetlb_acct_memory(long delta) | |||
1105 | * semantics that cpuset has. | 1126 | * semantics that cpuset has. |
1106 | */ | 1127 | */ |
1107 | if (delta > 0) { | 1128 | if (delta > 0) { |
1108 | if (gather_surplus_pages(delta) < 0) | 1129 | if (gather_surplus_pages(h, delta) < 0) |
1109 | goto out; | 1130 | goto out; |
1110 | 1131 | ||
1111 | if (delta > cpuset_mems_nr(free_huge_pages_node)) { | 1132 | if (delta > cpuset_mems_nr(h->free_huge_pages_node)) { |
1112 | return_unused_surplus_pages(delta); | 1133 | return_unused_surplus_pages(h, delta); |
1113 | goto out; | 1134 | goto out; |
1114 | } | 1135 | } |
1115 | } | 1136 | } |
1116 | 1137 | ||
1117 | ret = 0; | 1138 | ret = 0; |
1118 | if (delta < 0) | 1139 | if (delta < 0) |
1119 | return_unused_surplus_pages((unsigned long) -delta); | 1140 | return_unused_surplus_pages(h, (unsigned long) -delta); |
1120 | 1141 | ||
1121 | out: | 1142 | out: |
1122 | spin_unlock(&hugetlb_lock); | 1143 | spin_unlock(&hugetlb_lock); |
@@ -1141,14 +1162,15 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma) | |||
1141 | 1162 | ||
1142 | static void hugetlb_vm_op_close(struct vm_area_struct *vma) | 1163 | static void hugetlb_vm_op_close(struct vm_area_struct *vma) |
1143 | { | 1164 | { |
1165 | struct hstate *h = hstate_vma(vma); | ||
1144 | struct resv_map *reservations = vma_resv_map(vma); | 1166 | struct resv_map *reservations = vma_resv_map(vma); |
1145 | unsigned long reserve; | 1167 | unsigned long reserve; |
1146 | unsigned long start; | 1168 | unsigned long start; |
1147 | unsigned long end; | 1169 | unsigned long end; |
1148 | 1170 | ||
1149 | if (reservations) { | 1171 | if (reservations) { |
1150 | start = vma_hugecache_offset(vma, vma->vm_start); | 1172 | start = vma_hugecache_offset(h, vma, vma->vm_start); |
1151 | end = vma_hugecache_offset(vma, vma->vm_end); | 1173 | end = vma_hugecache_offset(h, vma, vma->vm_end); |
1152 | 1174 | ||
1153 | reserve = (end - start) - | 1175 | reserve = (end - start) - |
1154 | region_count(&reservations->regions, start, end); | 1176 | region_count(&reservations->regions, start, end); |
@@ -1156,7 +1178,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) | |||
1156 | kref_put(&reservations->refs, resv_map_release); | 1178 | kref_put(&reservations->refs, resv_map_release); |
1157 | 1179 | ||
1158 | if (reserve) | 1180 | if (reserve) |
1159 | hugetlb_acct_memory(-reserve); | 1181 | hugetlb_acct_memory(h, -reserve); |
1160 | } | 1182 | } |
1161 | } | 1183 | } |
1162 | 1184 | ||
@@ -1214,14 +1236,16 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | |||
1214 | struct page *ptepage; | 1236 | struct page *ptepage; |
1215 | unsigned long addr; | 1237 | unsigned long addr; |
1216 | int cow; | 1238 | int cow; |
1239 | struct hstate *h = hstate_vma(vma); | ||
1240 | unsigned long sz = huge_page_size(h); | ||
1217 | 1241 | ||
1218 | cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; | 1242 | cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; |
1219 | 1243 | ||
1220 | for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { | 1244 | for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) { |
1221 | src_pte = huge_pte_offset(src, addr); | 1245 | src_pte = huge_pte_offset(src, addr); |
1222 | if (!src_pte) | 1246 | if (!src_pte) |
1223 | continue; | 1247 | continue; |
1224 | dst_pte = huge_pte_alloc(dst, addr); | 1248 | dst_pte = huge_pte_alloc(dst, addr, sz); |
1225 | if (!dst_pte) | 1249 | if (!dst_pte) |
1226 | goto nomem; | 1250 | goto nomem; |
1227 | 1251 | ||
@@ -1257,6 +1281,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | |||
1257 | pte_t pte; | 1281 | pte_t pte; |
1258 | struct page *page; | 1282 | struct page *page; |
1259 | struct page *tmp; | 1283 | struct page *tmp; |
1284 | struct hstate *h = hstate_vma(vma); | ||
1285 | unsigned long sz = huge_page_size(h); | ||
1286 | |||
1260 | /* | 1287 | /* |
1261 | * A page gathering list, protected by per file i_mmap_lock. The | 1288 | * A page gathering list, protected by per file i_mmap_lock. The |
1262 | * lock is used to avoid list corruption from multiple unmapping | 1289 | * lock is used to avoid list corruption from multiple unmapping |
@@ -1265,11 +1292,11 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | |||
1265 | LIST_HEAD(page_list); | 1292 | LIST_HEAD(page_list); |
1266 | 1293 | ||
1267 | WARN_ON(!is_vm_hugetlb_page(vma)); | 1294 | WARN_ON(!is_vm_hugetlb_page(vma)); |
1268 | BUG_ON(start & ~HPAGE_MASK); | 1295 | BUG_ON(start & ~huge_page_mask(h)); |
1269 | BUG_ON(end & ~HPAGE_MASK); | 1296 | BUG_ON(end & ~huge_page_mask(h)); |
1270 | 1297 | ||
1271 | spin_lock(&mm->page_table_lock); | 1298 | spin_lock(&mm->page_table_lock); |
1272 | for (address = start; address < end; address += HPAGE_SIZE) { | 1299 | for (address = start; address < end; address += sz) { |
1273 | ptep = huge_pte_offset(mm, address); | 1300 | ptep = huge_pte_offset(mm, address); |
1274 | if (!ptep) | 1301 | if (!ptep) |
1275 | continue; | 1302 | continue; |
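__unmap_hugepage_range(), like copy_hugetlb_page_range() above and the walkers further down, now aligns and steps by the per-hstate size instead of the HPAGE_* constants. A standalone illustration of what that alignment and stepping look like, assuming a 2 MB huge page size:

	#include <stdio.h>

	int main(void)
	{
		unsigned long sz = 1UL << 21;		/* assumed 2 MB huge pages       */
		unsigned long mask = ~(sz - 1);		/* analogue of huge_page_mask(h) */
		unsigned long start = 0x40000000UL;
		unsigned long end   = 0x40600000UL;	/* a 6 MB range                  */
		unsigned long addr;

		/* the BUG_ONs above require huge-page-aligned start and end */
		printf("start aligned: %d, end aligned: %d\n",
		       (start & ~mask) == 0, (end & ~mask) == 0);

		/* one huge pte per huge page: three iterations for this range */
		for (addr = start; addr < end; addr += sz)
			printf("huge pte covers %#lx to %#lx\n", addr, addr + sz - 1);
		return 0;
	}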
@@ -1383,6 +1410,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1383 | unsigned long address, pte_t *ptep, pte_t pte, | 1410 | unsigned long address, pte_t *ptep, pte_t pte, |
1384 | struct page *pagecache_page) | 1411 | struct page *pagecache_page) |
1385 | { | 1412 | { |
1413 | struct hstate *h = hstate_vma(vma); | ||
1386 | struct page *old_page, *new_page; | 1414 | struct page *old_page, *new_page; |
1387 | int avoidcopy; | 1415 | int avoidcopy; |
1388 | int outside_reserve = 0; | 1416 | int outside_reserve = 0; |
@@ -1443,7 +1471,7 @@ retry_avoidcopy: | |||
1443 | __SetPageUptodate(new_page); | 1471 | __SetPageUptodate(new_page); |
1444 | spin_lock(&mm->page_table_lock); | 1472 | spin_lock(&mm->page_table_lock); |
1445 | 1473 | ||
1446 | ptep = huge_pte_offset(mm, address & HPAGE_MASK); | 1474 | ptep = huge_pte_offset(mm, address & huge_page_mask(h)); |
1447 | if (likely(pte_same(huge_ptep_get(ptep), pte))) { | 1475 | if (likely(pte_same(huge_ptep_get(ptep), pte))) { |
1448 | /* Break COW */ | 1476 | /* Break COW */ |
1449 | huge_ptep_clear_flush(vma, address, ptep); | 1477 | huge_ptep_clear_flush(vma, address, ptep); |
@@ -1458,14 +1486,14 @@ retry_avoidcopy: | |||
1458 | } | 1486 | } |
1459 | 1487 | ||
1460 | /* Return the pagecache page at a given address within a VMA */ | 1488 | /* Return the pagecache page at a given address within a VMA */ |
1461 | static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma, | 1489 | static struct page *hugetlbfs_pagecache_page(struct hstate *h, |
1462 | unsigned long address) | 1490 | struct vm_area_struct *vma, unsigned long address) |
1463 | { | 1491 | { |
1464 | struct address_space *mapping; | 1492 | struct address_space *mapping; |
1465 | pgoff_t idx; | 1493 | pgoff_t idx; |
1466 | 1494 | ||
1467 | mapping = vma->vm_file->f_mapping; | 1495 | mapping = vma->vm_file->f_mapping; |
1468 | idx = vma_hugecache_offset(vma, address); | 1496 | idx = vma_hugecache_offset(h, vma, address); |
1469 | 1497 | ||
1470 | return find_lock_page(mapping, idx); | 1498 | return find_lock_page(mapping, idx); |
1471 | } | 1499 | } |
@@ -1473,6 +1501,7 @@ static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma, | |||
1473 | static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, | 1501 | static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, |
1474 | unsigned long address, pte_t *ptep, int write_access) | 1502 | unsigned long address, pte_t *ptep, int write_access) |
1475 | { | 1503 | { |
1504 | struct hstate *h = hstate_vma(vma); | ||
1476 | int ret = VM_FAULT_SIGBUS; | 1505 | int ret = VM_FAULT_SIGBUS; |
1477 | pgoff_t idx; | 1506 | pgoff_t idx; |
1478 | unsigned long size; | 1507 | unsigned long size; |
@@ -1493,7 +1522,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1493 | } | 1522 | } |
1494 | 1523 | ||
1495 | mapping = vma->vm_file->f_mapping; | 1524 | mapping = vma->vm_file->f_mapping; |
1496 | idx = vma_hugecache_offset(vma, address); | 1525 | idx = vma_hugecache_offset(h, vma, address); |
1497 | 1526 | ||
1498 | /* | 1527 | /* |
1499 | * Use page lock to guard against racing truncation | 1528 | * Use page lock to guard against racing truncation |
@@ -1502,7 +1531,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1502 | retry: | 1531 | retry: |
1503 | page = find_lock_page(mapping, idx); | 1532 | page = find_lock_page(mapping, idx); |
1504 | if (!page) { | 1533 | if (!page) { |
1505 | size = i_size_read(mapping->host) >> HPAGE_SHIFT; | 1534 | size = i_size_read(mapping->host) >> huge_page_shift(h); |
1506 | if (idx >= size) | 1535 | if (idx >= size) |
1507 | goto out; | 1536 | goto out; |
1508 | page = alloc_huge_page(vma, address, 0); | 1537 | page = alloc_huge_page(vma, address, 0); |
@@ -1510,7 +1539,7 @@ retry: | |||
1510 | ret = -PTR_ERR(page); | 1539 | ret = -PTR_ERR(page); |
1511 | goto out; | 1540 | goto out; |
1512 | } | 1541 | } |
1513 | clear_huge_page(page, address); | 1542 | clear_huge_page(page, address, huge_page_size(h)); |
1514 | __SetPageUptodate(page); | 1543 | __SetPageUptodate(page); |
1515 | 1544 | ||
1516 | if (vma->vm_flags & VM_SHARED) { | 1545 | if (vma->vm_flags & VM_SHARED) { |
@@ -1526,14 +1555,14 @@ retry: | |||
1526 | } | 1555 | } |
1527 | 1556 | ||
1528 | spin_lock(&inode->i_lock); | 1557 | spin_lock(&inode->i_lock); |
1529 | inode->i_blocks += BLOCKS_PER_HUGEPAGE; | 1558 | inode->i_blocks += blocks_per_huge_page(h); |
1530 | spin_unlock(&inode->i_lock); | 1559 | spin_unlock(&inode->i_lock); |
1531 | } else | 1560 | } else |
1532 | lock_page(page); | 1561 | lock_page(page); |
1533 | } | 1562 | } |
1534 | 1563 | ||
1535 | spin_lock(&mm->page_table_lock); | 1564 | spin_lock(&mm->page_table_lock); |
1536 | size = i_size_read(mapping->host) >> HPAGE_SHIFT; | 1565 | size = i_size_read(mapping->host) >> huge_page_shift(h); |
1537 | if (idx >= size) | 1566 | if (idx >= size) |
1538 | goto backout; | 1567 | goto backout; |
1539 | 1568 | ||
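The truncation guard in hugetlb_no_page() compares the faulting index against i_size expressed in huge pages, both now derived from the hstate. With an assumed 2 MB huge page size, a 10 MB hugetlbfs file behaves like this:

	#include <stdio.h>

	int main(void)
	{
		unsigned int huge_page_shift = 21;		/* assumed 2 MB huge pages */
		long long i_size = 10LL * 1024 * 1024;		/* 10 MB file              */

		/* size = i_size_read(mapping->host) >> huge_page_shift(h) */
		long long size = i_size >> huge_page_shift;	/* 5 huge pages            */
		long long idx;

		/* faults at idx 0..4 proceed; idx >= 5 takes the SIGBUS path */
		for (idx = 3; idx <= 6; idx++)
			printf("idx=%lld -> %s\n", idx, idx >= size ? "SIGBUS" : "ok");
		return 0;
	}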
@@ -1569,8 +1598,9 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1569 | pte_t entry; | 1598 | pte_t entry; |
1570 | int ret; | 1599 | int ret; |
1571 | static DEFINE_MUTEX(hugetlb_instantiation_mutex); | 1600 | static DEFINE_MUTEX(hugetlb_instantiation_mutex); |
1601 | struct hstate *h = hstate_vma(vma); | ||
1572 | 1602 | ||
1573 | ptep = huge_pte_alloc(mm, address); | 1603 | ptep = huge_pte_alloc(mm, address, huge_page_size(h)); |
1574 | if (!ptep) | 1604 | if (!ptep) |
1575 | return VM_FAULT_OOM; | 1605 | return VM_FAULT_OOM; |
1576 | 1606 | ||
@@ -1594,7 +1624,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1594 | if (likely(pte_same(entry, huge_ptep_get(ptep)))) | 1624 | if (likely(pte_same(entry, huge_ptep_get(ptep)))) |
1595 | if (write_access && !pte_write(entry)) { | 1625 | if (write_access && !pte_write(entry)) { |
1596 | struct page *page; | 1626 | struct page *page; |
1597 | page = hugetlbfs_pagecache_page(vma, address); | 1627 | page = hugetlbfs_pagecache_page(h, vma, address); |
1598 | ret = hugetlb_cow(mm, vma, address, ptep, entry, page); | 1628 | ret = hugetlb_cow(mm, vma, address, ptep, entry, page); |
1599 | if (page) { | 1629 | if (page) { |
1600 | unlock_page(page); | 1630 | unlock_page(page); |
@@ -1615,6 +1645,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1615 | unsigned long pfn_offset; | 1645 | unsigned long pfn_offset; |
1616 | unsigned long vaddr = *position; | 1646 | unsigned long vaddr = *position; |
1617 | int remainder = *length; | 1647 | int remainder = *length; |
1648 | struct hstate *h = hstate_vma(vma); | ||
1618 | 1649 | ||
1619 | spin_lock(&mm->page_table_lock); | 1650 | spin_lock(&mm->page_table_lock); |
1620 | while (vaddr < vma->vm_end && remainder) { | 1651 | while (vaddr < vma->vm_end && remainder) { |
@@ -1626,7 +1657,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1626 | * each hugepage. We have to make sure we get the | 1657 | * each hugepage. We have to make sure we get the |
1627 | * first, for the page indexing below to work. | 1658 | * first, for the page indexing below to work. |
1628 | */ | 1659 | */ |
1629 | pte = huge_pte_offset(mm, vaddr & HPAGE_MASK); | 1660 | pte = huge_pte_offset(mm, vaddr & huge_page_mask(h)); |
1630 | 1661 | ||
1631 | if (!pte || huge_pte_none(huge_ptep_get(pte)) || | 1662 | if (!pte || huge_pte_none(huge_ptep_get(pte)) || |
1632 | (write && !pte_write(huge_ptep_get(pte)))) { | 1663 | (write && !pte_write(huge_ptep_get(pte)))) { |
@@ -1644,7 +1675,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1644 | break; | 1675 | break; |
1645 | } | 1676 | } |
1646 | 1677 | ||
1647 | pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT; | 1678 | pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT; |
1648 | page = pte_page(huge_ptep_get(pte)); | 1679 | page = pte_page(huge_ptep_get(pte)); |
1649 | same_page: | 1680 | same_page: |
1650 | if (pages) { | 1681 | if (pages) { |
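pfn_offset in follow_hugetlb_page() is the base-page index inside the compound huge page, bounded by pages_per_huge_page(h) in the test further below. A quick standalone check of that arithmetic, again with assumed 2 MB / 4 kB sizes:

	#include <stdio.h>

	int main(void)
	{
		unsigned int page_shift = 12;			/* assumed 4 kB base pages */
		unsigned long huge_mask = ~((1UL << 21) - 1);	/* assumed 2 MB huge pages */
		unsigned long vaddr = 0x40155000UL;		/* inside some huge page   */

		/* pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT */
		unsigned long pfn_offset = (vaddr & ~huge_mask) >> page_shift;
		unsigned long per_huge = 1UL << (21 - page_shift);	/* pages_per_huge_page */

		printf("pfn_offset=%lu of %lu base pages\n", pfn_offset, per_huge);	/* 341 of 512 */
		return 0;
	}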
@@ -1660,7 +1691,7 @@ same_page: | |||
1660 | --remainder; | 1691 | --remainder; |
1661 | ++i; | 1692 | ++i; |
1662 | if (vaddr < vma->vm_end && remainder && | 1693 | if (vaddr < vma->vm_end && remainder && |
1663 | pfn_offset < HPAGE_SIZE/PAGE_SIZE) { | 1694 | pfn_offset < pages_per_huge_page(h)) { |
1664 | /* | 1695 | /* |
1665 | * We use pfn_offset to avoid touching the pageframes | 1696 | * We use pfn_offset to avoid touching the pageframes |
1666 | * of this compound page. | 1697 | * of this compound page. |
@@ -1682,13 +1713,14 @@ void hugetlb_change_protection(struct vm_area_struct *vma, | |||
1682 | unsigned long start = address; | 1713 | unsigned long start = address; |
1683 | pte_t *ptep; | 1714 | pte_t *ptep; |
1684 | pte_t pte; | 1715 | pte_t pte; |
1716 | struct hstate *h = hstate_vma(vma); | ||
1685 | 1717 | ||
1686 | BUG_ON(address >= end); | 1718 | BUG_ON(address >= end); |
1687 | flush_cache_range(vma, address, end); | 1719 | flush_cache_range(vma, address, end); |
1688 | 1720 | ||
1689 | spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); | 1721 | spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); |
1690 | spin_lock(&mm->page_table_lock); | 1722 | spin_lock(&mm->page_table_lock); |
1691 | for (; address < end; address += HPAGE_SIZE) { | 1723 | for (; address < end; address += huge_page_size(h)) { |
1692 | ptep = huge_pte_offset(mm, address); | 1724 | ptep = huge_pte_offset(mm, address); |
1693 | if (!ptep) | 1725 | if (!ptep) |
1694 | continue; | 1726 | continue; |
@@ -1711,6 +1743,7 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
1711 | struct vm_area_struct *vma) | 1743 | struct vm_area_struct *vma) |
1712 | { | 1744 | { |
1713 | long ret, chg; | 1745 | long ret, chg; |
1746 | struct hstate *h = hstate_inode(inode); | ||
1714 | 1747 | ||
1715 | if (vma && vma->vm_flags & VM_NORESERVE) | 1748 | if (vma && vma->vm_flags & VM_NORESERVE) |
1716 | return 0; | 1749 | return 0; |
@@ -1739,7 +1772,7 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
1739 | 1772 | ||
1740 | if (hugetlb_get_quota(inode->i_mapping, chg)) | 1773 | if (hugetlb_get_quota(inode->i_mapping, chg)) |
1741 | return -ENOSPC; | 1774 | return -ENOSPC; |
1742 | ret = hugetlb_acct_memory(chg); | 1775 | ret = hugetlb_acct_memory(h, chg); |
1743 | if (ret < 0) { | 1776 | if (ret < 0) { |
1744 | hugetlb_put_quota(inode->i_mapping, chg); | 1777 | hugetlb_put_quota(inode->i_mapping, chg); |
1745 | return ret; | 1778 | return ret; |
@@ -1751,12 +1784,13 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
1751 | 1784 | ||
1752 | void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) | 1785 | void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) |
1753 | { | 1786 | { |
1787 | struct hstate *h = hstate_inode(inode); | ||
1754 | long chg = region_truncate(&inode->i_mapping->private_list, offset); | 1788 | long chg = region_truncate(&inode->i_mapping->private_list, offset); |
1755 | 1789 | ||
1756 | spin_lock(&inode->i_lock); | 1790 | spin_lock(&inode->i_lock); |
1757 | inode->i_blocks -= BLOCKS_PER_HUGEPAGE * freed; | 1791 | inode->i_blocks -= blocks_per_huge_page(h) * freed; |
1758 | spin_unlock(&inode->i_lock); | 1792 | spin_unlock(&inode->i_lock); |
1759 | 1793 | ||
1760 | hugetlb_put_quota(inode->i_mapping, (chg - freed)); | 1794 | hugetlb_put_quota(inode->i_mapping, (chg - freed)); |
1761 | hugetlb_acct_memory(-(chg - freed)); | 1795 | hugetlb_acct_memory(h, -(chg - freed)); |
1762 | } | 1796 | } |
diff --git a/mm/memory.c b/mm/memory.c index 72932489a082..c1c1d6d8c22b 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -903,7 +903,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, | |||
903 | if (unlikely(is_vm_hugetlb_page(vma))) { | 903 | if (unlikely(is_vm_hugetlb_page(vma))) { |
904 | unmap_hugepage_range(vma, start, end, NULL); | 904 | unmap_hugepage_range(vma, start, end, NULL); |
905 | zap_work -= (end - start) / | 905 | zap_work -= (end - start) / |
906 | (HPAGE_SIZE / PAGE_SIZE); | 906 | pages_per_huge_page(hstate_vma(vma)); |
907 | start = end; | 907 | start = end; |
908 | } else | 908 | } else |
909 | start = unmap_page_range(*tlbp, vma, | 909 | start = unmap_page_range(*tlbp, vma, |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c94e58b192c3..e550bec20582 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -1481,7 +1481,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, | |||
1481 | 1481 | ||
1482 | if (unlikely((*mpol)->mode == MPOL_INTERLEAVE)) { | 1482 | if (unlikely((*mpol)->mode == MPOL_INTERLEAVE)) { |
1483 | zl = node_zonelist(interleave_nid(*mpol, vma, addr, | 1483 | zl = node_zonelist(interleave_nid(*mpol, vma, addr, |
1484 | HPAGE_SHIFT), gfp_flags); | 1484 | huge_page_shift(hstate_vma(vma))), gfp_flags); |
1485 | } else { | 1485 | } else { |
1486 | zl = policy_zonelist(gfp_flags, *mpol); | 1486 | zl = policy_zonelist(gfp_flags, *mpol); |
1487 | if ((*mpol)->mode == MPOL_BIND) | 1487 | if ((*mpol)->mode == MPOL_BIND) |
@@ -2220,9 +2220,12 @@ static void check_huge_range(struct vm_area_struct *vma, | |||
2220 | { | 2220 | { |
2221 | unsigned long addr; | 2221 | unsigned long addr; |
2222 | struct page *page; | 2222 | struct page *page; |
2223 | struct hstate *h = hstate_vma(vma); | ||
2224 | unsigned long sz = huge_page_size(h); | ||
2223 | 2225 | ||
2224 | for (addr = start; addr < end; addr += HPAGE_SIZE) { | 2226 | for (addr = start; addr < end; addr += sz) { |
2225 | pte_t *ptep = huge_pte_offset(vma->vm_mm, addr & HPAGE_MASK); | 2227 | pte_t *ptep = huge_pte_offset(vma->vm_mm, |
2228 | addr & huge_page_mask(h)); | ||
2226 | pte_t pte; | 2229 | pte_t pte; |
2227 | 2230 | ||
2228 | if (!ptep) | 2231 | if (!ptep) |
@@ -1812,7 +1812,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, | |||
1812 | struct mempolicy *pol; | 1812 | struct mempolicy *pol; |
1813 | struct vm_area_struct *new; | 1813 | struct vm_area_struct *new; |
1814 | 1814 | ||
1815 | if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK)) | 1815 | if (is_vm_hugetlb_page(vma) && (addr & |
1816 | ~(huge_page_mask(hstate_vma(vma))))) | ||
1816 | return -EINVAL; | 1817 | return -EINVAL; |
1817 | 1818 | ||
1818 | if (mm->map_count >= sysctl_max_map_count) | 1819 | if (mm->map_count >= sysctl_max_map_count) |