about summary refs log tree commit diff stats
path: root/mm/hugetlb.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- mm/hugetlb.c 136
1 files changed, 126 insertions, 10 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d5987a87bbe5..27fad5d9bcf6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -22,7 +22,7 @@
22#include "internal.h" 22#include "internal.h"
23 23
24const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; 24const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
25static unsigned long nr_huge_pages, free_huge_pages; 25static unsigned long nr_huge_pages, free_huge_pages, reserved_huge_pages;
26unsigned long max_huge_pages; 26unsigned long max_huge_pages;
27static struct list_head hugepage_freelists[MAX_NUMNODES]; 27static struct list_head hugepage_freelists[MAX_NUMNODES];
28static unsigned int nr_huge_pages_node[MAX_NUMNODES]; 28static unsigned int nr_huge_pages_node[MAX_NUMNODES];
@@ -120,17 +120,136 @@ void free_huge_page(struct page *page)
120 120
121struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr) 121struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr)
122{ 122{
123 struct inode *inode = vma->vm_file->f_dentry->d_inode;
123 struct page *page; 124 struct page *page;
125 int use_reserve = 0;
126 unsigned long idx;
124 127
125 spin_lock(&hugetlb_lock); 128 spin_lock(&hugetlb_lock);
126 page = dequeue_huge_page(vma, addr); 129
127 if (!page) { 130 if (vma->vm_flags & VM_MAYSHARE) {
128 spin_unlock(&hugetlb_lock); 131
129 return NULL; 132 /* idx = radix tree index, i.e. offset into file in
133 * HPAGE_SIZE units */
134 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
135 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
136
137 /* The hugetlbfs specific inode info stores the number
138 * of "guaranteed available" (huge) pages. That is,
139 * the first 'prereserved_hpages' pages of the inode
140 * are either already instantiated, or have been
141 * pre-reserved (by hugetlb_reserve_for_inode()). Here
142 * we're in the process of instantiating the page, so
143 * we use this to determine whether to draw from the
144 * pre-reserved pool or the truly free pool. */
145 if (idx < HUGETLBFS_I(inode)->prereserved_hpages)
146 use_reserve = 1;
147 }
148
149 if (!use_reserve) {
150 if (free_huge_pages <= reserved_huge_pages)
151 goto fail;
152 } else {
153 BUG_ON(reserved_huge_pages == 0);
154 reserved_huge_pages--;
130 } 155 }
156
157 page = dequeue_huge_page(vma, addr);
158 if (!page)
159 goto fail;
160
131 spin_unlock(&hugetlb_lock); 161 spin_unlock(&hugetlb_lock);
132 set_page_refcounted(page); 162 set_page_refcounted(page);
133 return page; 163 return page;
164
165 fail:
166 WARN_ON(use_reserve); /* reserved allocations shouldn't fail */
167 spin_unlock(&hugetlb_lock);
168 return NULL;
169}
170
171/* hugetlb_extend_reservation()
172 *
173 * Ensure that at least 'atleast' hugepages are, and will remain,
174 * available to instantiate the first 'atleast' pages of the given
175 * inode. If the inode doesn't already have this many pages reserved
176 * or instantiated, set aside some hugepages in the reserved pool to
177 * satisfy later faults (or fail now if there aren't enough, rather
178 * than getting the SIGBUS later).
179 */
180int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info,
181 unsigned long atleast)
182{
183 struct inode *inode = &info->vfs_inode;
184 unsigned long change_in_reserve = 0;
185 int ret = 0;
186
187 spin_lock(&hugetlb_lock);
188 read_lock_irq(&inode->i_mapping->tree_lock);
189
190 if (info->prereserved_hpages >= atleast)
191 goto out;
192
193 /* Because we always call this on shared mappings, none of the
194 * pages beyond info->prereserved_hpages can have been
195 * instantiated, so we need to reserve all of them now. */
196 change_in_reserve = atleast - info->prereserved_hpages;
197
198 if ((reserved_huge_pages + change_in_reserve) > free_huge_pages) {
199 ret = -ENOMEM;
200 goto out;
201 }
202
203 reserved_huge_pages += change_in_reserve;
204 info->prereserved_hpages = atleast;
205
206 out:
207 read_unlock_irq(&inode->i_mapping->tree_lock);
208 spin_unlock(&hugetlb_lock);
209
210 return ret;
211}
212
213/* hugetlb_truncate_reservation()
214 *
215 * This returns pages reserved for the given inode to the general free
216 * hugepage pool. If the inode has any pages prereserved, but not
217 * instantiated, beyond offset (atmost << HPAGE_SHIFT), then release
218 * them.
219 */
220void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info,
221 unsigned long atmost)
222{
223 struct inode *inode = &info->vfs_inode;
224 struct address_space *mapping = inode->i_mapping;
225 unsigned long idx;
226 unsigned long change_in_reserve = 0;
227 struct page *page;
228
229 spin_lock(&hugetlb_lock);
230 read_lock_irq(&inode->i_mapping->tree_lock);
231
232 if (info->prereserved_hpages <= atmost)
233 goto out;
234
235 /* Count pages which were reserved, but not instantiated, and
236 * which we can now release. */
237 for (idx = atmost; idx < info->prereserved_hpages; idx++) {
238 page = radix_tree_lookup(&mapping->page_tree, idx);
239 if (!page)
240 /* Pages which are already instantiated can't
241 * be unreserved (and in fact have already
242 * been removed from the reserved pool) */
243 change_in_reserve++;
244 }
245
246 BUG_ON(reserved_huge_pages < change_in_reserve);
247 reserved_huge_pages -= change_in_reserve;
248 info->prereserved_hpages = atmost;
249
250 out:
251 read_unlock_irq(&inode->i_mapping->tree_lock);
252 spin_unlock(&hugetlb_lock);
134} 253}
135 254
136static int __init hugetlb_init(void) 255static int __init hugetlb_init(void)
@@ -238,9 +357,11 @@ int hugetlb_report_meminfo(char *buf)
238 return sprintf(buf, 357 return sprintf(buf,
239 "HugePages_Total: %5lu\n" 358 "HugePages_Total: %5lu\n"
240 "HugePages_Free: %5lu\n" 359 "HugePages_Free: %5lu\n"
360 "HugePages_Rsvd: %5lu\n"
241 "Hugepagesize: %5lu kB\n", 361 "Hugepagesize: %5lu kB\n",
242 nr_huge_pages, 362 nr_huge_pages,
243 free_huge_pages, 363 free_huge_pages,
364 reserved_huge_pages,
244 HPAGE_SIZE/1024); 365 HPAGE_SIZE/1024);
245} 366}
246 367
@@ -253,11 +374,6 @@ int hugetlb_report_node_meminfo(int nid, char *buf)
253 nid, free_huge_pages_node[nid]); 374 nid, free_huge_pages_node[nid]);
254} 375}
255 376
256int is_hugepage_mem_enough(size_t size)
257{
258 return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages;
259}
260
261/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */ 377/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
262unsigned long hugetlb_total_pages(void) 378unsigned long hugetlb_total_pages(void)
263{ 379{