Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--  mm/hugetlb.c  155
1 file changed, 132 insertions, 23 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8768e5250323..31bbca6b2c90 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -87,6 +87,8 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 			list_del(&page->lru);
 			free_huge_pages--;
 			free_huge_pages_node[nid]--;
+			if (vma && vma->vm_flags & VM_MAYSHARE)
+				resv_huge_pages--;
 			break;
 		}
 	}
@@ -214,15 +216,116 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
 	return page;
 }
 
+/*
+ * Increase the hugetlb pool such that it can accommodate a reservation
+ * of size 'delta'.
+ */
+static int gather_surplus_pages(int delta)
+{
+	struct list_head surplus_list;
+	struct page *page, *tmp;
+	int ret, i;
+	int needed, allocated;
+
+	needed = (resv_huge_pages + delta) - free_huge_pages;
+	if (needed <= 0)
+		return 0;
+
+	allocated = 0;
+	INIT_LIST_HEAD(&surplus_list);
+
+	ret = -ENOMEM;
+retry:
+	spin_unlock(&hugetlb_lock);
+	for (i = 0; i < needed; i++) {
+		page = alloc_buddy_huge_page(NULL, 0);
+		if (!page) {
+			/*
+			 * We were not able to allocate enough pages to
+			 * satisfy the entire reservation so we free what
+			 * we've allocated so far.
+			 */
+			spin_lock(&hugetlb_lock);
+			needed = 0;
+			goto free;
+		}
+
+		list_add(&page->lru, &surplus_list);
+	}
+	allocated += needed;
+
+	/*
+	 * After retaking hugetlb_lock, we need to recalculate 'needed'
+	 * because either resv_huge_pages or free_huge_pages may have changed.
+	 */
+	spin_lock(&hugetlb_lock);
+	needed = (resv_huge_pages + delta) - (free_huge_pages + allocated);
+	if (needed > 0)
+		goto retry;
+
+	/*
+	 * The surplus_list now contains _at_least_ the number of extra pages
+	 * needed to accommodate the reservation. Add the appropriate number
+	 * of pages to the hugetlb pool and free the extras back to the buddy
+	 * allocator.
+	 */
+	needed += allocated;
+	ret = 0;
+free:
+	list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
+		list_del(&page->lru);
+		if ((--needed) >= 0)
+			enqueue_huge_page(page);
+		else
+			update_and_free_page(page);
+	}
+
+	return ret;
+}
+
+/*
+ * When releasing a hugetlb pool reservation, any surplus pages that were
+ * allocated to satisfy the reservation must be explicitly freed if they were
+ * never used.
+ */
+void return_unused_surplus_pages(unsigned long unused_resv_pages)
+{
+	static int nid = -1;
+	struct page *page;
+	unsigned long nr_pages;
+
+	nr_pages = min(unused_resv_pages, surplus_huge_pages);
+
+	while (nr_pages) {
+		nid = next_node(nid, node_online_map);
+		if (nid == MAX_NUMNODES)
+			nid = first_node(node_online_map);
+
+		if (!surplus_huge_pages_node[nid])
+			continue;
+
+		if (!list_empty(&hugepage_freelists[nid])) {
+			page = list_entry(hugepage_freelists[nid].next,
+					  struct page, lru);
+			list_del(&page->lru);
+			update_and_free_page(page);
+			free_huge_pages--;
+			free_huge_pages_node[nid]--;
+			surplus_huge_pages--;
+			surplus_huge_pages_node[nid]--;
+			nr_pages--;
+		}
+	}
+}
+
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
 				    unsigned long addr)
 {
 	struct page *page = NULL;
+	int use_reserved_page = vma->vm_flags & VM_MAYSHARE;
 
 	spin_lock(&hugetlb_lock);
-	if (vma->vm_flags & VM_MAYSHARE)
-		resv_huge_pages--;
-	else if (free_huge_pages <= resv_huge_pages)
+	if (!use_reserved_page && (free_huge_pages <= resv_huge_pages))
 		goto fail;
 
 	page = dequeue_huge_page(vma, addr);
@@ -234,8 +337,6 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	return page;
 
 fail:
-	if (vma->vm_flags & VM_MAYSHARE)
-		resv_huge_pages++;
 	spin_unlock(&hugetlb_lock);
 
 	/*
@@ -243,7 +344,7 @@ fail:
 	 * may have failed due to an undersized hugetlb pool. Try to grab a
 	 * surplus huge page from the buddy allocator.
 	 */
-	if (!(vma->vm_flags & VM_MAYSHARE))
+	if (!use_reserved_page)
 		page = alloc_buddy_huge_page(vma, addr);
 
 	return page;
@@ -952,21 +1053,6 @@ static int hugetlb_acct_memory(long delta)
 	int ret = -ENOMEM;
 
 	spin_lock(&hugetlb_lock);
-	if ((delta + resv_huge_pages) <= free_huge_pages) {
-		resv_huge_pages += delta;
-		ret = 0;
-	}
-	spin_unlock(&hugetlb_lock);
-	return ret;
-}
-
-int hugetlb_reserve_pages(struct inode *inode, long from, long to)
-{
-	long ret, chg;
-
-	chg = region_chg(&inode->i_mapping->private_list, from, to);
-	if (chg < 0)
-		return chg;
 	/*
 	 * When cpuset is configured, it breaks the strict hugetlb page
 	 * reservation as the accounting is done on a global variable. Such
@@ -984,8 +1070,31 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
 	 * a best attempt and hopefully to minimize the impact of changing
 	 * semantics that cpuset has.
 	 */
-	if (chg > cpuset_mems_nr(free_huge_pages_node))
-		return -ENOMEM;
+	if (delta > 0) {
+		if (gather_surplus_pages(delta) < 0)
+			goto out;
+
+		if (delta > cpuset_mems_nr(free_huge_pages_node))
+			goto out;
+	}
+
+	ret = 0;
+	resv_huge_pages += delta;
+	if (delta < 0)
+		return_unused_surplus_pages((unsigned long) -delta);
+
+out:
+	spin_unlock(&hugetlb_lock);
+	return ret;
+}
+
+int hugetlb_reserve_pages(struct inode *inode, long from, long to)
+{
+	long ret, chg;
+
+	chg = region_chg(&inode->i_mapping->private_list, from, to);
+	if (chg < 0)
+		return chg;
 
 	ret = hugetlb_acct_memory(chg);
 	if (ret < 0)