Diffstat (limited to 'mm/hugetlb.c')

-rw-r--r--	mm/hugetlb.c	155
1 file changed, 132 insertions, 23 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8768e5250323..31bbca6b2c90 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -87,6 +87,8 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 			list_del(&page->lru);
 			free_huge_pages--;
 			free_huge_pages_node[nid]--;
+			if (vma && vma->vm_flags & VM_MAYSHARE)
+				resv_huge_pages--;
 			break;
 		}
 	}
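
This hunk moves consumption of a reservation into the dequeue path: a page handed to a shared (VM_MAYSHARE) mapping now debits resv_huge_pages at the moment it leaves the free list, rather than up front in alloc_huge_page() (see the alloc_huge_page() hunks below). A minimal userspace sketch of the counter discipline being maintained; all names are stand-ins, not kernel code:

#include <stdbool.h>

static long free_huge, resv_huge;	/* stand-ins for the kernel counters */

/* Shared mappings consume their reservation as the page is dequeued;
 * private mappings may never dip below the reserved watermark. */
static bool take_page(bool shared)
{
	if (!shared && free_huge <= resv_huge)
		return false;
	free_huge--;
	if (shared)
		resv_huge--;
	return true;
}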
@@ -214,15 +216,116 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
 	return page;
 }
 
+/*
+ * Increase the hugetlb pool such that it can accommodate a reservation
+ * of size 'delta'.
+ */
+static int gather_surplus_pages(int delta)
+{
+	struct list_head surplus_list;
+	struct page *page, *tmp;
+	int ret, i;
+	int needed, allocated;
+
+	needed = (resv_huge_pages + delta) - free_huge_pages;
+	if (needed <= 0)
+		return 0;
+
+	allocated = 0;
+	INIT_LIST_HEAD(&surplus_list);
+
+	ret = -ENOMEM;
+retry:
+	spin_unlock(&hugetlb_lock);
+	for (i = 0; i < needed; i++) {
+		page = alloc_buddy_huge_page(NULL, 0);
+		if (!page) {
+			/*
+			 * We were not able to allocate enough pages to
+			 * satisfy the entire reservation so we free what
+			 * we've allocated so far.
+			 */
+			spin_lock(&hugetlb_lock);
+			needed = 0;
+			goto free;
+		}
+
+		list_add(&page->lru, &surplus_list);
+	}
+	allocated += needed;
+
+	/*
+	 * After retaking hugetlb_lock, we need to recalculate 'needed'
+	 * because either resv_huge_pages or free_huge_pages may have changed.
+	 */
+	spin_lock(&hugetlb_lock);
+	needed = (resv_huge_pages + delta) - (free_huge_pages + allocated);
+	if (needed > 0)
+		goto retry;
+
+	/*
+	 * The surplus_list now contains _at_least_ the number of extra pages
+	 * needed to accommodate the reservation. Add the appropriate number
+	 * of pages to the hugetlb pool and free the extras back to the buddy
+	 * allocator.
+	 */
+	needed += allocated;
+	ret = 0;
+free:
+	list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
+		list_del(&page->lru);
+		if ((--needed) >= 0)
+			enqueue_huge_page(page);
+		else
+			update_and_free_page(page);
+	}
+
+	return ret;
+}
+
+/*
+ * When releasing a hugetlb pool reservation, any surplus pages that were
+ * allocated to satisfy the reservation must be explicitly freed if they were
+ * never used.
+ */
+void return_unused_surplus_pages(unsigned long unused_resv_pages)
+{
+	static int nid = -1;
+	struct page *page;
+	unsigned long nr_pages;
+
+	nr_pages = min(unused_resv_pages, surplus_huge_pages);
+
+	while (nr_pages) {
+		nid = next_node(nid, node_online_map);
+		if (nid == MAX_NUMNODES)
+			nid = first_node(node_online_map);
+
+		if (!surplus_huge_pages_node[nid])
+			continue;
+
+		if (!list_empty(&hugepage_freelists[nid])) {
+			page = list_entry(hugepage_freelists[nid].next,
+					  struct page, lru);
+			list_del(&page->lru);
+			update_and_free_page(page);
+			free_huge_pages--;
+			free_huge_pages_node[nid]--;
+			surplus_huge_pages--;
+			surplus_huge_pages_node[nid]--;
+			nr_pages--;
+		}
+	}
+}
+
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
 				    unsigned long addr)
 {
 	struct page *page = NULL;
+	int use_reserved_page = vma->vm_flags & VM_MAYSHARE;
 
 	spin_lock(&hugetlb_lock);
-	if (vma->vm_flags & VM_MAYSHARE)
-		resv_huge_pages--;
-	else if (free_huge_pages <= resv_huge_pages)
+	if (!use_reserved_page && (free_huge_pages <= resv_huge_pages))
 		goto fail;
 
 	page = dequeue_huge_page(vma, addr);
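
Two things in the hunk above are worth calling out. gather_surplus_pages() must call into the buddy allocator, which can sleep, so it drops hugetlb_lock around each allocation pass and recomputes 'needed' after retaking it, since resv_huge_pages and free_huge_pages may have moved in the meantime; any over-allocation is handed back via update_and_free_page(). return_unused_surplus_pages() walks the online nodes round-robin (the static 'nid') so surplus pages are released evenly across nodes. A compilable userspace analogue of the drop-revalidate-retry loop, with illustrative names only:

#include <pthread.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static long free_pages, resv_pages;

static long alloc_one(void) { return 1; }	/* stand-in for the buddy allocator */

static int grow_pool_for(long delta)
{
	long needed, got = 0;

	pthread_mutex_lock(&pool_lock);
	/* Re-test after every unlocked pass: other threads may have grown
	 * or shrunk the pool while the lock was dropped. */
	while ((needed = (resv_pages + delta) - (free_pages + got)) > 0) {
		pthread_mutex_unlock(&pool_lock);	/* allocation may block */
		while (needed-- > 0)
			got += alloc_one();
		pthread_mutex_lock(&pool_lock);
	}
	free_pages += got;
	pthread_mutex_unlock(&pool_lock);
	return 0;
}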
@@ -234,8 +337,6 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	return page;
 
 fail:
-	if (vma->vm_flags & VM_MAYSHARE)
-		resv_huge_pages++;
 	spin_unlock(&hugetlb_lock);
 
 	/*
@@ -243,7 +344,7 @@ fail:
 	 * may have failed due to an undersized hugetlb pool. Try to grab a
 	 * surplus huge page from the buddy allocator.
 	 */
-	if (!(vma->vm_flags & VM_MAYSHARE))
+	if (!use_reserved_page)
 		page = alloc_buddy_huge_page(vma, addr);
 
 	return page;
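
With the two hunks above applied, alloc_huge_page() no longer touches resv_huge_pages at all: a shared mapping dequeues directly (the reservation is debited inside dequeue_huge_page()), while a private request that would eat into the reserve skips the pool and falls back to a freshly allocated surplus page; shared mappings get no such fallback. A sketch of that decision under an assumed 2MB huge page size, with pool_dequeue() as a hypothetical helper:

#include <stdlib.h>

#define HPAGE_SIZE (2UL << 20)		/* assumed huge page size */

extern void *pool_dequeue(void);	/* hypothetical: take from the free list */

static void *alloc_huge_like(int shared, long free_cnt, long resv_cnt)
{
	void *page = NULL;

	if (shared || free_cnt > resv_cnt)
		page = pool_dequeue();
	if (!page && !shared)		/* private mappings fall back to "buddy" */
		page = aligned_alloc(HPAGE_SIZE, HPAGE_SIZE);
	return page;
}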
@@ -952,21 +1053,6 @@ static int hugetlb_acct_memory(long delta)
 	int ret = -ENOMEM;
 
 	spin_lock(&hugetlb_lock);
-	if ((delta + resv_huge_pages) <= free_huge_pages) {
-		resv_huge_pages += delta;
-		ret = 0;
-	}
-	spin_unlock(&hugetlb_lock);
-	return ret;
-}
-
-int hugetlb_reserve_pages(struct inode *inode, long from, long to)
-{
-	long ret, chg;
-
-	chg = region_chg(&inode->i_mapping->private_list, from, to);
-	if (chg < 0)
-		return chg;
 	/*
 	 * When cpuset is configured, it breaks the strict hugetlb page
 	 * reservation as the accounting is done on a global variable. Such
@@ -984,8 +1070,31 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
 	 * a best attempt and hopefully to minimize the impact of changing
 	 * semantics that cpuset has.
 	 */
-	if (chg > cpuset_mems_nr(free_huge_pages_node))
-		return -ENOMEM;
+	if (delta > 0) {
+		if (gather_surplus_pages(delta) < 0)
+			goto out;
+
+		if (delta > cpuset_mems_nr(free_huge_pages_node))
+			goto out;
+	}
+
+	ret = 0;
+	resv_huge_pages += delta;
+	if (delta < 0)
+		return_unused_surplus_pages((unsigned long) -delta);
+
+out:
+	spin_unlock(&hugetlb_lock);
+	return ret;
+}
+
+int hugetlb_reserve_pages(struct inode *inode, long from, long to)
+{
+	long ret, chg;
+
+	chg = region_chg(&inode->i_mapping->private_list, from, to);
+	if (chg < 0)
+		return chg;
 
 	ret = hugetlb_acct_memory(chg);
 	if (ret < 0)
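
hugetlb_acct_memory() now covers both directions of a reservation change: a positive delta first grows the pool with surplus pages and keeps the best-effort cpuset check, while a negative delta returns any surplus pages the reservation never used. A worked example with hypothetical numbers for the sizing rule in gather_surplus_pages():

#include <stdio.h>

int main(void)
{
	/* Hypothetical pool state: 6 pages free, none yet reserved. */
	long free_hp = 6, resv_hp = 0;
	long delta = 8;	/* e.g. chg for a new 8-huge-page MAP_SHARED region */

	long needed = (resv_hp + delta) - free_hp;
	printf("surplus pages to allocate: %ld\n", needed);	/* prints 2 */
	return 0;
}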