Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--  mm/hugetlb.c | 398
1 file changed, 326 insertions(+), 72 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index eab8c428cc93..ae2959bb59cb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -23,12 +23,16 @@
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
 static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
+static unsigned long surplus_huge_pages;
 unsigned long max_huge_pages;
 static struct list_head hugepage_freelists[MAX_NUMNODES];
 static unsigned int nr_huge_pages_node[MAX_NUMNODES];
 static unsigned int free_huge_pages_node[MAX_NUMNODES];
+static unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
+int hugetlb_dynamic_pool;
+static int hugetlb_next_nid;
 
 /*
  * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
@@ -85,6 +89,8 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
                         list_del(&page->lru);
                         free_huge_pages--;
                         free_huge_pages_node[nid]--;
+                        if (vma && vma->vm_flags & VM_MAYSHARE)
+                                resv_huge_pages--;
                         break;
                 }
         }
@@ -92,58 +98,269 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
         return page;
 }
 
+static void update_and_free_page(struct page *page)
+{
+        int i;
+        nr_huge_pages--;
+        nr_huge_pages_node[page_to_nid(page)]--;
+        for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
+                page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
+                                1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
+                                1 << PG_private | 1<< PG_writeback);
+        }
+        set_compound_page_dtor(page, NULL);
+        set_page_refcounted(page);
+        __free_pages(page, HUGETLB_PAGE_ORDER);
+}
+
 static void free_huge_page(struct page *page)
 {
-        BUG_ON(page_count(page));
+        int nid = page_to_nid(page);
 
+        BUG_ON(page_count(page));
         INIT_LIST_HEAD(&page->lru);
 
         spin_lock(&hugetlb_lock);
-        enqueue_huge_page(page);
+        if (surplus_huge_pages_node[nid]) {
+                update_and_free_page(page);
+                surplus_huge_pages--;
+                surplus_huge_pages_node[nid]--;
+        } else {
+                enqueue_huge_page(page);
+        }
         spin_unlock(&hugetlb_lock);
 }
 
-static int alloc_fresh_huge_page(void)
+/*
+ * Increment or decrement surplus_huge_pages.  Keep node-specific counters
+ * balanced by operating on them in a round-robin fashion.
+ * Returns 1 if an adjustment was made.
+ */
+static int adjust_pool_surplus(int delta)
 {
         static int prev_nid;
-        struct page *page;
-        int nid;
+        int nid = prev_nid;
+        int ret = 0;
+
+        VM_BUG_ON(delta != -1 && delta != 1);
+        do {
+                nid = next_node(nid, node_online_map);
+                if (nid == MAX_NUMNODES)
+                        nid = first_node(node_online_map);
+
+                /* To shrink on this node, there must be a surplus page */
+                if (delta < 0 && !surplus_huge_pages_node[nid])
+                        continue;
+                /* Surplus cannot exceed the total number of pages */
+                if (delta > 0 && surplus_huge_pages_node[nid] >=
+                                                nr_huge_pages_node[nid])
+                        continue;
+
+                surplus_huge_pages += delta;
+                surplus_huge_pages_node[nid] += delta;
+                ret = 1;
+                break;
+        } while (nid != prev_nid);
 
-        /*
-         * Copy static prev_nid to local nid, work on that, then copy it
-         * back to prev_nid afterwards: otherwise there's a window in which
-         * a racer might pass invalid nid MAX_NUMNODES to alloc_pages_node.
-         * But we don't need to use a spin_lock here: it really doesn't
-         * matter if occasionally a racer chooses the same nid as we do.
-         */
-        nid = next_node(prev_nid, node_online_map);
-        if (nid == MAX_NUMNODES)
-                nid = first_node(node_online_map);
         prev_nid = nid;
+        return ret;
+}
+
+static struct page *alloc_fresh_huge_page_node(int nid)
+{
+        struct page *page;
 
-        page = alloc_pages_node(nid, htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
+        page = alloc_pages_node(nid,
+                htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|__GFP_NOWARN,
+                HUGETLB_PAGE_ORDER);
+        if (page) {
+                set_compound_page_dtor(page, free_huge_page);
+                spin_lock(&hugetlb_lock);
+                nr_huge_pages++;
+                nr_huge_pages_node[nid]++;
+                spin_unlock(&hugetlb_lock);
+                put_page(page); /* free it into the hugepage allocator */
+        }
+
+        return page;
+}
+
+static int alloc_fresh_huge_page(void)
+{
+        struct page *page;
+        int start_nid;
+        int next_nid;
+        int ret = 0;
+
+        start_nid = hugetlb_next_nid;
+
+        do {
+                page = alloc_fresh_huge_page_node(hugetlb_next_nid);
+                if (page)
+                        ret = 1;
+                /*
+                 * Use a helper variable to find the next node and then
+                 * copy it back to hugetlb_next_nid afterwards:
+                 * otherwise there's a window in which a racer might
+                 * pass invalid nid MAX_NUMNODES to alloc_pages_node.
+                 * But we don't need to use a spin_lock here: it really
+                 * doesn't matter if occasionally a racer chooses the
+                 * same nid as we do.  Move nid forward in the mask even
+                 * if we just successfully allocated a hugepage so that
+                 * the next caller gets hugepages on the next node.
+                 */
+                next_nid = next_node(hugetlb_next_nid, node_online_map);
+                if (next_nid == MAX_NUMNODES)
+                        next_nid = first_node(node_online_map);
+                hugetlb_next_nid = next_nid;
+        } while (!page && hugetlb_next_nid != start_nid);
+
+        return ret;
+}
+
+static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
+                                                unsigned long address)
+{
+        struct page *page;
+
+        /* Check if the dynamic pool is enabled */
+        if (!hugetlb_dynamic_pool)
+                return NULL;
+
+        page = alloc_pages(htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
                                         HUGETLB_PAGE_ORDER);
         if (page) {
                 set_compound_page_dtor(page, free_huge_page);
                 spin_lock(&hugetlb_lock);
                 nr_huge_pages++;
                 nr_huge_pages_node[page_to_nid(page)]++;
+                surplus_huge_pages++;
+                surplus_huge_pages_node[page_to_nid(page)]++;
                 spin_unlock(&hugetlb_lock);
-                put_page(page); /* free it into the hugepage allocator */
-                return 1;
         }
-        return 0;
+
+        return page;
+}
+
+/*
+ * Increase the hugetlb pool such that it can accomodate a reservation
+ * of size 'delta'.
+ */
+static int gather_surplus_pages(int delta)
+{
+        struct list_head surplus_list;
+        struct page *page, *tmp;
+        int ret, i;
+        int needed, allocated;
+
+        needed = (resv_huge_pages + delta) - free_huge_pages;
+        if (needed <= 0)
+                return 0;
+
+        allocated = 0;
+        INIT_LIST_HEAD(&surplus_list);
+
+        ret = -ENOMEM;
+retry:
+        spin_unlock(&hugetlb_lock);
+        for (i = 0; i < needed; i++) {
+                page = alloc_buddy_huge_page(NULL, 0);
+                if (!page) {
+                        /*
+                         * We were not able to allocate enough pages to
+                         * satisfy the entire reservation so we free what
+                         * we've allocated so far.
+                         */
+                        spin_lock(&hugetlb_lock);
+                        needed = 0;
+                        goto free;
+                }
+
+                list_add(&page->lru, &surplus_list);
+        }
+        allocated += needed;
+
+        /*
+         * After retaking hugetlb_lock, we need to recalculate 'needed'
+         * because either resv_huge_pages or free_huge_pages may have changed.
+         */
+        spin_lock(&hugetlb_lock);
+        needed = (resv_huge_pages + delta) - (free_huge_pages + allocated);
+        if (needed > 0)
+                goto retry;
+
+        /*
+         * The surplus_list now contains _at_least_ the number of extra pages
+         * needed to accomodate the reservation.  Add the appropriate number
+         * of pages to the hugetlb pool and free the extras back to the buddy
+         * allocator.
+         */
+        needed += allocated;
+        ret = 0;
+free:
+        list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
+                list_del(&page->lru);
+                if ((--needed) >= 0)
+                        enqueue_huge_page(page);
+                else {
+                        /*
+                         * Decrement the refcount and free the page using its
+                         * destructor.  This must be done with hugetlb_lock
+                         * unlocked which is safe because free_huge_page takes
+                         * hugetlb_lock before deciding how to free the page.
+                         */
+                        spin_unlock(&hugetlb_lock);
+                        put_page(page);
+                        spin_lock(&hugetlb_lock);
+                }
+        }
+
+        return ret;
+}
+
+/*
+ * When releasing a hugetlb pool reservation, any surplus pages that were
+ * allocated to satisfy the reservation must be explicitly freed if they were
+ * never used.
+ */
+void return_unused_surplus_pages(unsigned long unused_resv_pages)
+{
+        static int nid = -1;
+        struct page *page;
+        unsigned long nr_pages;
+
+        nr_pages = min(unused_resv_pages, surplus_huge_pages);
+
+        while (nr_pages) {
+                nid = next_node(nid, node_online_map);
+                if (nid == MAX_NUMNODES)
+                        nid = first_node(node_online_map);
+
+                if (!surplus_huge_pages_node[nid])
+                        continue;
+
+                if (!list_empty(&hugepage_freelists[nid])) {
+                        page = list_entry(hugepage_freelists[nid].next,
+                                          struct page, lru);
+                        list_del(&page->lru);
+                        update_and_free_page(page);
+                        free_huge_pages--;
+                        free_huge_pages_node[nid]--;
+                        surplus_huge_pages--;
+                        surplus_huge_pages_node[nid]--;
+                        nr_pages--;
+                }
+        }
 }
 
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
                                     unsigned long addr)
 {
-        struct page *page;
+        struct page *page = NULL;
+        int use_reserved_page = vma->vm_flags & VM_MAYSHARE;
 
         spin_lock(&hugetlb_lock);
-        if (vma->vm_flags & VM_MAYSHARE)
-                resv_huge_pages--;
-        else if (free_huge_pages <= resv_huge_pages)
+        if (!use_reserved_page && (free_huge_pages <= resv_huge_pages))
                 goto fail;
 
         page = dequeue_huge_page(vma, addr);
@@ -155,10 +372,17 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
         return page;
 
 fail:
-        if (vma->vm_flags & VM_MAYSHARE)
-                resv_huge_pages++;
         spin_unlock(&hugetlb_lock);
-        return NULL;
+
+        /*
+         * Private mappings do not use reserved huge pages so the allocation
+         * may have failed due to an undersized hugetlb pool.  Try to grab a
+         * surplus huge page from the buddy allocator.
+         */
+        if (!use_reserved_page)
+                page = alloc_buddy_huge_page(vma, addr);
+
+        return page;
 }
 
 static int __init hugetlb_init(void)
@@ -171,6 +395,8 @@ static int __init hugetlb_init(void)
         for (i = 0; i < MAX_NUMNODES; ++i)
                 INIT_LIST_HEAD(&hugepage_freelists[i]);
 
+        hugetlb_next_nid = first_node(node_online_map);
+
         for (i = 0; i < max_huge_pages; ++i) {
                 if (!alloc_fresh_huge_page())
                         break;
@@ -201,21 +427,6 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
 }
 
 #ifdef CONFIG_SYSCTL
-static void update_and_free_page(struct page *page)
-{
-        int i;
-        nr_huge_pages--;
-        nr_huge_pages_node[page_to_nid(page)]--;
-        for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
-                page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
-                                1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
-                                1 << PG_private | 1<< PG_writeback);
-        }
-        set_compound_page_dtor(page, NULL);
-        set_page_refcounted(page);
-        __free_pages(page, HUGETLB_PAGE_ORDER);
-}
-
 #ifdef CONFIG_HIGHMEM
 static void try_to_free_low(unsigned long count)
 {
@@ -224,14 +435,14 @@ static void try_to_free_low(unsigned long count)
         for (i = 0; i < MAX_NUMNODES; ++i) {
                 struct page *page, *next;
                 list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
+                        if (count >= nr_huge_pages)
+                                return;
                         if (PageHighMem(page))
                                 continue;
                         list_del(&page->lru);
                         update_and_free_page(page);
                         free_huge_pages--;
                         free_huge_pages_node[page_to_nid(page)]--;
-                        if (count >= nr_huge_pages)
-                                return;
                 }
         }
 }
@@ -241,26 +452,61 @@ static inline void try_to_free_low(unsigned long count)
 }
 #endif
 
+#define persistent_huge_pages (nr_huge_pages - surplus_huge_pages)
 static unsigned long set_max_huge_pages(unsigned long count)
 {
-        while (count > nr_huge_pages) {
-                if (!alloc_fresh_huge_page())
-                        return nr_huge_pages;
-        }
-        if (count >= nr_huge_pages)
-                return nr_huge_pages;
+        unsigned long min_count, ret;
 
+        /*
+         * Increase the pool size
+         * First take pages out of surplus state.  Then make up the
+         * remaining difference by allocating fresh huge pages.
+         */
         spin_lock(&hugetlb_lock);
-        count = max(count, resv_huge_pages);
-        try_to_free_low(count);
-        while (count < nr_huge_pages) {
+        while (surplus_huge_pages && count > persistent_huge_pages) {
+                if (!adjust_pool_surplus(-1))
+                        break;
+        }
+
+        while (count > persistent_huge_pages) {
+                int ret;
+                /*
+                 * If this allocation races such that we no longer need the
+                 * page, free_huge_page will handle it by freeing the page
+                 * and reducing the surplus.
+                 */
+                spin_unlock(&hugetlb_lock);
+                ret = alloc_fresh_huge_page();
+                spin_lock(&hugetlb_lock);
+                if (!ret)
+                        goto out;
+
+        }
+
+        /*
+         * Decrease the pool size
+         * First return free pages to the buddy allocator (being careful
+         * to keep enough around to satisfy reservations).  Then place
+         * pages into surplus state as needed so the pool will shrink
+         * to the desired size as pages become free.
+         */
+        min_count = resv_huge_pages + nr_huge_pages - free_huge_pages;
+        min_count = max(count, min_count);
+        try_to_free_low(min_count);
+        while (min_count < persistent_huge_pages) {
                 struct page *page = dequeue_huge_page(NULL, 0);
                 if (!page)
                         break;
                 update_and_free_page(page);
         }
+        while (count < persistent_huge_pages) {
+                if (!adjust_pool_surplus(1))
+                        break;
+        }
+out:
+        ret = persistent_huge_pages;
         spin_unlock(&hugetlb_lock);
-        return nr_huge_pages;
+        return ret;
 }
 
 int hugetlb_sysctl_handler(struct ctl_table *table, int write,
@@ -292,10 +538,12 @@ int hugetlb_report_meminfo(char *buf)
                         "HugePages_Total: %5lu\n"
                         "HugePages_Free:  %5lu\n"
                         "HugePages_Rsvd:  %5lu\n"
+                        "HugePages_Surp:  %5lu\n"
                         "Hugepagesize:    %5lu kB\n",
                         nr_huge_pages,
                         free_huge_pages,
                         resv_huge_pages,
+                        surplus_huge_pages,
                         HPAGE_SIZE/1024);
 }
 
@@ -355,7 +603,6 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
         entry = pte_mkwrite(pte_mkdirty(*ptep));
         if (ptep_set_access_flags(vma, address, ptep, entry, 1)) {
                 update_mmu_cache(vma, address, entry);
-                lazy_mmu_prot_update(entry);
         }
 }
 
@@ -708,7 +955,6 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
                         pte = huge_ptep_get_and_clear(mm, address, ptep);
                         pte = pte_mkhuge(pte_modify(pte, newprot));
                         set_huge_pte_at(mm, address, ptep, pte);
-                        lazy_mmu_prot_update(pte);
                 }
         }
         spin_unlock(&mm->page_table_lock);
@@ -843,21 +1089,6 @@ static int hugetlb_acct_memory(long delta)
         int ret = -ENOMEM;
 
         spin_lock(&hugetlb_lock);
-        if ((delta + resv_huge_pages) <= free_huge_pages) {
-                resv_huge_pages += delta;
-                ret = 0;
-        }
-        spin_unlock(&hugetlb_lock);
-        return ret;
-}
-
-int hugetlb_reserve_pages(struct inode *inode, long from, long to)
-{
-        long ret, chg;
-
-        chg = region_chg(&inode->i_mapping->private_list, from, to);
-        if (chg < 0)
-                return chg;
         /*
          * When cpuset is configured, it breaks the strict hugetlb page
          * reservation as the accounting is done on a global variable. Such
@@ -875,8 +1106,31 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
         * a best attempt and hopefully to minimize the impact of changing
         * semantics that cpuset has.
         */
-        if (chg > cpuset_mems_nr(free_huge_pages_node))
-                return -ENOMEM;
+        if (delta > 0) {
+                if (gather_surplus_pages(delta) < 0)
+                        goto out;
+
+                if (delta > cpuset_mems_nr(free_huge_pages_node))
+                        goto out;
+        }
+
+        ret = 0;
+        resv_huge_pages += delta;
+        if (delta < 0)
+                return_unused_surplus_pages((unsigned long) -delta);
+
+out:
+        spin_unlock(&hugetlb_lock);
+        return ret;
+}
+
+int hugetlb_reserve_pages(struct inode *inode, long from, long to)
+{
+        long ret, chg;
+
+        chg = region_chg(&inode->i_mapping->private_list, from, to);
+        if (chg < 0)
+                return chg;
 
         ret = hugetlb_acct_memory(chg);
         if (ret < 0)