Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  70
1 file changed, 57 insertions(+), 13 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dc523a1f270d..97d6827c7d66 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -51,6 +51,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
 
@@ -151,7 +152,8 @@ static void bad_page(struct page *page)
 			1 << PG_reclaim |
 			1 << PG_slab    |
 			1 << PG_swapcache |
-			1 << PG_writeback );
+			1 << PG_writeback |
+			1 << PG_buddy );
 	set_page_count(page, 0);
 	reset_page_mapcount(page);
 	page->mapping = NULL;
@@ -236,12 +238,12 @@ static inline unsigned long page_order(struct page *page) {
 
 static inline void set_page_order(struct page *page, int order) {
 	set_page_private(page, order);
-	__SetPagePrivate(page);
+	__SetPageBuddy(page);
 }
 
 static inline void rmv_page_order(struct page *page)
 {
-	__ClearPagePrivate(page);
+	__ClearPageBuddy(page);
 	set_page_private(page, 0);
 }
 
@@ -280,11 +282,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
  * This function checks whether a page is free && is the buddy
  * we can do coalesce a page and its buddy if
  * (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
+ *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
  *
+ * For recording page's order, we use page_private(page).
  */
 static inline int page_is_buddy(struct page *page, int order)
 {
@@ -293,10 +297,10 @@ static inline int page_is_buddy(struct page *page, int order)
 		return 0;
 #endif
 
-	if (PagePrivate(page)           &&
-	    (page_order(page) == order) &&
-	     page_count(page) == 0)
+	if (PageBuddy(page) && page_order(page) == order) {
+		BUG_ON(page_count(page) != 0);
 		return 1;
+	}
 	return 0;
 }
 
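
As context for the two hunks above, a minimal userspace sketch of the bookkeeping they establish; the toy struct page, the PG_BUDDY constant, and main() below are illustrative assumptions, not kernel code. The point is the invariant: a page is eligible for coalescing only if it carries the buddy marker and records the matching order, and a marked page must have a zero reference count, which the new BUG_ON asserts.

/* Toy model of the PG_buddy bookkeeping -- illustrative only. */
#include <assert.h>
#include <stdio.h>

struct page {
	unsigned long flags;    /* stand-in for page->flags */
	unsigned long private;  /* stand-in for page_private(page): the order */
	int count;              /* stand-in for page_count(page) */
};

#define PG_BUDDY (1UL << 0)     /* hypothetical bit; the kernel assigns a real one */

static void set_page_order(struct page *page, int order)
{
	page->private = (unsigned long)order;
	page->flags |= PG_BUDDY;        /* __SetPageBuddy() */
}

static void rmv_page_order(struct page *page)
{
	page->flags &= ~PG_BUDDY;       /* __ClearPageBuddy() */
	page->private = 0;
}

static int page_is_buddy(struct page *page, int order)
{
	if ((page->flags & PG_BUDDY) && page->private == (unsigned long)order) {
		assert(page->count == 0);   /* mirrors the new BUG_ON() */
		return 1;
	}
	return 0;
}

int main(void)
{
	struct page p = { 0, 0, 0 };

	set_page_order(&p, 3);  /* mark p as the head of a free 2^3-page block */
	printf("buddy at order 3? %d\n", page_is_buddy(&p, 3));  /* prints 1 */
	printf("buddy at order 2? %d\n", page_is_buddy(&p, 2));  /* prints 0 */
	rmv_page_order(&p);     /* block allocated: drop marker and order */
	printf("after removal:    %d\n", page_is_buddy(&p, 3));  /* prints 0 */
	return 0;
}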
@@ -313,7 +317,7 @@ static inline int page_is_buddy(struct page *page, int order)
  * as necessary, plus some accounting needed to play nicely with other
  * parts of the VM system.
  * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
  * order is recorded in page_private(page) field.
  * So when we are allocating or freeing one, we can derive the state of the
  * other. That is, if we allocate a small block, and both were
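
The "derive the state of the other" remark relies on the index arithmetic used by helpers such as __find_combined_index() (named in an earlier hunk header): at a given order, a block and its buddy differ in exactly one bit of the page index. A standalone sketch of that arithmetic, with illustrative function names and main():

/* The index arithmetic behind __find_combined_index() and buddy lookup --
 * a standalone sketch; function names and main() are illustrative. */
#include <stdio.h>

/* At a given order, a block's buddy differs from it only in bit `order`
 * of the page index, so XOR finds it. */
static unsigned long buddy_index(unsigned long page_idx, unsigned int order)
{
	return page_idx ^ (1UL << order);
}

/* The coalesced block starts at the lower of the two indices, i.e. with
 * bit `order` cleared. */
static unsigned long combined_index(unsigned long page_idx, unsigned int order)
{
	return page_idx & ~(1UL << order);
}

int main(void)
{
	unsigned long idx = 12;  /* head of a free 2^2 = 4 page block */
	unsigned int order = 2;

	printf("buddy of %lu at order %u: %lu\n",
	       idx, order, buddy_index(idx, order));       /* prints 8 */
	printf("combined block starts at:  %lu\n",
	       combined_index(idx, order));                /* prints 8 */
	return 0;
}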
@@ -376,7 +380,8 @@ static inline int free_pages_check(struct page *page)
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved ))))
+			1 << PG_reserved |
+			1 << PG_buddy ))))
 		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
@@ -524,7 +529,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved ))))
+			1 << PG_reserved |
+			1 << PG_buddy ))))
 		bad_page(page);
 
 	/*
@@ -2472,6 +2478,38 @@ void __init page_alloc_init(void)
 }
 
 /*
+ * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ *	or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+	struct pglist_data *pgdat;
+	unsigned long reserve_pages = 0;
+	int i, j;
+
+	for_each_online_pgdat(pgdat) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			struct zone *zone = pgdat->node_zones + i;
+			unsigned long max = 0;
+
+			/* Find valid and maximum lowmem_reserve in the zone */
+			for (j = i; j < MAX_NR_ZONES; j++) {
+				if (zone->lowmem_reserve[j] > max)
+					max = zone->lowmem_reserve[j];
+			}
+
+			/* we treat pages_high as reserved pages. */
+			max += zone->pages_high;
+
+			if (max > zone->present_pages)
+				max = zone->present_pages;
+			reserve_pages += max;
+		}
+	}
+	totalreserve_pages = reserve_pages;
+}
+
+/*
  * setup_per_zone_lowmem_reserve - called whenever
  *	sysctl_lower_zone_reserve_ratio changes.  Ensures that each zone
  *	has a correct pages reserved value, so an adequate number of
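
To make the new accounting concrete, here is a standalone rework of the same loop for one imaginary node; the simplified struct zone and every number below (zone sizes, lowmem_reserve entries, pages_high) are invented for illustration:

/* Standalone rework of calculate_totalreserve_pages() for one imaginary
 * node -- all figures are invented for illustration. */
#include <stdio.h>

#define MAX_NR_ZONES 2

struct zone {
	unsigned long lowmem_reserve[MAX_NR_ZONES];
	unsigned long pages_high;
	unsigned long present_pages;
};

int main(void)
{
	/* zone 0 (e.g. DMA) and zone 1 (e.g. NORMAL) on one node */
	struct zone zones[MAX_NR_ZONES] = {
		{ .lowmem_reserve = { 0, 1024 }, .pages_high = 32,  .present_pages = 4096 },
		{ .lowmem_reserve = { 0, 0 },    .pages_high = 256, .present_pages = 126976 },
	};
	unsigned long reserve_pages = 0;
	int i, j;

	for (i = 0; i < MAX_NR_ZONES; i++) {
		struct zone *zone = &zones[i];
		unsigned long max = 0;

		/* the largest lowmem_reserve entry for this zone and above */
		for (j = i; j < MAX_NR_ZONES; j++)
			if (zone->lowmem_reserve[j] > max)
				max = zone->lowmem_reserve[j];

		max += zone->pages_high;          /* pages_high counts as reserved */
		if (max > zone->present_pages)    /* clamp to the zone's size */
			max = zone->present_pages;
		reserve_pages += max;
	}

	/* zone 0: max(0,1024)+32 = 1056; zone 1: 0+256 = 256; total 1312 */
	printf("totalreserve_pages = %lu\n", reserve_pages);
	return 0;
}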
@@ -2502,6 +2540,9 @@ static void setup_per_zone_lowmem_reserve(void)
 			}
 		}
 	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*
@@ -2556,6 +2597,9 @@ void setup_per_zone_pages_min(void)
 		zone->pages_high = zone->pages_min + tmp / 2;
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*