author     Mel Gorman <mel@csn.ul.ie>                             2007-10-16 04:26:01 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>   2007-10-16 12:43:00 -0400
commit     d9c2340052278d8eb2ffb16b0484f8f794def4de (patch)
tree       aec7e4e11473a4fcdfd389c718544780a042c6df /mm/page_alloc.c
parent     d100313fd615cc30374ff92e0b3facb053838330 (diff)
Do not depend on MAX_ORDER when grouping pages by mobility
Currently mobility grouping works at the MAX_ORDER_NR_PAGES level. This makes
sense for the majority of users where this is also the huge page size.
However, on platforms like ia64, where the huge page size is runtime
configurable, it is desirable to group at a lower order. On x86_64, and
occasionally on x86, the hugepage size may not always be MAX_ORDER_NR_PAGES.
This patch groups pages together based on the value of HUGETLB_PAGE_ORDER. It
uses a compile-time constant if possible and a variable where the huge page
size is runtime configurable.
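
For reference, the companion definitions live outside this mm/page_alloc.c
diffstat (in include/linux/pageblock-flags.h); as a rough sketch of the
intent only, not a quote of that header, the compile-time/runtime split looks
something like:

    /* Sketch only; the exact header is not part of this diff. */
    #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
    /* Huge page size chosen at boot: pageblock_order is the variable
     * defined in mm/page_alloc.c below and filled in by
     * set_pageblock_order(). */
    extern int pageblock_order;
    #else
    /* Huge page size known at compile time: use a constant. */
    #define pageblock_order		HUGETLB_PAGE_ORDER
    #endif

    #define pageblock_nr_pages	(1UL << pageblock_order)

With this split, pageblock_nr_pages replaces MAX_ORDER_NR_PAGES wherever the
grouping granularity is needed.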
It is assumed that grouping should be done at the lowest sensible order and
that the user would not want to override this. If this is not true,
pageblock_order could be forced to a variable initialised via a boot-time
kernel parameter.
One potential issue with this patch is that IA64 now parses hugepagesz with
early_param() instead of __setup(). __setup() is called after the memory
allocator has been initialised and the pageblock bitmaps are already set up.
In tests on one IA64 machine there did not seem to be any problem with using
early_param(), and it may in fact be more correct, as it guarantees the
parameter is handled before hugepages= is parsed.
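
The ia64 side of the change is outside this diffstat (which is limited to
mm/page_alloc.c). As a rough sketch, and assuming the existing parser
function is reused unchanged (the handler name below is from memory, not
from this diff), the switch amounts to:

    /* Sketch of the arch/ia64 change: registering the parser with
     * early_param() makes it run from parse_early_param() during early
     * boot, before the zones and pageblock bitmaps are initialised,
     * rather than at the later __setup() stage. */
    -__setup("hugepagesz=", hugetlb_setup_sz);
    +early_param("hugepagesz", hugetlb_setup_sz);

Note that __setup() keys include the trailing '=' while early_param() keys
do not.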
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Andy Whitcroft <apw@shadowen.org>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--   mm/page_alloc.c   67
1 file changed, 47 insertions(+), 20 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 942498fba942..b9bc7369c48d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -67,6 +67,10 @@ unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
 
+#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
+int pageblock_order __read_mostly;
+#endif
+
 static void __free_pages_ok(struct page *page, unsigned int order);
 
 /*
@@ -709,7 +713,7 @@ static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
 
 /*
  * Move the free pages in a range to the free lists of the requested type.
- * Note that start_page and end_pages are not aligned in a MAX_ORDER_NR_PAGES
+ * Note that start_page and end_pages are not aligned on a pageblock
  * boundary. If alignment is required, use move_freepages_block()
  */
 int move_freepages(struct zone *zone,
@@ -759,10 +763,10 @@ int move_freepages_block(struct zone *zone, struct page *page, int migratetype)
 	struct page *start_page, *end_page;
 
 	start_pfn = page_to_pfn(page);
-	start_pfn = start_pfn & ~(MAX_ORDER_NR_PAGES-1);
+	start_pfn = start_pfn & ~(pageblock_nr_pages-1);
 	start_page = pfn_to_page(start_pfn);
-	end_page = start_page + MAX_ORDER_NR_PAGES - 1;
-	end_pfn = start_pfn + MAX_ORDER_NR_PAGES - 1;
+	end_page = start_page + pageblock_nr_pages - 1;
+	end_pfn = start_pfn + pageblock_nr_pages - 1;
 
 	/* Do not cross zone boundaries */
 	if (start_pfn < zone->zone_start_pfn)
@@ -826,14 +830,14 @@ static struct page *__rmqueue_fallback(struct zone *zone, int order,
 			 * back for a reclaimable kernel allocation, be more
 			 * agressive about taking ownership of free pages
 			 */
-			if (unlikely(current_order >= MAX_ORDER / 2) ||
+			if (unlikely(current_order >= (pageblock_order >> 1)) ||
 					start_migratetype == MIGRATE_RECLAIMABLE) {
 				unsigned long pages;
 				pages = move_freepages_block(zone, page,
 								start_migratetype);
 
 				/* Claim the whole block if over half of it is free */
-				if (pages >= (1 << (MAX_ORDER-2)))
+				if (pages >= (1 << (pageblock_order-1)))
 					set_pageblock_migratetype(page,
 								start_migratetype);
 
@@ -846,7 +850,7 @@ static struct page *__rmqueue_fallback(struct zone *zone, int order,
 			__mod_zone_page_state(zone, NR_FREE_PAGES,
 							-(1UL << order));
 
-			if (current_order == MAX_ORDER - 1)
+			if (current_order == pageblock_order)
 				set_pageblock_migratetype(page,
 							start_migratetype);
 
@@ -2385,7 +2389,7 @@ void build_all_zonelists(void)
 	 * made on memory-hotadd so a system can start with mobility
 	 * disabled and enable it later
 	 */
-	if (vm_total_pages < (MAX_ORDER_NR_PAGES * MIGRATE_TYPES))
+	if (vm_total_pages < (pageblock_nr_pages * MIGRATE_TYPES))
 		page_group_by_mobility_disabled = 1;
 	else
 		page_group_by_mobility_disabled = 0;
@@ -2470,7 +2474,7 @@ static inline unsigned long wait_table_bits(unsigned long size)
 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
 
 /*
- * Mark a number of MAX_ORDER_NR_PAGES blocks as MIGRATE_RESERVE. The number
+ * Mark a number of pageblocks as MIGRATE_RESERVE. The number
  * of blocks reserved is based on zone->pages_min. The memory within the
  * reserve will tend to store contiguous free pages. Setting min_free_kbytes
  * higher will lead to a bigger reserve which will get freed as contiguous
@@ -2485,9 +2489,10 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	/* Get the start pfn, end pfn and the number of blocks to reserve */
 	start_pfn = zone->zone_start_pfn;
 	end_pfn = start_pfn + zone->spanned_pages;
-	reserve = roundup(zone->pages_min, MAX_ORDER_NR_PAGES) >> (MAX_ORDER-1);
+	reserve = roundup(zone->pages_min, pageblock_nr_pages) >>
+							pageblock_order;
 
-	for (pfn = start_pfn; pfn < end_pfn; pfn += MAX_ORDER_NR_PAGES) {
+	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
 		if (!pfn_valid(pfn))
 			continue;
 		page = pfn_to_page(pfn);
@@ -2562,7 +2567,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		 * the start are marked MIGRATE_RESERVE by
 		 * setup_zone_migrate_reserve()
 		 */
-		if ((pfn & (MAX_ORDER_NR_PAGES-1)))
+		if ((pfn & (pageblock_nr_pages-1)))
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 
 		INIT_LIST_HEAD(&page->lru);
@@ -3266,8 +3271,8 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
 #ifndef CONFIG_SPARSEMEM
 /*
  * Calculate the size of the zone->blockflags rounded to an unsigned long
- * Start by making sure zonesize is a multiple of MAX_ORDER-1 by rounding up
- * Then figure 1 NR_PAGEBLOCK_BITS worth of bits per MAX_ORDER-1, finally
+ * Start by making sure zonesize is a multiple of pageblock_order by rounding
+ * up. Then use 1 NR_PAGEBLOCK_BITS worth of bits per pageblock, finally
  * round what is now in bits to nearest long in bits, then return it in
  * bytes.
  */
@@ -3275,8 +3280,8 @@ static unsigned long __init usemap_size(unsigned long zonesize)
 {
 	unsigned long usemapsize;
 
-	usemapsize = roundup(zonesize, MAX_ORDER_NR_PAGES);
-	usemapsize = usemapsize >> (MAX_ORDER-1);
+	usemapsize = roundup(zonesize, pageblock_nr_pages);
+	usemapsize = usemapsize >> pageblock_order;
 	usemapsize *= NR_PAGEBLOCK_BITS;
 	usemapsize = roundup(usemapsize, 8 * sizeof(unsigned long));
 
@@ -3298,6 +3303,27 @@ static void inline setup_usemap(struct pglist_data *pgdat,
 				struct zone *zone, unsigned long zonesize) {}
 #endif /* CONFIG_SPARSEMEM */
 
+#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
+/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
+static inline void __init set_pageblock_order(unsigned int order)
+{
+	/* Check that pageblock_nr_pages has not already been setup */
+	if (pageblock_order)
+		return;
+
+	/*
+	 * Assume the largest contiguous order of interest is a huge page.
+	 * This value may be variable depending on boot parameters on IA64
+	 */
+	pageblock_order = order;
+}
+#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
+
+/* Defined this way to avoid accidently referencing HUGETLB_PAGE_ORDER */
+#define set_pageblock_order(x)	do {} while (0)
+
+#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
+
 /*
  * Set up the zone data structures:
  *  - mark all pages reserved
@@ -3378,6 +3404,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		if (!size)
 			continue;
 
+		set_pageblock_order(HUGETLB_PAGE_ORDER);
 		setup_usemap(pgdat, zone, size);
 		ret = init_currently_empty_zone(zone, zone_start_pfn,
 						size, MEMMAP_EARLY);
@@ -4375,15 +4402,15 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
 {
 #ifdef CONFIG_SPARSEMEM
 	pfn &= (PAGES_PER_SECTION-1);
-	return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS;
+	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
 #else
 	pfn = pfn - zone->zone_start_pfn;
-	return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS;
+	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
 #endif /* CONFIG_SPARSEMEM */
 }
 
 /**
- * get_pageblock_flags_group - Return the requested group of flags for the MAX_ORDER_NR_PAGES block of pages
+ * get_pageblock_flags_group - Return the requested group of flags for the pageblock_nr_pages block of pages
  * @page: The page within the block of interest
  * @start_bitidx: The first bit of interest to retrieve
  * @end_bitidx: The last bit of interest
@@ -4411,7 +4438,7 @@ unsigned long get_pageblock_flags_group(struct page *page,
 }
 
 /**
- * set_pageblock_flags_group - Set the requested group of flags for a MAX_ORDER_NR_PAGES block of pages
+ * set_pageblock_flags_group - Set the requested group of flags for a pageblock_nr_pages block of pages
  * @page: The page within the block of interest
  * @start_bitidx: The first bit of interest
  * @end_bitidx: The last bit of interest