diff options
author | Mel Gorman <mel@csn.ul.ie> | 2007-10-16 04:25:48 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-16 12:42:59 -0400 |
commit | b2a0ac8875a0a3b9f0739b60526f8c5977d2200f (patch) | |
tree | 31826716b3209751a5468b840ff14190b4a5a8a2 /mm | |
parent | 835c134ec4dd755e5c4470af566db226d1e96742 (diff) |
Split the free lists for movable and unmovable allocations
This patch adds the core of the fragmentation reduction strategy. It works by
grouping pages together based on their ability to migrate or be reclaimed.
Basically, it works by breaking the free list in each zone->free_area into
MIGRATE_TYPES separate free lists.
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/page_alloc.c | 141 |
1 files changed, 117 insertions, 24 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 332f46d70d2d..d54ecf41b44c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -158,6 +158,22 @@ int nr_node_ids __read_mostly = MAX_NUMNODES; | |||
158 | EXPORT_SYMBOL(nr_node_ids); | 158 | EXPORT_SYMBOL(nr_node_ids); |
159 | #endif | 159 | #endif |
160 | 160 | ||
161 | static inline int get_pageblock_migratetype(struct page *page) | ||
162 | { | ||
163 | return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end); | ||
164 | } | ||
165 | |||
166 | static void set_pageblock_migratetype(struct page *page, int migratetype) | ||
167 | { | ||
168 | set_pageblock_flags_group(page, (unsigned long)migratetype, | ||
169 | PB_migrate, PB_migrate_end); | ||
170 | } | ||
171 | |||
172 | static inline int gfpflags_to_migratetype(gfp_t gfp_flags) | ||
173 | { | ||
174 | return ((gfp_flags & __GFP_MOVABLE) != 0); | ||
175 | } | ||
176 | |||
161 | #ifdef CONFIG_DEBUG_VM | 177 | #ifdef CONFIG_DEBUG_VM |
162 | static int page_outside_zone_boundaries(struct zone *zone, struct page *page) | 178 | static int page_outside_zone_boundaries(struct zone *zone, struct page *page) |
163 | { | 179 | { |
@@ -412,6 +428,7 @@ static inline void __free_one_page(struct page *page, | |||
412 | { | 428 | { |
413 | unsigned long page_idx; | 429 | unsigned long page_idx; |
414 | int order_size = 1 << order; | 430 | int order_size = 1 << order; |
431 | int migratetype = get_pageblock_migratetype(page); | ||
415 | 432 | ||
416 | if (unlikely(PageCompound(page))) | 433 | if (unlikely(PageCompound(page))) |
417 | destroy_compound_page(page, order); | 434 | destroy_compound_page(page, order); |
@@ -424,7 +441,6 @@ static inline void __free_one_page(struct page *page, | |||
424 | __mod_zone_page_state(zone, NR_FREE_PAGES, order_size); | 441 | __mod_zone_page_state(zone, NR_FREE_PAGES, order_size); |
425 | while (order < MAX_ORDER-1) { | 442 | while (order < MAX_ORDER-1) { |
426 | unsigned long combined_idx; | 443 | unsigned long combined_idx; |
427 | struct free_area *area; | ||
428 | struct page *buddy; | 444 | struct page *buddy; |
429 | 445 | ||
430 | buddy = __page_find_buddy(page, page_idx, order); | 446 | buddy = __page_find_buddy(page, page_idx, order); |
@@ -432,8 +448,7 @@ static inline void __free_one_page(struct page *page, | |||
432 | break; /* Move the buddy up one level. */ | 448 | break; /* Move the buddy up one level. */ |
433 | 449 | ||
434 | list_del(&buddy->lru); | 450 | list_del(&buddy->lru); |
435 | area = zone->free_area + order; | 451 | zone->free_area[order].nr_free--; |
436 | area->nr_free--; | ||
437 | rmv_page_order(buddy); | 452 | rmv_page_order(buddy); |
438 | combined_idx = __find_combined_index(page_idx, order); | 453 | combined_idx = __find_combined_index(page_idx, order); |
439 | page = page + (combined_idx - page_idx); | 454 | page = page + (combined_idx - page_idx); |
@@ -441,7 +456,8 @@ static inline void __free_one_page(struct page *page, | |||
441 | order++; | 456 | order++; |
442 | } | 457 | } |
443 | set_page_order(page, order); | 458 | set_page_order(page, order); |
444 | list_add(&page->lru, &zone->free_area[order].free_list); | 459 | list_add(&page->lru, |
460 | &zone->free_area[order].free_list[migratetype]); | ||
445 | zone->free_area[order].nr_free++; | 461 | zone->free_area[order].nr_free++; |
446 | } | 462 | } |
447 | 463 | ||
@@ -575,7 +591,8 @@ void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order) | |||
575 | * -- wli | 591 | * -- wli |
576 | */ | 592 | */ |
577 | static inline void expand(struct zone *zone, struct page *page, | 593 | static inline void expand(struct zone *zone, struct page *page, |
578 | int low, int high, struct free_area *area) | 594 | int low, int high, struct free_area *area, |
595 | int migratetype) | ||
579 | { | 596 | { |
580 | unsigned long size = 1 << high; | 597 | unsigned long size = 1 << high; |
581 | 598 | ||
@@ -584,7 +601,7 @@ static inline void expand(struct zone *zone, struct page *page, | |||
584 | high--; | 601 | high--; |
585 | size >>= 1; | 602 | size >>= 1; |
586 | VM_BUG_ON(bad_range(zone, &page[size])); | 603 | VM_BUG_ON(bad_range(zone, &page[size])); |
587 | list_add(&page[size].lru, &area->free_list); | 604 | list_add(&page[size].lru, &area->free_list[migratetype]); |
588 | area->nr_free++; | 605 | area->nr_free++; |
589 | set_page_order(&page[size], high); | 606 | set_page_order(&page[size], high); |
590 | } | 607 | } |
@@ -636,31 +653,95 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) | |||
636 | return 0; | 653 | return 0; |
637 | } | 654 | } |
638 | 655 | ||
656 | /* | ||
657 | * This array describes the order lists are fallen back to when | ||
658 | * the free lists for the desirable migrate type are depleted | ||
659 | */ | ||
660 | static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { | ||
661 | [MIGRATE_UNMOVABLE] = { MIGRATE_MOVABLE }, | ||
662 | [MIGRATE_MOVABLE] = { MIGRATE_UNMOVABLE }, | ||
663 | }; | ||
664 | |||
665 | /* Remove an element from the buddy allocator from the fallback list */ | ||
666 | static struct page *__rmqueue_fallback(struct zone *zone, int order, | ||
667 | int start_migratetype) | ||
668 | { | ||
669 | struct free_area * area; | ||
670 | int current_order; | ||
671 | struct page *page; | ||
672 | int migratetype, i; | ||
673 | |||
674 | /* Find the largest possible block of pages in the other list */ | ||
675 | for (current_order = MAX_ORDER-1; current_order >= order; | ||
676 | --current_order) { | ||
677 | for (i = 0; i < MIGRATE_TYPES - 1; i++) { | ||
678 | migratetype = fallbacks[start_migratetype][i]; | ||
679 | |||
680 | area = &(zone->free_area[current_order]); | ||
681 | if (list_empty(&area->free_list[migratetype])) | ||
682 | continue; | ||
683 | |||
684 | page = list_entry(area->free_list[migratetype].next, | ||
685 | struct page, lru); | ||
686 | area->nr_free--; | ||
687 | |||
688 | /* | ||
689 | * If breaking a large block of pages, place the buddies | ||
690 | * on the preferred allocation list | ||
691 | */ | ||
692 | if (unlikely(current_order >= MAX_ORDER / 2)) | ||
693 | migratetype = start_migratetype; | ||
694 | |||
695 | /* Remove the page from the freelists */ | ||
696 | list_del(&page->lru); | ||
697 | rmv_page_order(page); | ||
698 | __mod_zone_page_state(zone, NR_FREE_PAGES, | ||
699 | -(1UL << order)); | ||
700 | |||
701 | if (current_order == MAX_ORDER - 1) | ||
702 | set_pageblock_migratetype(page, | ||
703 | start_migratetype); | ||
704 | |||
705 | expand(zone, page, order, current_order, area, migratetype); | ||
706 | return page; | ||
707 | } | ||
708 | } | ||
709 | |||
710 | return NULL; | ||
711 | } | ||
712 | |||
639 | /* | 713 | /* |
640 | * Do the hard work of removing an element from the buddy allocator. | 714 | * Do the hard work of removing an element from the buddy allocator. |
641 | * Call me with the zone->lock already held. | 715 | * Call me with the zone->lock already held. |
642 | */ | 716 | */ |
643 | static struct page *__rmqueue(struct zone *zone, unsigned int order) | 717 | static struct page *__rmqueue(struct zone *zone, unsigned int order, |
718 | int migratetype) | ||
644 | { | 719 | { |
645 | struct free_area * area; | 720 | struct free_area * area; |
646 | unsigned int current_order; | 721 | unsigned int current_order; |
647 | struct page *page; | 722 | struct page *page; |
648 | 723 | ||
724 | /* Find a page of the appropriate size in the preferred list */ | ||
649 | for (current_order = order; current_order < MAX_ORDER; ++current_order) { | 725 | for (current_order = order; current_order < MAX_ORDER; ++current_order) { |
650 | area = zone->free_area + current_order; | 726 | area = &(zone->free_area[current_order]); |
651 | if (list_empty(&area->free_list)) | 727 | if (list_empty(&area->free_list[migratetype])) |
652 | continue; | 728 | continue; |
653 | 729 | ||
654 | page = list_entry(area->free_list.next, struct page, lru); | 730 | page = list_entry(area->free_list[migratetype].next, |
731 | struct page, lru); | ||
655 | list_del(&page->lru); | 732 | list_del(&page->lru); |
656 | rmv_page_order(page); | 733 | rmv_page_order(page); |
657 | area->nr_free--; | 734 | area->nr_free--; |
658 | __mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order)); | 735 | __mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order)); |
659 | expand(zone, page, order, current_order, area); | 736 | expand(zone, page, order, current_order, area, migratetype); |
660 | return page; | 737 | goto got_page; |
661 | } | 738 | } |
662 | 739 | ||
663 | return NULL; | 740 | page = __rmqueue_fallback(zone, order, migratetype); |
741 | |||
742 | got_page: | ||
743 | |||
744 | return page; | ||
664 | } | 745 | } |
665 | 746 | ||
666 | /* | 747 | /* |
@@ -669,13 +750,14 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order) | |||
669 | * Returns the number of new pages which were placed at *list. | 750 | * Returns the number of new pages which were placed at *list. |
670 | */ | 751 | */ |
671 | static int rmqueue_bulk(struct zone *zone, unsigned int order, | 752 | static int rmqueue_bulk(struct zone *zone, unsigned int order, |
672 | unsigned long count, struct list_head *list) | 753 | unsigned long count, struct list_head *list, |
754 | int migratetype) | ||
673 | { | 755 | { |
674 | int i; | 756 | int i; |
675 | 757 | ||
676 | spin_lock(&zone->lock); | 758 | spin_lock(&zone->lock); |
677 | for (i = 0; i < count; ++i) { | 759 | for (i = 0; i < count; ++i) { |
678 | struct page *page = __rmqueue(zone, order); | 760 | struct page *page = __rmqueue(zone, order, migratetype); |
679 | if (unlikely(page == NULL)) | 761 | if (unlikely(page == NULL)) |
680 | break; | 762 | break; |
681 | list_add_tail(&page->lru, list); | 763 | list_add_tail(&page->lru, list); |
@@ -740,7 +822,7 @@ void mark_free_pages(struct zone *zone) | |||
740 | { | 822 | { |
741 | unsigned long pfn, max_zone_pfn; | 823 | unsigned long pfn, max_zone_pfn; |
742 | unsigned long flags; | 824 | unsigned long flags; |
743 | int order; | 825 | int order, t; |
744 | struct list_head *curr; | 826 | struct list_head *curr; |
745 | 827 | ||
746 | if (!zone->spanned_pages) | 828 | if (!zone->spanned_pages) |
@@ -757,15 +839,15 @@ void mark_free_pages(struct zone *zone) | |||
757 | swsusp_unset_page_free(page); | 839 | swsusp_unset_page_free(page); |
758 | } | 840 | } |
759 | 841 | ||
760 | for (order = MAX_ORDER - 1; order >= 0; --order) | 842 | for_each_migratetype_order(order, t) { |
761 | list_for_each(curr, &zone->free_area[order].free_list) { | 843 | list_for_each(curr, &zone->free_area[order].free_list[t]) { |
762 | unsigned long i; | 844 | unsigned long i; |
763 | 845 | ||
764 | pfn = page_to_pfn(list_entry(curr, struct page, lru)); | 846 | pfn = page_to_pfn(list_entry(curr, struct page, lru)); |
765 | for (i = 0; i < (1UL << order); i++) | 847 | for (i = 0; i < (1UL << order); i++) |
766 | swsusp_set_page_free(pfn_to_page(pfn + i)); | 848 | swsusp_set_page_free(pfn_to_page(pfn + i)); |
767 | } | 849 | } |
768 | 850 | } | |
769 | spin_unlock_irqrestore(&zone->lock, flags); | 851 | spin_unlock_irqrestore(&zone->lock, flags); |
770 | } | 852 | } |
771 | 853 | ||
@@ -854,6 +936,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist, | |||
854 | struct page *page; | 936 | struct page *page; |
855 | int cold = !!(gfp_flags & __GFP_COLD); | 937 | int cold = !!(gfp_flags & __GFP_COLD); |
856 | int cpu; | 938 | int cpu; |
939 | int migratetype = gfpflags_to_migratetype(gfp_flags); | ||
857 | 940 | ||
858 | again: | 941 | again: |
859 | cpu = get_cpu(); | 942 | cpu = get_cpu(); |
@@ -864,7 +947,7 @@ again: | |||
864 | local_irq_save(flags); | 947 | local_irq_save(flags); |
865 | if (!pcp->count) { | 948 | if (!pcp->count) { |
866 | pcp->count = rmqueue_bulk(zone, 0, | 949 | pcp->count = rmqueue_bulk(zone, 0, |
867 | pcp->batch, &pcp->list); | 950 | pcp->batch, &pcp->list, migratetype); |
868 | if (unlikely(!pcp->count)) | 951 | if (unlikely(!pcp->count)) |
869 | goto failed; | 952 | goto failed; |
870 | } | 953 | } |
@@ -873,7 +956,7 @@ again: | |||
873 | pcp->count--; | 956 | pcp->count--; |
874 | } else { | 957 | } else { |
875 | spin_lock_irqsave(&zone->lock, flags); | 958 | spin_lock_irqsave(&zone->lock, flags); |
876 | page = __rmqueue(zone, order); | 959 | page = __rmqueue(zone, order, migratetype); |
877 | spin_unlock(&zone->lock); | 960 | spin_unlock(&zone->lock); |
878 | if (!page) | 961 | if (!page) |
879 | goto failed; | 962 | goto failed; |
@@ -2233,6 +2316,16 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
2233 | init_page_count(page); | 2316 | init_page_count(page); |
2234 | reset_page_mapcount(page); | 2317 | reset_page_mapcount(page); |
2235 | SetPageReserved(page); | 2318 | SetPageReserved(page); |
2319 | |||
2320 | /* | ||
2321 | * Mark the block movable so that blocks are reserved for | ||
2322 | * movable at startup. This will force kernel allocations | ||
2323 | * to reserve their blocks rather than leaking throughout | ||
2324 | * the address space during boot when many long-lived | ||
2325 | * kernel allocations are made | ||
2326 | */ | ||
2327 | set_pageblock_migratetype(page, MIGRATE_MOVABLE); | ||
2328 | |||
2236 | INIT_LIST_HEAD(&page->lru); | 2329 | INIT_LIST_HEAD(&page->lru); |
2237 | #ifdef WANT_PAGE_VIRTUAL | 2330 | #ifdef WANT_PAGE_VIRTUAL |
2238 | /* The shift won't overflow because ZONE_NORMAL is below 4G. */ | 2331 | /* The shift won't overflow because ZONE_NORMAL is below 4G. */ |
@@ -2245,9 +2338,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
2245 | static void __meminit zone_init_free_lists(struct pglist_data *pgdat, | 2338 | static void __meminit zone_init_free_lists(struct pglist_data *pgdat, |
2246 | struct zone *zone, unsigned long size) | 2339 | struct zone *zone, unsigned long size) |
2247 | { | 2340 | { |
2248 | int order; | 2341 | int order, t; |
2249 | for (order = 0; order < MAX_ORDER ; order++) { | 2342 | for_each_migratetype_order(order, t) { |
2250 | INIT_LIST_HEAD(&zone->free_area[order].free_list); | 2343 | INIT_LIST_HEAD(&zone->free_area[order].free_list[t]); |
2251 | zone->free_area[order].nr_free = 0; | 2344 | zone->free_area[order].nr_free = 0; |
2252 | } | 2345 | } |
2253 | } | 2346 | } |