author     Mel Gorman <mgorman@techsingularity.net>          2017-02-24 17:56:26 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2017-02-24 20:46:54 -0500
commit     066b23935578d3913c2df9bed7addbcdf4711f1a
tree       65647ee31931bec7dc37262d2403953011e38234  /mm/page_alloc.c
parent     c55e8d035b28b2867e68b0e2d0eee2c0f1016b43
mm, page_alloc: split buffered_rmqueue()
Patch series "Use per-cpu allocator for !irq requests and prepare for a bulk allocator", v5.

This series is motivated by a conversation led by Jesper Dangaard Brouer at the last LSF/MM proposing a generic page pool for DMA-coherent pages. Part of his motivation was the overhead of allocating multiple order-0 pages, which led some drivers to use high-order allocations and split them; this is very slow in some cases.

The first two patches in this series restructure the page allocator so that it is relatively easy to introduce an order-0 bulk page allocator. A patch exists to do that and has been handed over to Jesper until an in-kernel user is created. The third patch prevents the per-cpu allocator from being drained from IPI context, as that can potentially corrupt the list after patch four is merged. The final patch alters the per-cpu allocator to make it exclusive to !irq requests. This cuts allocation/free overhead by roughly 30%. Performance tests from both Jesper and me are included in the patch.

This patch (of 4):

buffered_rmqueue() removes a page from a given zone and uses the per-cpu list for order-0 allocations. This is fine, but a hypothetical caller that wanted multiple order-0 pages would have to disable and re-enable interrupts multiple times. This patch restructures buffered_rmqueue() so that it is relatively easy to build a bulk order-0 page allocator. There is no functional change.

[mgorman@techsingularity.net: failed per-cpu refill may blow up]
Link: http://lkml.kernel.org/r/20170124112723.mshmgwq2ihxku2um@techsingularity.net
Link: http://lkml.kernel.org/r/20170123153906.3122-2-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
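For illustration only, the kind of bulk order-0 allocator this split enables could look roughly like the sketch below: take the IRQ-disabled section once, then call the new __rmqueue_pcplist() helper repeatedly. This is a hypothetical sketch based on the helpers added by this patch, not code from this commit or any in-tree API; the function name rmqueue_pcplist_bulk(), its signature, and the count/page_list parameters are all illustrative.

/*
 * Hypothetical sketch, not part of this commit: allocate up to @count
 * order-0 pages from @zone's per-cpu lists with a single IRQ
 * disable/enable, reusing __rmqueue_pcplist() as introduced here.
 */
static unsigned int rmqueue_pcplist_bulk(struct zone *preferred_zone,
                        struct zone *zone, gfp_t gfp_flags, int migratetype,
                        unsigned int count, struct list_head *page_list)
{
        bool cold = ((gfp_flags & __GFP_COLD) != 0);
        struct per_cpu_pages *pcp;
        struct list_head *list;
        unsigned long flags;
        unsigned int allocated = 0;

        local_irq_save(flags);
        pcp = &this_cpu_ptr(zone->pageset)->pcp;
        list = &pcp->lists[migratetype];

        while (allocated < count) {
                struct page *page;

                /* Refill and removal are handled by the shared helper */
                page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
                if (!page)
                        break;

                list_add_tail(&page->lru, page_list);
                allocated++;
        }

        if (allocated) {
                __count_zid_vm_events(PGALLOC, zone_idx(zone), allocated);
                zone_statistics(preferred_zone, zone);
        }
        local_irq_restore(flags);
        return allocated;
}

The point of the sketch is the cost model: the interrupt disable/enable and statistics updates are paid once per batch instead of once per page, which is exactly the overhead the series sets out to remove.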
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  128
1 file changed, 79 insertions(+), 49 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c21b33668133..284153d3e0fc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2600,74 +2600,104 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 #endif
 }
 
+/* Remove page from the per-cpu list, caller must protect the list */
+static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
+                        bool cold, struct per_cpu_pages *pcp,
+                        struct list_head *list)
+{
+        struct page *page;
+
+        do {
+                if (list_empty(list)) {
+                        pcp->count += rmqueue_bulk(zone, 0,
+                                        pcp->batch, list,
+                                        migratetype, cold);
+                        if (unlikely(list_empty(list)))
+                                return NULL;
+                }
+
+                if (cold)
+                        page = list_last_entry(list, struct page, lru);
+                else
+                        page = list_first_entry(list, struct page, lru);
+
+                list_del(&page->lru);
+                pcp->count--;
+        } while (check_new_pcp(page));
+
+        return page;
+}
+
+/* Lock and remove page from the per-cpu list */
+static struct page *rmqueue_pcplist(struct zone *preferred_zone,
+                        struct zone *zone, unsigned int order,
+                        gfp_t gfp_flags, int migratetype)
+{
+        struct per_cpu_pages *pcp;
+        struct list_head *list;
+        bool cold = ((gfp_flags & __GFP_COLD) != 0);
+        struct page *page;
+        unsigned long flags;
+
+        local_irq_save(flags);
+        pcp = &this_cpu_ptr(zone->pageset)->pcp;
+        list = &pcp->lists[migratetype];
+        page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
+        if (page) {
+                __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
+                zone_statistics(preferred_zone, zone);
+        }
+        local_irq_restore(flags);
+        return page;
+}
+
 /*
  * Allocate a page from the given zone. Use pcplists for order-0 allocations.
  */
 static inline
-struct page *buffered_rmqueue(struct zone *preferred_zone,
+struct page *rmqueue(struct zone *preferred_zone,
                         struct zone *zone, unsigned int order,
                         gfp_t gfp_flags, unsigned int alloc_flags,
                         int migratetype)
 {
         unsigned long flags;
         struct page *page;
-        bool cold = ((gfp_flags & __GFP_COLD) != 0);
 
         if (likely(order == 0)) {
-                struct per_cpu_pages *pcp;
-                struct list_head *list;
-
-                local_irq_save(flags);
-                do {
-                        pcp = &this_cpu_ptr(zone->pageset)->pcp;
-                        list = &pcp->lists[migratetype];
-                        if (list_empty(list)) {
-                                pcp->count += rmqueue_bulk(zone, 0,
-                                                pcp->batch, list,
-                                                migratetype, cold);
-                                if (unlikely(list_empty(list)))
-                                        goto failed;
-                        }
-
-                        if (cold)
-                                page = list_last_entry(list, struct page, lru);
-                        else
-                                page = list_first_entry(list, struct page, lru);
-
-                        list_del(&page->lru);
-                        pcp->count--;
+                page = rmqueue_pcplist(preferred_zone, zone, order,
+                                gfp_flags, migratetype);
+                goto out;
+        }
 
-                } while (check_new_pcp(page));
-        } else {
-                /*
-                 * We most definitely don't want callers attempting to
-                 * allocate greater than order-1 page units with __GFP_NOFAIL.
-                 */
-                WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
-                spin_lock_irqsave(&zone->lock, flags);
+        /*
+         * We most definitely don't want callers attempting to
+         * allocate greater than order-1 page units with __GFP_NOFAIL.
+         */
+        WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+        spin_lock_irqsave(&zone->lock, flags);
 
-                do {
-                        page = NULL;
-                        if (alloc_flags & ALLOC_HARDER) {
-                                page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
-                                if (page)
-                                        trace_mm_page_alloc_zone_locked(page, order, migratetype);
-                        }
-                        if (!page)
-                                page = __rmqueue(zone, order, migratetype);
-                } while (page && check_new_pages(page, order));
-                spin_unlock(&zone->lock);
+        do {
+                page = NULL;
+                if (alloc_flags & ALLOC_HARDER) {
+                        page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+                        if (page)
+                                trace_mm_page_alloc_zone_locked(page, order, migratetype);
+                }
                 if (!page)
-                        goto failed;
-                __mod_zone_freepage_state(zone, -(1 << order),
-                                          get_pcppage_migratetype(page));
-        }
+                        page = __rmqueue(zone, order, migratetype);
+        } while (page && check_new_pages(page, order));
+        spin_unlock(&zone->lock);
+        if (!page)
+                goto failed;
+        __mod_zone_freepage_state(zone, -(1 << order),
+                                  get_pcppage_migratetype(page));
 
         __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
         zone_statistics(preferred_zone, zone);
         local_irq_restore(flags);
 
-        VM_BUG_ON_PAGE(bad_range(zone, page), page);
+out:
+        VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
         return page;
 
 failed:
@@ -2972,7 +3002,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
                 }
 
 try_this_zone:
-                page = buffered_rmqueue(ac->preferred_zoneref->zone, zone, order,
+                page = rmqueue(ac->preferred_zoneref->zone, zone, order,
                                 gfp_mask, alloc_flags, ac->migratetype);
                 if (page) {
                         prep_new_page(page, order, gfp_mask, alloc_flags);