author     Mel Gorman <mgorman@techsingularity.net>          2017-02-24 17:56:26 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2017-02-24 20:46:54 -0500
commit     066b23935578d3913c2df9bed7addbcdf4711f1a
tree       65647ee31931bec7dc37262d2403953011e38234  /mm/page_alloc.c
parent     c55e8d035b28b2867e68b0e2d0eee2c0f1016b43
mm, page_alloc: split buffered_rmqueue()
Patch series "Use per-cpu allocator for !irq requests and prepare for a bulk allocator", v5.

This series is motivated by a conversation led by Jesper Dangaard Brouer at the last LSF/MM proposing a generic page pool for DMA-coherent pages. Part of his motivation was the overhead of allocating multiple order-0 pages, which led some drivers to use high-order allocations and split them; this is very slow in some cases.

The first two patches in this series restructure the page allocator so that it is relatively easy to introduce an order-0 bulk page allocator. A patch exists to do that and has been handed over to Jesper until an in-kernel user is created. The third patch prevents the per-cpu allocator from being drained from IPI context, as that can potentially corrupt the list after patch four is merged. The final patch alters the per-cpu allocator to make it exclusive to !irq requests. This cuts allocation/free overhead by roughly 30%. Performance tests from both Jesper and me are included in the patch.

This patch (of 4):

buffered_rmqueue() removes a page from a given zone and uses the per-cpu list for order-0 allocations. This is fine, but a hypothetical caller that wanted multiple order-0 pages would have to disable and re-enable interrupts multiple times. This patch restructures buffered_rmqueue() so that it is relatively easy to build a bulk order-0 page allocator. There is no functional change.

[mgorman@techsingularity.net: failed per-cpu refill may blow up]
Link: http://lkml.kernel.org/r/20170124112723.mshmgwq2ihxku2um@techsingularity.net
Link: http://lkml.kernel.org/r/20170123153906.3122-2-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
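For illustration only, the kind of bulk order-0 allocator this split enables could look roughly like the sketch below: take the IRQ-disabled section once, then call the new __rmqueue_pcplist() helper repeatedly. This is a hypothetical sketch based on the helpers added by this patch, not code from this commit or any in-tree API; the function name rmqueue_pcplist_bulk(), its signature, and the count/page_list parameters are all illustrative.

/*
 * Hypothetical sketch, not part of this commit: allocate up to @count
 * order-0 pages from @zone's per-cpu lists with a single IRQ
 * disable/enable, reusing __rmqueue_pcplist() as introduced here.
 */
static unsigned int rmqueue_pcplist_bulk(struct zone *preferred_zone,
                        struct zone *zone, gfp_t gfp_flags, int migratetype,
                        unsigned int count, struct list_head *page_list)
{
        bool cold = ((gfp_flags & __GFP_COLD) != 0);
        struct per_cpu_pages *pcp;
        struct list_head *list;
        unsigned long flags;
        unsigned int allocated = 0;

        local_irq_save(flags);
        pcp = &this_cpu_ptr(zone->pageset)->pcp;
        list = &pcp->lists[migratetype];

        while (allocated < count) {
                struct page *page;

                /* Refill and removal are handled by the shared helper */
                page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
                if (!page)
                        break;

                list_add_tail(&page->lru, page_list);
                allocated++;
        }

        if (allocated) {
                __count_zid_vm_events(PGALLOC, zone_idx(zone), allocated);
                zone_statistics(preferred_zone, zone);
        }
        local_irq_restore(flags);
        return allocated;
}

The point of the sketch is the cost model: the interrupt disable/enable and statistics updates are paid once per batch instead of once per page, which is exactly the overhead the series sets out to remove.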
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  128
1 file changed, 79 insertions(+), 49 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c21b33668133..284153d3e0fc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2600,74 +2600,104 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 #endif
 }
 
+/* Remove page from the per-cpu list, caller must protect the list */
+static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
+                        bool cold, struct per_cpu_pages *pcp,
+                        struct list_head *list)
+{
+        struct page *page;
+
+        do {
+                if (list_empty(list)) {
+                        pcp->count += rmqueue_bulk(zone, 0,
+                                        pcp->batch, list,
+                                        migratetype, cold);
+                        if (unlikely(list_empty(list)))
+                                return NULL;
+                }
+
+                if (cold)
+                        page = list_last_entry(list, struct page, lru);
+                else
+                        page = list_first_entry(list, struct page, lru);
+
+                list_del(&page->lru);
+                pcp->count--;
+        } while (check_new_pcp(page));
+
+        return page;
+}
+
+/* Lock and remove page from the per-cpu list */
+static struct page *rmqueue_pcplist(struct zone *preferred_zone,
+                        struct zone *zone, unsigned int order,
+                        gfp_t gfp_flags, int migratetype)
+{
+        struct per_cpu_pages *pcp;
+        struct list_head *list;
+        bool cold = ((gfp_flags & __GFP_COLD) != 0);
+        struct page *page;
+        unsigned long flags;
+
+        local_irq_save(flags);
+        pcp = &this_cpu_ptr(zone->pageset)->pcp;
+        list = &pcp->lists[migratetype];
+        page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
+        if (page) {
+                __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
+                zone_statistics(preferred_zone, zone);
+        }
+        local_irq_restore(flags);
+        return page;
+}
+
 /*
  * Allocate a page from the given zone. Use pcplists for order-0 allocations.
  */
 static inline
-struct page *buffered_rmqueue(struct zone *preferred_zone,
+struct page *rmqueue(struct zone *preferred_zone,
                         struct zone *zone, unsigned int order,
                         gfp_t gfp_flags, unsigned int alloc_flags,
                         int migratetype)
 {
         unsigned long flags;
         struct page *page;
-        bool cold = ((gfp_flags & __GFP_COLD) != 0);
 
         if (likely(order == 0)) {
-                struct per_cpu_pages *pcp;
-                struct list_head *list;
-
-                local_irq_save(flags);
-                do {
-                        pcp = &this_cpu_ptr(zone->pageset)->pcp;
-                        list = &pcp->lists[migratetype];
-                        if (list_empty(list)) {
-                                pcp->count += rmqueue_bulk(zone, 0,
-                                                pcp->batch, list,
-                                                migratetype, cold);
-                                if (unlikely(list_empty(list)))
-                                        goto failed;
-                        }
-
-                        if (cold)
-                                page = list_last_entry(list, struct page, lru);
-                        else
-                                page = list_first_entry(list, struct page, lru);
-
-                        list_del(&page->lru);
-                        pcp->count--;
+                page = rmqueue_pcplist(preferred_zone, zone, order,
+                                gfp_flags, migratetype);
+                goto out;
+        }
 
-                } while (check_new_pcp(page));
-        } else {
-                /*
-                 * We most definitely don't want callers attempting to
-                 * allocate greater than order-1 page units with __GFP_NOFAIL.
-                 */
-                WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
-                spin_lock_irqsave(&zone->lock, flags);
+        /*
+         * We most definitely don't want callers attempting to
+         * allocate greater than order-1 page units with __GFP_NOFAIL.
+         */
+        WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+        spin_lock_irqsave(&zone->lock, flags);
 
-                do {
-                        page = NULL;
-                        if (alloc_flags & ALLOC_HARDER) {
-                                page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
-                                if (page)
-                                        trace_mm_page_alloc_zone_locked(page, order, migratetype);
-                        }
-                        if (!page)
-                                page = __rmqueue(zone, order, migratetype);
-                } while (page && check_new_pages(page, order));
-                spin_unlock(&zone->lock);
+        do {
+                page = NULL;
+                if (alloc_flags & ALLOC_HARDER) {
+                        page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+                        if (page)
+                                trace_mm_page_alloc_zone_locked(page, order, migratetype);
+                }
                 if (!page)
-                        goto failed;
-                __mod_zone_freepage_state(zone, -(1 << order),
-                                          get_pcppage_migratetype(page));
-        }
+                        page = __rmqueue(zone, order, migratetype);
+        } while (page && check_new_pages(page, order));
+        spin_unlock(&zone->lock);
+        if (!page)
+                goto failed;
+        __mod_zone_freepage_state(zone, -(1 << order),
+                                  get_pcppage_migratetype(page));
 
         __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
         zone_statistics(preferred_zone, zone);
         local_irq_restore(flags);
 
-        VM_BUG_ON_PAGE(bad_range(zone, page), page);
+out:
+        VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
         return page;
 
 failed:
@@ -2972,7 +3002,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
                 }
 
 try_this_zone:
-                page = buffered_rmqueue(ac->preferred_zoneref->zone, zone, order,
+                page = rmqueue(ac->preferred_zoneref->zone, zone, order,
                                 gfp_mask, alloc_flags, ac->migratetype);
                 if (page) {
                         prep_new_page(page, order, gfp_mask, alloc_flags);