-rw-r--r--  include/linux/compaction.h |  4
-rw-r--r--  include/linux/mm.h         |  1
-rw-r--r--  mm/compaction.c            | 90
-rw-r--r--  mm/internal.h              |  1
-rw-r--r--  mm/page_alloc.c            | 63
5 files changed, 130 insertions(+), 29 deletions(-)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index ef658147e4e8..0e38a1deeb23 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			bool sync, bool *contended);
+			bool sync, bool *contended, struct page **page);
 extern int compact_pgdat(pg_data_t *pgdat, int order);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 
@@ -64,7 +64,7 @@ static inline bool compaction_deferred(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended)
+			bool sync, bool *contended, struct page **page)
 {
 	return COMPACT_CONTINUE;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0514fe9d3c84..5ddb11b2b4bb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -442,6 +442,7 @@ void put_pages_list(struct list_head *pages);
 
 void split_page(struct page *page, unsigned int order);
 int split_free_page(struct page *page);
+int capture_free_page(struct page *page, int alloc_order, int migratetype);
 
 /*
  * Compound pages have a destructor function. Provide a
diff --git a/mm/compaction.c b/mm/compaction.c
index 7168edc7592c..0fbc6b73a522 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -91,6 +91,60 @@ static inline bool compact_trylock_irqsave(spinlock_t *lock,
 	return compact_checklock_irqsave(lock, flags, false, cc);
 }
 
+static void compact_capture_page(struct compact_control *cc)
+{
+	unsigned long flags;
+	int mtype, mtype_low, mtype_high;
+
+	if (!cc->page || *cc->page)
+		return;
+
+	/*
+	 * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
+	 * regardless of the migratetype of the freelist it is captured from.
+	 * This is fine because the order for a high-order MIGRATE_MOVABLE
+	 * allocation is typically at least a pageblock size and overall
+	 * fragmentation is not impaired. Other allocation types must
+	 * capture pages from their own migratelist because otherwise they
+	 * could pollute other pageblocks like MIGRATE_MOVABLE with
+	 * difficult-to-move pages, making fragmentation worse overall.
+	 */
+	if (cc->migratetype == MIGRATE_MOVABLE) {
+		mtype_low = 0;
+		mtype_high = MIGRATE_PCPTYPES;
+	} else {
+		mtype_low = cc->migratetype;
+		mtype_high = cc->migratetype + 1;
+	}
+
+	/* Speculatively examine the free lists without zone lock */
+	for (mtype = mtype_low; mtype < mtype_high; mtype++) {
+		int order;
+		for (order = cc->order; order < MAX_ORDER; order++) {
+			struct page *page;
+			struct free_area *area;
+			area = &(cc->zone->free_area[order]);
+			if (list_empty(&area->free_list[mtype]))
+				continue;
+
+			/* Take the lock and attempt capture of the page */
+			if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
+				return;
+			if (!list_empty(&area->free_list[mtype])) {
+				page = list_entry(area->free_list[mtype].next,
+							struct page, lru);
+				if (capture_free_page(page, cc->order, mtype)) {
+					spin_unlock_irqrestore(&cc->zone->lock,
+								flags);
+					*cc->page = page;
+					return;
+				}
+			}
+			spin_unlock_irqrestore(&cc->zone->lock, flags);
+		}
+	}
+}
+
 /*
  * Isolate free pages onto a private freelist. Caller must hold zone->lock.
  * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
@@ -645,7 +699,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 static int compact_finished(struct zone *zone,
 			    struct compact_control *cc)
 {
-	unsigned int order;
 	unsigned long watermark;
 
 	if (fatal_signal_pending(current))
@@ -688,14 +741,22 @@ static int compact_finished(struct zone *zone,
 		return COMPACT_CONTINUE;
 
 	/* Direct compactor: Is a suitable page free? */
-	for (order = cc->order; order < MAX_ORDER; order++) {
-		/* Job done if page is free of the right migratetype */
-		if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
-			return COMPACT_PARTIAL;
-
-		/* Job done if allocation would set block type */
-		if (order >= pageblock_order && zone->free_area[order].nr_free)
-			return COMPACT_PARTIAL;
+	if (cc->page) {
+		/* Was a suitable page captured? */
+		if (*cc->page)
+			return COMPACT_PARTIAL;
+	} else {
+		unsigned int order;
+		for (order = cc->order; order < MAX_ORDER; order++) {
+			struct free_area *area = &zone->free_area[order];
+			/* Job done if page is free of the right migratetype */
+			if (!list_empty(&area->free_list[cc->migratetype]))
+				return COMPACT_PARTIAL;
+
+			/* Job done if allocation would set block type */
+			if (order >= pageblock_order && area->nr_free)
+				return COMPACT_PARTIAL;
+		}
 	}
 
 	return COMPACT_CONTINUE;
@@ -817,6 +878,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 				goto out;
 			}
 		}
+
+		/* Capture a page now if it is a suitable size */
+		compact_capture_page(cc);
 	}
 
 out:
@@ -829,7 +893,8 @@ out:
 
 static unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 bool sync, bool *contended)
+				 bool sync, bool *contended,
+				 struct page **page)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -839,6 +904,7 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.zone = zone,
 		.sync = sync,
 		.contended = contended,
+		.page = page,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -860,7 +926,7 @@ int sysctl_extfrag_threshold = 500;
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended)
+			bool sync, bool *contended, struct page **page)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	int may_enter_fs = gfp_mask & __GFP_FS;
@@ -881,7 +947,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 		int status;
 
 		status = compact_zone_order(zone, order, gfp_mask, sync,
-						contended);
+						contended, page);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -936,6 +1002,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
 	struct compact_control cc = {
 		.order = order,
 		.sync = false,
+		.page = NULL,
 	};
 
 	return __compact_pgdat(pgdat, &cc);
@@ -946,6 +1013,7 @@ static int compact_node(int nid)
 	struct compact_control cc = {
 		.order = -1,
 		.sync = true,
+		.page = NULL,
 	};
 
 	return __compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/internal.h b/mm/internal.h
index b8c91b342e24..e549a7fbc296 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -131,6 +131,7 @@ struct compact_control {
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
 	bool *contended;		/* True if a lock was contended */
+	struct page **page;		/* Page captured of requested size */
 };
 
 unsigned long
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5e92698e5395..cfd565dbe124 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1380,16 +1380,11 @@ void split_page(struct page *page, unsigned int order)
 }
 
 /*
- * Similar to split_page except the page is already free. As this is only
- * being used for migration, the migratetype of the block also changes.
- * As this is called with interrupts disabled, the caller is responsible
- * for calling arch_alloc_page() and kernel_map_page() after interrupts
- * are enabled.
- *
- * Note: this is probably too low level an operation for use in drivers.
- * Please consult with lkml before using this in your driver.
+ * Similar to the split_page family of functions except that the page
+ * is required at the given order and is isolated now to prevent races
+ * with parallel allocators.
  */
-int split_free_page(struct page *page)
+int capture_free_page(struct page *page, int alloc_order, int migratetype)
 {
 	unsigned int order;
 	unsigned long watermark;
@@ -1411,10 +1406,11 @@ int split_free_page(struct page *page)
 	rmv_page_order(page);
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
 
-	/* Split into individual pages */
-	set_page_refcounted(page);
-	split_page(page, order);
+	if (alloc_order != order)
+		expand(zone, page, alloc_order, order,
+			&zone->free_area[order], migratetype);
 
+	/* Set the pageblock if the captured page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
@@ -1425,7 +1421,35 @@ int split_free_page(struct page *page)
 		}
 	}
 
-	return 1 << order;
+	return 1UL << order;
+}
+
+/*
+ * Similar to split_page except the page is already free. As this is only
+ * being used for migration, the migratetype of the block also changes.
+ * As this is called with interrupts disabled, the caller is responsible
+ * for calling arch_alloc_page() and kernel_map_page() after interrupts
+ * are enabled.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+int split_free_page(struct page *page)
+{
+	unsigned int order;
+	int nr_pages;
+
+	BUG_ON(!PageBuddy(page));
+	order = page_order(page);
+
+	nr_pages = capture_free_page(page, order, 0);
+	if (!nr_pages)
+		return 0;
+
+	/* Split into individual pages */
+	set_page_refcounted(page);
+	split_page(page, order);
+	return nr_pages;
 }
 
 /*
@@ -2105,7 +2129,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
-	struct page *page;
+	struct page *page = NULL;
 
 	if (!order)
 		return NULL;
@@ -2118,10 +2142,16 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration,
-						contended_compaction);
+						contended_compaction, &page);
 	current->flags &= ~PF_MEMALLOC;
-	if (*did_some_progress != COMPACT_SKIPPED) {
 
+	/* If compaction captured a page, prep and use it */
+	if (page) {
+		prep_new_page(page, order, gfp_mask);
+		goto got_page;
+	}
+
+	if (*did_some_progress != COMPACT_SKIPPED) {
 		/* Page migration frees to the PCP lists but we want merging */
 		drain_pages(get_cpu());
 		put_cpu();
@@ -2131,6 +2161,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 				alloc_flags & ~ALLOC_NO_WATERMARKS,
 				preferred_zone, migratetype);
 		if (page) {
+got_page:
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
 			if (order >= preferred_zone->compact_order_failed)