-rw-r--r--   include/linux/compaction.h |  4
-rw-r--r--   include/linux/mm.h         |  1
-rw-r--r--   mm/compaction.c            | 90
-rw-r--r--   mm/internal.h              |  1
-rw-r--r--   mm/page_alloc.c            | 63
5 files changed, 130 insertions, 29 deletions
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index ef658147e4e8..0e38a1deeb23 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			bool sync, bool *contended);
+			bool sync, bool *contended, struct page **page);
 extern int compact_pgdat(pg_data_t *pgdat, int order);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 
@@ -64,7 +64,7 @@ static inline bool compaction_deferred(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended)
+			bool sync, bool *contended, struct page **page)
 {
 	return COMPACT_CONTINUE;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0514fe9d3c84..5ddb11b2b4bb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -442,6 +442,7 @@ void put_pages_list(struct list_head *pages);
 
 void split_page(struct page *page, unsigned int order);
 int split_free_page(struct page *page);
+int capture_free_page(struct page *page, int alloc_order, int migratetype);
 
 /*
  * Compound pages have a destructor function. Provide a
diff --git a/mm/compaction.c b/mm/compaction.c
index 7168edc7592c..0fbc6b73a522 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -91,6 +91,60 @@ static inline bool compact_trylock_irqsave(spinlock_t *lock,
 	return compact_checklock_irqsave(lock, flags, false, cc);
 }
 
+static void compact_capture_page(struct compact_control *cc)
+{
+	unsigned long flags;
+	int mtype, mtype_low, mtype_high;
+
+	if (!cc->page || *cc->page)
+		return;
+
+	/*
+	 * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
+	 * regardless of the migratetype of the freelist it is captured from.
+	 * This is fine because the order for a high-order MIGRATE_MOVABLE
+	 * allocation is typically at least a pageblock size and overall
+	 * fragmentation is not impaired. Other allocation types must
+	 * capture pages from their own migratelist because otherwise they
+	 * could pollute other pageblocks like MIGRATE_MOVABLE with
+	 * difficult to move pages and making fragmentation worse overall.
+	 */
+	if (cc->migratetype == MIGRATE_MOVABLE) {
+		mtype_low = 0;
+		mtype_high = MIGRATE_PCPTYPES;
+	} else {
+		mtype_low = cc->migratetype;
+		mtype_high = cc->migratetype + 1;
+	}
+
+	/* Speculatively examine the free lists without zone lock */
+	for (mtype = mtype_low; mtype < mtype_high; mtype++) {
+		int order;
+		for (order = cc->order; order < MAX_ORDER; order++) {
+			struct page *page;
+			struct free_area *area;
+			area = &(cc->zone->free_area[order]);
+			if (list_empty(&area->free_list[mtype]))
+				continue;
+
+			/* Take the lock and attempt capture of the page */
+			if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
+				return;
+			if (!list_empty(&area->free_list[mtype])) {
+				page = list_entry(area->free_list[mtype].next,
+							struct page, lru);
+				if (capture_free_page(page, cc->order, mtype)) {
+					spin_unlock_irqrestore(&cc->zone->lock,
+									flags);
+					*cc->page = page;
+					return;
+				}
+			}
+			spin_unlock_irqrestore(&cc->zone->lock, flags);
+		}
+	}
+}
+
 /*
  * Isolate free pages onto a private freelist. Caller must hold zone->lock.
  * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
@@ -645,7 +699,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 static int compact_finished(struct zone *zone,
 			    struct compact_control *cc)
 {
-	unsigned int order;
 	unsigned long watermark;
 
 	if (fatal_signal_pending(current))
@@ -688,14 +741,22 @@ static int compact_finished(struct zone *zone,
 		return COMPACT_CONTINUE;
 
 	/* Direct compactor: Is a suitable page free? */
-	for (order = cc->order; order < MAX_ORDER; order++) {
-		/* Job done if page is free of the right migratetype */
-		if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
-			return COMPACT_PARTIAL;
-
-		/* Job done if allocation would set block type */
-		if (order >= pageblock_order && zone->free_area[order].nr_free)
-			return COMPACT_PARTIAL;
+	if (cc->page) {
+		/* Was a suitable page captured? */
+		if (*cc->page)
+			return COMPACT_PARTIAL;
+	} else {
+		unsigned int order;
+		for (order = cc->order; order < MAX_ORDER; order++) {
+			struct free_area *area = &zone->free_area[cc->order];
+			/* Job done if page is free of the right migratetype */
+			if (!list_empty(&area->free_list[cc->migratetype]))
+				return COMPACT_PARTIAL;
+
+			/* Job done if allocation would set block type */
+			if (cc->order >= pageblock_order && area->nr_free)
+				return COMPACT_PARTIAL;
+		}
 	}
 
 	return COMPACT_CONTINUE;
@@ -817,6 +878,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 				goto out;
 			}
 		}
+
+		/* Capture a page now if it is a suitable size */
+		compact_capture_page(cc);
 	}
 
 out:
@@ -829,7 +893,8 @@ out:
 
 static unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 bool sync, bool *contended)
+				 bool sync, bool *contended,
+				 struct page **page)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -839,6 +904,7 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.zone = zone,
 		.sync = sync,
 		.contended = contended,
+		.page = page,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -860,7 +926,7 @@ int sysctl_extfrag_threshold = 500;
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended)
+			bool sync, bool *contended, struct page **page)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	int may_enter_fs = gfp_mask & __GFP_FS;
@@ -881,7 +947,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 		int status;
 
 		status = compact_zone_order(zone, order, gfp_mask, sync,
-						contended);
+						contended, page);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -936,6 +1002,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
 	struct compact_control cc = {
 		.order = order,
 		.sync = false,
+		.page = NULL,
 	};
 
 	return __compact_pgdat(pgdat, &cc);
@@ -946,6 +1013,7 @@ static int compact_node(int nid)
 	struct compact_control cc = {
 		.order = -1,
 		.sync = true,
+		.page = NULL,
 	};
 
 	return __compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/internal.h b/mm/internal.h
index b8c91b342e24..e549a7fbc296 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -131,6 +131,7 @@ struct compact_control {
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
 	bool *contended;		/* True if a lock was contended */
+	struct page **page;		/* Page captured of requested size */
 };
 
 unsigned long
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5e92698e5395..cfd565dbe124 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1380,16 +1380,11 @@ void split_page(struct page *page, unsigned int order)
 }
 
 /*
- * Similar to split_page except the page is already free. As this is only
- * being used for migration, the migratetype of the block also changes.
- * As this is called with interrupts disabled, the caller is responsible
- * for calling arch_alloc_page() and kernel_map_pages() after interrupts
- * are enabled.
- *
- * Note: this is probably too low level an operation for use in drivers.
- * Please consult with lkml before using this in your driver.
+ * Similar to the split_page family of functions except that the page
+ * required at the given order and being isolated now to prevent races
+ * with parallel allocators
  */
-int split_free_page(struct page *page)
+int capture_free_page(struct page *page, int alloc_order, int migratetype)
 {
 	unsigned int order;
 	unsigned long watermark;
@@ -1411,10 +1406,11 @@ int split_free_page(struct page *page)
 	rmv_page_order(page);
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
 
-	/* Split into individual pages */
-	set_page_refcounted(page);
-	split_page(page, order);
+	if (alloc_order != order)
+		expand(zone, page, alloc_order, order,
+			&zone->free_area[order], migratetype);
 
+	/* Set the pageblock if the captured page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
@@ -1425,7 +1421,35 @@ int split_free_page(struct page *page)
 		}
 	}
 
-	return 1 << order;
+	return 1UL << order;
+}
+
+/*
+ * Similar to split_page except the page is already free. As this is only
+ * being used for migration, the migratetype of the block also changes.
+ * As this is called with interrupts disabled, the caller is responsible
+ * for calling arch_alloc_page() and kernel_map_pages() after interrupts
+ * are enabled.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+int split_free_page(struct page *page)
+{
+	unsigned int order;
+	int nr_pages;
+
+	BUG_ON(!PageBuddy(page));
+	order = page_order(page);
+
+	nr_pages = capture_free_page(page, order, 0);
+	if (!nr_pages)
+		return 0;
+
+	/* Split into individual pages */
+	set_page_refcounted(page);
+	split_page(page, order);
+	return nr_pages;
 }
 
 /*
@@ -2105,7 +2129,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
-	struct page *page;
+	struct page *page = NULL;
 
 	if (!order)
 		return NULL;
@@ -2118,10 +2142,16 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration,
-						contended_compaction);
+						contended_compaction, &page);
 	current->flags &= ~PF_MEMALLOC;
-	if (*did_some_progress != COMPACT_SKIPPED) {
 
+	/* If compaction captured a page, prep and use it */
+	if (page) {
+		prep_new_page(page, order, gfp_mask);
+		goto got_page;
+	}
+
+	if (*did_some_progress != COMPACT_SKIPPED) {
 		/* Page migration frees to the PCP lists but we want merging */
 		drain_pages(get_cpu());
 		put_cpu();
@@ -2131,6 +2161,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 					alloc_flags & ~ALLOC_NO_WATERMARKS,
 					preferred_zone, migratetype);
 		if (page) {
+got_page:
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
 			if (order >= preferred_zone->compact_order_failed)