author     Linus Torvalds <torvalds@linux-foundation.org>   2012-05-25 12:18:59 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-05-25 12:18:59 -0400
commit     d484864dd96e1830e7689510597707c1df8cd681 (patch)
tree       51551708ba3f26d05575fa91daaf0c0d970a77c3 /mm
parent     be87cfb47c5c740f7b17929bcd7c480b228513e0 (diff)
parent     0f51596bd39a5c928307ffcffc9ba07f90f42a8b (diff)
Merge branch 'for-linus' of git://git.linaro.org/people/mszyprowski/linux-dma-mapping
Pull CMA and ARM DMA-mapping updates from Marek Szyprowski:
 "These patches contain two major updates for the DMA mapping subsystem
  (mainly for the ARM architecture).

  The first one is the Contiguous Memory Allocator (CMA), which makes it
  possible for device drivers to allocate big contiguous chunks of
  memory after the system has booted.  The main difference from similar
  frameworks is that CMA transparently reuses the memory region reserved
  for big chunk allocations as regular system memory, so no memory is
  wasted when no big chunk is allocated.  Once an allocation request is
  issued, the framework migrates system pages to create space for the
  required big chunk of physically contiguous memory.

  For more information one can refer to these LWN articles:

   - 'A reworked contiguous memory allocator':
	http://lwn.net/Articles/447405/
   - 'CMA and ARM':
	http://lwn.net/Articles/450286/
   - 'A deep dive into CMA':
	http://lwn.net/Articles/486301/
   - and the following thread with the patches and links to all previous
     versions:
	https://lkml.org/lkml/2012/4/3/204

  The main client of this new framework is the ARM DMA-mapping
  subsystem.

  The second part is a complete redesign of the ARM DMA-mapping
  subsystem.  The core implementation has been changed to use the common
  struct dma_map_ops based infrastructure, together with the new DMA
  attributes merged in v3.4-rc2.  This allows more than one
  implementation of the dma-mapping calls to coexist and to be
  changed/selected per struct device.  The first client of this new
  infrastructure is the dmabounce implementation, which has been
  completely cut out of the core, common code.

  The last patch of this redesign introduces a new, experimental
  implementation of the dma-mapping calls on top of the generic IOMMU
  framework.  This lets an ARM sub-platform transparently use an IOMMU
  for DMA-mapping calls when the required IOMMU hardware is available.
  For more information please refer to the following thread:
	http://www.spinics.net/lists/arm-kernel/msg175729.html

  The last patch merges changes from both updates and resolves the
  conflicts which cannot be avoided when patches have been applied to
  the same files (mainly arch/arm/mm/dma-mapping.c)."

Acked by Andrew Morton <akpm@linux-foundation.org>:
 "Yup, this one please.  It's had much work, plenty of review and I
  think even Russell is happy with it."

* 'for-linus' of git://git.linaro.org/people/mszyprowski/linux-dma-mapping: (28 commits)
  ARM: dma-mapping: use PMD size for section unmap
  cma: fix migration mode
  ARM: integrate CMA with DMA-mapping subsystem
  X86: integrate CMA with DMA-mapping subsystem
  drivers: add Contiguous Memory Allocator
  mm: trigger page reclaim in alloc_contig_range() to stabilise watermarks
  mm: extract reclaim code from __alloc_pages_direct_reclaim()
  mm: Serialize access to min_free_kbytes
  mm: page_isolation: MIGRATE_CMA isolation functions added
  mm: mmzone: MIGRATE_CMA migration type added
  mm: page_alloc: change fallbacks array handling
  mm: page_alloc: introduce alloc_contig_range()
  mm: compaction: export some of the functions
  mm: compaction: introduce isolate_freepages_range()
  mm: compaction: introduce map_pages()
  mm: compaction: introduce isolate_migratepages_range()
  mm: page_alloc: remove trailing whitespace
  ARM: dma-mapping: add support for IOMMU mapper
  ARM: dma-mapping: use alloc, mmap, free from dma_ops
  ARM: dma-mapping: remove redundant code and do the cleanup
  ...

Conflicts:
	arch/x86/include/asm/dma-mapping.h
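To a device driver, the CMA region is meant to be invisible: on ARM the allocation still goes through the ordinary DMA API, and the dma-mapping layer pulls the backing pages out of the contiguous area when one is configured. A minimal driver-side sketch, assuming a struct device that has already been probed (the helper names here are illustrative, not part of this merge):

	#include <linux/dma-mapping.h>

	/* Sketch: allocate a large, physically contiguous DMA buffer.
	 * With CMA configured, dma_alloc_coherent() can satisfy
	 * multi-megabyte requests long after boot by migrating movable
	 * pages out of the reserved area; without it, such a request
	 * would likely fail once memory is fragmented. */
	static void *alloc_frame_buffer(struct device *dev, size_t size,
					dma_addr_t *dma_handle)
	{
		return dma_alloc_coherent(dev, size, dma_handle, GFP_KERNEL);
	}

	static void free_frame_buffer(struct device *dev, size_t size,
				      void *cpu_addr, dma_addr_t dma_handle)
	{
		dma_free_coherent(dev, size, cpu_addr, dma_handle);
	}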
Diffstat (limited to 'mm')
-rw-r--r--   mm/Kconfig           |   2
-rw-r--r--   mm/Makefile          |   3
-rw-r--r--   mm/compaction.c      | 418
-rw-r--r--   mm/internal.h        |  33
-rw-r--r--   mm/memory-failure.c  |   2
-rw-r--r--   mm/memory_hotplug.c  |   6
-rw-r--r--   mm/page_alloc.c      | 409
-rw-r--r--   mm/page_isolation.c  |  15
-rw-r--r--   mm/vmstat.c          |   3
9 files changed, 675 insertions, 216 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index e338407f1225..39220026c797 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -198,7 +198,7 @@ config COMPACTION
198config MIGRATION 198config MIGRATION
199 bool "Page migration" 199 bool "Page migration"
200 def_bool y 200 def_bool y
201 depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION 201 depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA
202 help 202 help
203 Allows the migration of the physical location of pages of processes 203 Allows the migration of the physical location of pages of processes
204 while the virtual addresses are not changed. This is useful in 204 while the virtual addresses are not changed. This is useful in
diff --git a/mm/Makefile b/mm/Makefile
index 50ec00ef2a0e..8aada89efbbb 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -13,7 +13,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
13 readahead.o swap.o truncate.o vmscan.o shmem.o \ 13 readahead.o swap.o truncate.o vmscan.o shmem.o \
14 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ 14 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
15 page_isolation.o mm_init.o mmu_context.o percpu.o \ 15 page_isolation.o mm_init.o mmu_context.o percpu.o \
16 $(mmu-y) 16 compaction.o $(mmu-y)
17obj-y += init-mm.o 17obj-y += init-mm.o
18 18
19ifdef CONFIG_NO_BOOTMEM 19ifdef CONFIG_NO_BOOTMEM
@@ -32,7 +32,6 @@ obj-$(CONFIG_NUMA) += mempolicy.o
32obj-$(CONFIG_SPARSEMEM) += sparse.o 32obj-$(CONFIG_SPARSEMEM) += sparse.o
33obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o 33obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
34obj-$(CONFIG_SLOB) += slob.o 34obj-$(CONFIG_SLOB) += slob.o
35obj-$(CONFIG_COMPACTION) += compaction.o
36obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o 35obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
37obj-$(CONFIG_KSM) += ksm.o 36obj-$(CONFIG_KSM) += ksm.o
38obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o 37obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
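Dropping the CONFIG_COMPACTION conditional from the Makefile only works because mm/compaction.c itself is now split by preprocessor guards, as the next diff shows. The resulting shape of the file is roughly the following outline (a sketch, not literal source):

	#if defined CONFIG_COMPACTION || defined CONFIG_CMA
	/* shared with CMA: release_freepages(), map_pages(),
	 * isolate_freepages_block(), isolate_freepages_range(),
	 * isolate_migratepages_range() */
	#endif /* CONFIG_COMPACTION || CONFIG_CMA */

	#ifdef CONFIG_COMPACTION
	/* compaction proper: suitable_migration_target(),
	 * isolate_freepages(), compact_zone() and the sysfs interface */
	#endif /* CONFIG_COMPACTION */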
diff --git a/mm/compaction.c b/mm/compaction.c
index 74a8c825ff28..da7d35ea5103 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -16,30 +16,11 @@
16#include <linux/sysfs.h> 16#include <linux/sysfs.h>
17#include "internal.h" 17#include "internal.h"
18 18
19#if defined CONFIG_COMPACTION || defined CONFIG_CMA
20
19#define CREATE_TRACE_POINTS 21#define CREATE_TRACE_POINTS
20#include <trace/events/compaction.h> 22#include <trace/events/compaction.h>
21 23
22/*
23 * compact_control is used to track pages being migrated and the free pages
24 * they are being migrated to during memory compaction. The free_pfn starts
25 * at the end of a zone and migrate_pfn begins at the start. Movable pages
26 * are moved to the end of a zone during a compaction run and the run
27 * completes when free_pfn <= migrate_pfn
28 */
29struct compact_control {
30 struct list_head freepages; /* List of free pages to migrate to */
31 struct list_head migratepages; /* List of pages being migrated */
32 unsigned long nr_freepages; /* Number of isolated free pages */
33 unsigned long nr_migratepages; /* Number of pages to migrate */
34 unsigned long free_pfn; /* isolate_freepages search base */
35 unsigned long migrate_pfn; /* isolate_migratepages search base */
36 bool sync; /* Synchronous migration */
37
38 int order; /* order a direct compactor needs */
39 int migratetype; /* MOVABLE, RECLAIMABLE etc */
40 struct zone *zone;
41};
42
43static unsigned long release_freepages(struct list_head *freelist) 24static unsigned long release_freepages(struct list_head *freelist)
44{ 25{
45 struct page *page, *next; 26 struct page *page, *next;
@@ -54,24 +35,35 @@ static unsigned long release_freepages(struct list_head *freelist)
54 return count; 35 return count;
55} 36}
56 37
57/* Isolate free pages onto a private freelist. Must hold zone->lock */ 38static void map_pages(struct list_head *list)
58static unsigned long isolate_freepages_block(struct zone *zone, 39{
59 unsigned long blockpfn, 40 struct page *page;
60 struct list_head *freelist) 41
42 list_for_each_entry(page, list, lru) {
43 arch_alloc_page(page, 0);
44 kernel_map_pages(page, 1, 1);
45 }
46}
47
48static inline bool migrate_async_suitable(int migratetype)
49{
50 return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
51}
52
53/*
54 * Isolate free pages onto a private freelist. Caller must hold zone->lock.
55 * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
56 * pages inside of the pageblock (even though it may still end up isolating
57 * some pages).
58 */
59static unsigned long isolate_freepages_block(unsigned long blockpfn,
60 unsigned long end_pfn,
61 struct list_head *freelist,
62 bool strict)
61{ 63{
62 unsigned long zone_end_pfn, end_pfn;
63 int nr_scanned = 0, total_isolated = 0; 64 int nr_scanned = 0, total_isolated = 0;
64 struct page *cursor; 65 struct page *cursor;
65 66
66 /* Get the last PFN we should scan for free pages at */
67 zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
68 end_pfn = min(blockpfn + pageblock_nr_pages, zone_end_pfn);
69
70 /* Find the first usable PFN in the block to initialse page cursor */
71 for (; blockpfn < end_pfn; blockpfn++) {
72 if (pfn_valid_within(blockpfn))
73 break;
74 }
75 cursor = pfn_to_page(blockpfn); 67 cursor = pfn_to_page(blockpfn);
76 68
77 /* Isolate free pages. This assumes the block is valid */ 69 /* Isolate free pages. This assumes the block is valid */
@@ -79,15 +71,23 @@ static unsigned long isolate_freepages_block(struct zone *zone,
79 int isolated, i; 71 int isolated, i;
80 struct page *page = cursor; 72 struct page *page = cursor;
81 73
82 if (!pfn_valid_within(blockpfn)) 74 if (!pfn_valid_within(blockpfn)) {
75 if (strict)
76 return 0;
83 continue; 77 continue;
78 }
84 nr_scanned++; 79 nr_scanned++;
85 80
86 if (!PageBuddy(page)) 81 if (!PageBuddy(page)) {
82 if (strict)
83 return 0;
87 continue; 84 continue;
85 }
88 86
89 /* Found a free page, break it into order-0 pages */ 87 /* Found a free page, break it into order-0 pages */
90 isolated = split_free_page(page); 88 isolated = split_free_page(page);
89 if (!isolated && strict)
90 return 0;
91 total_isolated += isolated; 91 total_isolated += isolated;
92 for (i = 0; i < isolated; i++) { 92 for (i = 0; i < isolated; i++) {
93 list_add(&page->lru, freelist); 93 list_add(&page->lru, freelist);
@@ -105,114 +105,71 @@ static unsigned long isolate_freepages_block(struct zone *zone,
105 return total_isolated; 105 return total_isolated;
106} 106}
107 107
108/* Returns true if the page is within a block suitable for migration to */ 108/**
109static bool suitable_migration_target(struct page *page) 109 * isolate_freepages_range() - isolate free pages.
110{ 110 * @start_pfn: The first PFN to start isolating.
111 111 * @end_pfn: The one-past-last PFN.
112 int migratetype = get_pageblock_migratetype(page); 112 *
113 113 * Non-free pages, invalid PFNs, or zone boundaries within the
114 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ 114 * [start_pfn, end_pfn) range are considered errors, cause function to
115 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) 115 * undo its actions and return zero.
116 return false; 116 *
117 117 * Otherwise, function returns one-past-the-last PFN of isolated page
118 /* If the page is a large free page, then allow migration */ 118 * (which may be greater then end_pfn if end fell in a middle of
119 if (PageBuddy(page) && page_order(page) >= pageblock_order) 119 * a free page).
120 return true;
121
122 /* If the block is MIGRATE_MOVABLE, allow migration */
123 if (migratetype == MIGRATE_MOVABLE)
124 return true;
125
126 /* Otherwise skip the block */
127 return false;
128}
129
130/*
131 * Based on information in the current compact_control, find blocks
132 * suitable for isolating free pages from and then isolate them.
133 */ 120 */
134static void isolate_freepages(struct zone *zone, 121unsigned long
135 struct compact_control *cc) 122isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn)
136{ 123{
137 struct page *page; 124 unsigned long isolated, pfn, block_end_pfn, flags;
138 unsigned long high_pfn, low_pfn, pfn; 125 struct zone *zone = NULL;
139 unsigned long flags; 126 LIST_HEAD(freelist);
140 int nr_freepages = cc->nr_freepages;
141 struct list_head *freelist = &cc->freepages;
142
143 /*
144 * Initialise the free scanner. The starting point is where we last
145 * scanned from (or the end of the zone if starting). The low point
146 * is the end of the pageblock the migration scanner is using.
147 */
148 pfn = cc->free_pfn;
149 low_pfn = cc->migrate_pfn + pageblock_nr_pages;
150 127
151 /* 128 if (pfn_valid(start_pfn))
152 * Take care that if the migration scanner is at the end of the zone 129 zone = page_zone(pfn_to_page(start_pfn));
153 * that the free scanner does not accidentally move to the next zone
154 * in the next isolation cycle.
155 */
156 high_pfn = min(low_pfn, pfn);
157
158 /*
159 * Isolate free pages until enough are available to migrate the
160 * pages on cc->migratepages. We stop searching if the migrate
161 * and free page scanners meet or enough free pages are isolated.
162 */
163 for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
164 pfn -= pageblock_nr_pages) {
165 unsigned long isolated;
166 130
167 if (!pfn_valid(pfn)) 131 for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) {
168 continue; 132 if (!pfn_valid(pfn) || zone != page_zone(pfn_to_page(pfn)))
133 break;
169 134
170 /* 135 /*
171 * Check for overlapping nodes/zones. It's possible on some 136 * On subsequent iterations ALIGN() is actually not needed,
172 * configurations to have a setup like 137 * but we keep it that we not to complicate the code.
173 * node0 node1 node0
174 * i.e. it's possible that all pages within a zones range of
175 * pages do not belong to a single zone.
176 */ 138 */
177 page = pfn_to_page(pfn); 139 block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
178 if (page_zone(page) != zone) 140 block_end_pfn = min(block_end_pfn, end_pfn);
179 continue;
180 141
181 /* Check the block is suitable for migration */ 142 spin_lock_irqsave(&zone->lock, flags);
182 if (!suitable_migration_target(page)) 143 isolated = isolate_freepages_block(pfn, block_end_pfn,
183 continue; 144 &freelist, true);
145 spin_unlock_irqrestore(&zone->lock, flags);
184 146
185 /* 147 /*
186 * Found a block suitable for isolating free pages from. Now 148 * In strict mode, isolate_freepages_block() returns 0 if
187 * we disabled interrupts, double check things are ok and 149 * there are any holes in the block (ie. invalid PFNs or
188 * isolate the pages. This is to minimise the time IRQs 150 * non-free pages).
189 * are disabled
190 */ 151 */
191 isolated = 0; 152 if (!isolated)
192 spin_lock_irqsave(&zone->lock, flags); 153 break;
193 if (suitable_migration_target(page)) {
194 isolated = isolate_freepages_block(zone, pfn, freelist);
195 nr_freepages += isolated;
196 }
197 spin_unlock_irqrestore(&zone->lock, flags);
198 154
199 /* 155 /*
200 * Record the highest PFN we isolated pages from. When next 156 * If we managed to isolate pages, it is always (1 << n) *
201 * looking for free pages, the search will restart here as 157 * pageblock_nr_pages for some non-negative n. (Max order
202 * page migration may have returned some pages to the allocator 158 * page may span two pageblocks).
203 */ 159 */
204 if (isolated)
205 high_pfn = max(high_pfn, pfn);
206 } 160 }
207 161
208 /* split_free_page does not map the pages */ 162 /* split_free_page does not map the pages */
209 list_for_each_entry(page, freelist, lru) { 163 map_pages(&freelist);
210 arch_alloc_page(page, 0); 164
211 kernel_map_pages(page, 1, 1); 165 if (pfn < end_pfn) {
166 /* Loop terminated early, cleanup. */
167 release_freepages(&freelist);
168 return 0;
212 } 169 }
213 170
214 cc->free_pfn = high_pfn; 171 /* We don't use freelists for anything. */
215 cc->nr_freepages = nr_freepages; 172 return pfn;
216} 173}
217 174
218/* Update the number of anon and file isolated pages in the zone */ 175/* Update the number of anon and file isolated pages in the zone */
@@ -243,38 +200,34 @@ static bool too_many_isolated(struct zone *zone)
243 return isolated > (inactive + active) / 2; 200 return isolated > (inactive + active) / 2;
244} 201}
245 202
246/* possible outcome of isolate_migratepages */ 203/**
247typedef enum { 204 * isolate_migratepages_range() - isolate all migrate-able pages in range.
248 ISOLATE_ABORT, /* Abort compaction now */ 205 * @zone: Zone pages are in.
249 ISOLATE_NONE, /* No pages isolated, continue scanning */ 206 * @cc: Compaction control structure.
250 ISOLATE_SUCCESS, /* Pages isolated, migrate */ 207 * @low_pfn: The first PFN of the range.
251} isolate_migrate_t; 208 * @end_pfn: The one-past-the-last PFN of the range.
252 209 *
253/* 210 * Isolate all pages that can be migrated from the range specified by
254 * Isolate all pages that can be migrated from the block pointed to by 211 * [low_pfn, end_pfn). Returns zero if there is a fatal signal
255 * the migrate scanner within compact_control. 212 * pending), otherwise PFN of the first page that was not scanned
213 * (which may be both less, equal to or more then end_pfn).
214 *
215 * Assumes that cc->migratepages is empty and cc->nr_migratepages is
216 * zero.
217 *
218 * Apart from cc->migratepages and cc->nr_migratetypes this function
219 * does not modify any cc's fields, in particular it does not modify
220 * (or read for that matter) cc->migrate_pfn.
256 */ 221 */
257static isolate_migrate_t isolate_migratepages(struct zone *zone, 222unsigned long
258 struct compact_control *cc) 223isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
224 unsigned long low_pfn, unsigned long end_pfn)
259{ 225{
260 unsigned long low_pfn, end_pfn;
261 unsigned long last_pageblock_nr = 0, pageblock_nr; 226 unsigned long last_pageblock_nr = 0, pageblock_nr;
262 unsigned long nr_scanned = 0, nr_isolated = 0; 227 unsigned long nr_scanned = 0, nr_isolated = 0;
263 struct list_head *migratelist = &cc->migratepages; 228 struct list_head *migratelist = &cc->migratepages;
264 isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; 229 isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE;
265 230
266 /* Do not scan outside zone boundaries */
267 low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);
268
269 /* Only scan within a pageblock boundary */
270 end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages);
271
272 /* Do not cross the free scanner or scan within a memory hole */
273 if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) {
274 cc->migrate_pfn = end_pfn;
275 return ISOLATE_NONE;
276 }
277
278 /* 231 /*
279 * Ensure that there are not too many pages isolated from the LRU 232 * Ensure that there are not too many pages isolated from the LRU
280 * list by either parallel reclaimers or compaction. If there are, 233 * list by either parallel reclaimers or compaction. If there are,
@@ -283,12 +236,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
283 while (unlikely(too_many_isolated(zone))) { 236 while (unlikely(too_many_isolated(zone))) {
284 /* async migration should just abort */ 237 /* async migration should just abort */
285 if (!cc->sync) 238 if (!cc->sync)
286 return ISOLATE_ABORT; 239 return 0;
287 240
288 congestion_wait(BLK_RW_ASYNC, HZ/10); 241 congestion_wait(BLK_RW_ASYNC, HZ/10);
289 242
290 if (fatal_signal_pending(current)) 243 if (fatal_signal_pending(current))
291 return ISOLATE_ABORT; 244 return 0;
292 } 245 }
293 246
294 /* Time to isolate some pages for migration */ 247 /* Time to isolate some pages for migration */
@@ -351,7 +304,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
351 */ 304 */
352 pageblock_nr = low_pfn >> pageblock_order; 305 pageblock_nr = low_pfn >> pageblock_order;
353 if (!cc->sync && last_pageblock_nr != pageblock_nr && 306 if (!cc->sync && last_pageblock_nr != pageblock_nr &&
354 get_pageblock_migratetype(page) != MIGRATE_MOVABLE) { 307 !migrate_async_suitable(get_pageblock_migratetype(page))) {
355 low_pfn += pageblock_nr_pages; 308 low_pfn += pageblock_nr_pages;
356 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; 309 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
357 last_pageblock_nr = pageblock_nr; 310 last_pageblock_nr = pageblock_nr;
@@ -396,11 +349,124 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
396 acct_isolated(zone, cc); 349 acct_isolated(zone, cc);
397 350
398 spin_unlock_irq(&zone->lru_lock); 351 spin_unlock_irq(&zone->lru_lock);
399 cc->migrate_pfn = low_pfn;
400 352
401 trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); 353 trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
402 354
403 return ISOLATE_SUCCESS; 355 return low_pfn;
356}
357
358#endif /* CONFIG_COMPACTION || CONFIG_CMA */
359#ifdef CONFIG_COMPACTION
360
361/* Returns true if the page is within a block suitable for migration to */
362static bool suitable_migration_target(struct page *page)
363{
364
365 int migratetype = get_pageblock_migratetype(page);
366
367 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
368 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
369 return false;
370
371 /* If the page is a large free page, then allow migration */
372 if (PageBuddy(page) && page_order(page) >= pageblock_order)
373 return true;
374
375 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
376 if (migrate_async_suitable(migratetype))
377 return true;
378
379 /* Otherwise skip the block */
380 return false;
381}
382
383/*
384 * Based on information in the current compact_control, find blocks
385 * suitable for isolating free pages from and then isolate them.
386 */
387static void isolate_freepages(struct zone *zone,
388 struct compact_control *cc)
389{
390 struct page *page;
391 unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn;
392 unsigned long flags;
393 int nr_freepages = cc->nr_freepages;
394 struct list_head *freelist = &cc->freepages;
395
396 /*
397 * Initialise the free scanner. The starting point is where we last
398 * scanned from (or the end of the zone if starting). The low point
399 * is the end of the pageblock the migration scanner is using.
400 */
401 pfn = cc->free_pfn;
402 low_pfn = cc->migrate_pfn + pageblock_nr_pages;
403
404 /*
405 * Take care that if the migration scanner is at the end of the zone
406 * that the free scanner does not accidentally move to the next zone
407 * in the next isolation cycle.
408 */
409 high_pfn = min(low_pfn, pfn);
410
411 zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
412
413 /*
414 * Isolate free pages until enough are available to migrate the
415 * pages on cc->migratepages. We stop searching if the migrate
416 * and free page scanners meet or enough free pages are isolated.
417 */
418 for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
419 pfn -= pageblock_nr_pages) {
420 unsigned long isolated;
421
422 if (!pfn_valid(pfn))
423 continue;
424
425 /*
426 * Check for overlapping nodes/zones. It's possible on some
427 * configurations to have a setup like
428 * node0 node1 node0
429 * i.e. it's possible that all pages within a zones range of
430 * pages do not belong to a single zone.
431 */
432 page = pfn_to_page(pfn);
433 if (page_zone(page) != zone)
434 continue;
435
436 /* Check the block is suitable for migration */
437 if (!suitable_migration_target(page))
438 continue;
439
440 /*
441 * Found a block suitable for isolating free pages from. Now
442 * we disabled interrupts, double check things are ok and
443 * isolate the pages. This is to minimise the time IRQs
444 * are disabled
445 */
446 isolated = 0;
447 spin_lock_irqsave(&zone->lock, flags);
448 if (suitable_migration_target(page)) {
449 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
450 isolated = isolate_freepages_block(pfn, end_pfn,
451 freelist, false);
452 nr_freepages += isolated;
453 }
454 spin_unlock_irqrestore(&zone->lock, flags);
455
456 /*
457 * Record the highest PFN we isolated pages from. When next
458 * looking for free pages, the search will restart here as
459 * page migration may have returned some pages to the allocator
460 */
461 if (isolated)
462 high_pfn = max(high_pfn, pfn);
463 }
464
465 /* split_free_page does not map the pages */
466 map_pages(freelist);
467
468 cc->free_pfn = high_pfn;
469 cc->nr_freepages = nr_freepages;
404} 470}
405 471
406/* 472/*
@@ -449,6 +515,44 @@ static void update_nr_listpages(struct compact_control *cc)
449 cc->nr_freepages = nr_freepages; 515 cc->nr_freepages = nr_freepages;
450} 516}
451 517
518/* possible outcome of isolate_migratepages */
519typedef enum {
520 ISOLATE_ABORT, /* Abort compaction now */
521 ISOLATE_NONE, /* No pages isolated, continue scanning */
522 ISOLATE_SUCCESS, /* Pages isolated, migrate */
523} isolate_migrate_t;
524
525/*
526 * Isolate all pages that can be migrated from the block pointed to by
527 * the migrate scanner within compact_control.
528 */
529static isolate_migrate_t isolate_migratepages(struct zone *zone,
530 struct compact_control *cc)
531{
532 unsigned long low_pfn, end_pfn;
533
534 /* Do not scan outside zone boundaries */
535 low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);
536
537 /* Only scan within a pageblock boundary */
538 end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages);
539
540 /* Do not cross the free scanner or scan within a memory hole */
541 if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) {
542 cc->migrate_pfn = end_pfn;
543 return ISOLATE_NONE;
544 }
545
546 /* Perform the isolation */
547 low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn);
548 if (!low_pfn)
549 return ISOLATE_ABORT;
550
551 cc->migrate_pfn = low_pfn;
552
553 return ISOLATE_SUCCESS;
554}
555
452static int compact_finished(struct zone *zone, 556static int compact_finished(struct zone *zone,
453 struct compact_control *cc) 557 struct compact_control *cc)
454{ 558{
@@ -795,3 +899,5 @@ void compaction_unregister_node(struct node *node)
795 return device_remove_file(&node->dev, &dev_attr_compact); 899 return device_remove_file(&node->dev, &dev_attr_compact);
796} 900}
797#endif /* CONFIG_SYSFS && CONFIG_NUMA */ 901#endif /* CONFIG_SYSFS && CONFIG_NUMA */
902
903#endif /* CONFIG_COMPACTION */
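One subtlety of the new isolate_freepages_range() is worth spelling out: split_free_page() always takes a whole buddy page, so the returned PFN can land past the requested end. A small worked example (the PFNs are invented for illustration) of the tail trimming this forces on the caller, exactly as alloc_contig_range() does further down in this merge:

	/*
	 * Request: isolate [start, end) = [0x1000, 0x101e), but PFN 0x1018
	 * begins an order-3 buddy (8 pages, 0x1018..0x101f).  The scanner
	 * has to split and take the whole buddy, so:
	 *
	 *	outer_end = isolate_freepages_range(outer_start, end);
	 *	// outer_end == 0x1020, two pages beyond 'end'
	 *
	 * and the caller immediately hands the surplus back:
	 */
	if (end != outer_end)
		free_contig_range(end, outer_end - end);	/* PFNs 0x101e, 0x101f */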
diff --git a/mm/internal.h b/mm/internal.h
index 2189af491783..aee4761cf9a9 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -100,6 +100,39 @@ extern void prep_compound_page(struct page *page, unsigned long order);
100extern bool is_free_buddy_page(struct page *page); 100extern bool is_free_buddy_page(struct page *page);
101#endif 101#endif
102 102
103#if defined CONFIG_COMPACTION || defined CONFIG_CMA
104
105/*
106 * in mm/compaction.c
107 */
108/*
109 * compact_control is used to track pages being migrated and the free pages
110 * they are being migrated to during memory compaction. The free_pfn starts
111 * at the end of a zone and migrate_pfn begins at the start. Movable pages
112 * are moved to the end of a zone during a compaction run and the run
113 * completes when free_pfn <= migrate_pfn
114 */
115struct compact_control {
116 struct list_head freepages; /* List of free pages to migrate to */
117 struct list_head migratepages; /* List of pages being migrated */
118 unsigned long nr_freepages; /* Number of isolated free pages */
119 unsigned long nr_migratepages; /* Number of pages to migrate */
120 unsigned long free_pfn; /* isolate_freepages search base */
121 unsigned long migrate_pfn; /* isolate_migratepages search base */
122 bool sync; /* Synchronous migration */
123
124 int order; /* order a direct compactor needs */
125 int migratetype; /* MOVABLE, RECLAIMABLE etc */
126 struct zone *zone;
127};
128
129unsigned long
130isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn);
131unsigned long
132isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
133 unsigned long low_pfn, unsigned long end_pfn);
134
135#endif
103 136
104/* 137/*
105 * function for dealing with page's order in buddy system. 138 * function for dealing with page's order in buddy system.
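Moving compact_control and the two range helpers into mm/internal.h is what lets page_alloc.c drive migration directly. The minimal setup a range caller needs, mirroring __alloc_contig_migrate_range() later in this merge, is roughly:

	struct compact_control cc = {
		.nr_migratepages = 0,
		.order = -1,		/* no specific allocation order */
		.zone = page_zone(pfn_to_page(start)),
		.sync = true,		/* synchronous migration */
	};
	INIT_LIST_HEAD(&cc.migratepages);
	/* freepages, free_pfn and migrate_pfn are only used by the
	 * compaction scanners and can stay zero-initialised here */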
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 97cc2733551a..c99ad4e6b88c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1404,7 +1404,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
1404 /* Not a free page */ 1404 /* Not a free page */
1405 ret = 1; 1405 ret = 1;
1406 } 1406 }
1407 unset_migratetype_isolate(p); 1407 unset_migratetype_isolate(p, MIGRATE_MOVABLE);
1408 unlock_memory_hotplug(); 1408 unlock_memory_hotplug();
1409 return ret; 1409 return ret;
1410} 1410}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6629fafd6ce4..fc898cb4fe8f 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -891,7 +891,7 @@ static int __ref offline_pages(unsigned long start_pfn,
891 nr_pages = end_pfn - start_pfn; 891 nr_pages = end_pfn - start_pfn;
892 892
893 /* set above range as isolated */ 893 /* set above range as isolated */
894 ret = start_isolate_page_range(start_pfn, end_pfn); 894 ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
895 if (ret) 895 if (ret)
896 goto out; 896 goto out;
897 897
@@ -956,7 +956,7 @@ repeat:
956 We cannot do rollback at this point. */ 956 We cannot do rollback at this point. */
957 offline_isolated_pages(start_pfn, end_pfn); 957 offline_isolated_pages(start_pfn, end_pfn);
958 /* reset pagetype flags and makes migrate type to be MOVABLE */ 958 /* reset pagetype flags and makes migrate type to be MOVABLE */
959 undo_isolate_page_range(start_pfn, end_pfn); 959 undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
960 /* removal success */ 960 /* removal success */
961 zone->present_pages -= offlined_pages; 961 zone->present_pages -= offlined_pages;
962 zone->zone_pgdat->node_present_pages -= offlined_pages; 962 zone->zone_pgdat->node_present_pages -= offlined_pages;
@@ -981,7 +981,7 @@ failed_removal:
981 start_pfn, end_pfn); 981 start_pfn, end_pfn);
982 memory_notify(MEM_CANCEL_OFFLINE, &arg); 982 memory_notify(MEM_CANCEL_OFFLINE, &arg);
983 /* pushback to free area */ 983 /* pushback to free area */
984 undo_isolate_page_range(start_pfn, end_pfn); 984 undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
985 985
986out: 986out:
987 unlock_memory_hotplug(); 987 unlock_memory_hotplug();
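The new migratetype argument exists because memory hot-remove and CMA now share this isolation machinery but must restore different pageblock types when they are done. Schematically:

	/* memory hot-remove: blocks were movable, go back to MIGRATE_MOVABLE */
	start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
	/* ... offline the pages ... */
	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);

	/* alloc_contig_range() on a CMA area: blocks must come back as
	 * MIGRATE_CMA, otherwise the reserved region would gradually
	 * decay into ordinary movable memory */
	start_isolate_page_range(pfn_max_align_down(start),
				 pfn_max_align_up(end), MIGRATE_CMA);
	/* ... migrate out and grab the pages ... */
	undo_isolate_page_range(pfn_max_align_down(start),
				 pfn_max_align_up(end), MIGRATE_CMA);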
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1851df600438..bab8e3bc4202 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -57,6 +57,7 @@
57#include <linux/ftrace_event.h> 57#include <linux/ftrace_event.h>
58#include <linux/memcontrol.h> 58#include <linux/memcontrol.h>
59#include <linux/prefetch.h> 59#include <linux/prefetch.h>
60#include <linux/migrate.h>
60#include <linux/page-debug-flags.h> 61#include <linux/page-debug-flags.h>
61 62
62#include <asm/tlbflush.h> 63#include <asm/tlbflush.h>
@@ -513,10 +514,10 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
513 * free pages of length of (1 << order) and marked with _mapcount -2. Page's 514 * free pages of length of (1 << order) and marked with _mapcount -2. Page's
514 * order is recorded in page_private(page) field. 515 * order is recorded in page_private(page) field.
515 * So when we are allocating or freeing one, we can derive the state of the 516 * So when we are allocating or freeing one, we can derive the state of the
516 * other. That is, if we allocate a small block, and both were 517 * other. That is, if we allocate a small block, and both were
517 * free, the remainder of the region must be split into blocks. 518 * free, the remainder of the region must be split into blocks.
518 * If a block is freed, and its buddy is also free, then this 519 * If a block is freed, and its buddy is also free, then this
519 * triggers coalescing into a block of larger size. 520 * triggers coalescing into a block of larger size.
520 * 521 *
521 * -- wli 522 * -- wli
522 */ 523 */
@@ -749,6 +750,24 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
749 __free_pages(page, order); 750 __free_pages(page, order);
750} 751}
751 752
753#ifdef CONFIG_CMA
754/* Free whole pageblock and set it's migration type to MIGRATE_CMA. */
755void __init init_cma_reserved_pageblock(struct page *page)
756{
757 unsigned i = pageblock_nr_pages;
758 struct page *p = page;
759
760 do {
761 __ClearPageReserved(p);
762 set_page_count(p, 0);
763 } while (++p, --i);
764
765 set_page_refcounted(page);
766 set_pageblock_migratetype(page, MIGRATE_CMA);
767 __free_pages(page, pageblock_order);
768 totalram_pages += pageblock_nr_pages;
769}
770#endif
752 771
753/* 772/*
754 * The order of subdivision here is critical for the IO subsystem. 773 * The order of subdivision here is critical for the IO subsystem.
@@ -874,11 +893,17 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
874 * This array describes the order lists are fallen back to when 893 * This array describes the order lists are fallen back to when
875 * the free lists for the desirable migrate type are depleted 894 * the free lists for the desirable migrate type are depleted
876 */ 895 */
877static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { 896static int fallbacks[MIGRATE_TYPES][4] = {
878 [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, 897 [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
879 [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, 898 [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
880 [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, 899#ifdef CONFIG_CMA
881 [MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */ 900 [MIGRATE_MOVABLE] = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
901 [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */
902#else
903 [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
904#endif
905 [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */
906 [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */
882}; 907};
883 908
884/* 909/*
@@ -973,12 +998,12 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
973 /* Find the largest possible block of pages in the other list */ 998 /* Find the largest possible block of pages in the other list */
974 for (current_order = MAX_ORDER-1; current_order >= order; 999 for (current_order = MAX_ORDER-1; current_order >= order;
975 --current_order) { 1000 --current_order) {
976 for (i = 0; i < MIGRATE_TYPES - 1; i++) { 1001 for (i = 0;; i++) {
977 migratetype = fallbacks[start_migratetype][i]; 1002 migratetype = fallbacks[start_migratetype][i];
978 1003
979 /* MIGRATE_RESERVE handled later if necessary */ 1004 /* MIGRATE_RESERVE handled later if necessary */
980 if (migratetype == MIGRATE_RESERVE) 1005 if (migratetype == MIGRATE_RESERVE)
981 continue; 1006 break;
982 1007
983 area = &(zone->free_area[current_order]); 1008 area = &(zone->free_area[current_order]);
984 if (list_empty(&area->free_list[migratetype])) 1009 if (list_empty(&area->free_list[migratetype]))
@@ -993,11 +1018,18 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
993 * pages to the preferred allocation list. If falling 1018 * pages to the preferred allocation list. If falling
994 * back for a reclaimable kernel allocation, be more 1019 * back for a reclaimable kernel allocation, be more
995 * aggressive about taking ownership of free pages 1020 * aggressive about taking ownership of free pages
1021 *
1022 * On the other hand, never change migration
1023 * type of MIGRATE_CMA pageblocks nor move CMA
1024 * pages on different free lists. We don't
1025 * want unmovable pages to be allocated from
1026 * MIGRATE_CMA areas.
996 */ 1027 */
997 if (unlikely(current_order >= (pageblock_order >> 1)) || 1028 if (!is_migrate_cma(migratetype) &&
998 start_migratetype == MIGRATE_RECLAIMABLE || 1029 (unlikely(current_order >= pageblock_order / 2) ||
999 page_group_by_mobility_disabled) { 1030 start_migratetype == MIGRATE_RECLAIMABLE ||
1000 unsigned long pages; 1031 page_group_by_mobility_disabled)) {
1032 int pages;
1001 pages = move_freepages_block(zone, page, 1033 pages = move_freepages_block(zone, page,
1002 start_migratetype); 1034 start_migratetype);
1003 1035
@@ -1015,11 +1047,14 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
1015 rmv_page_order(page); 1047 rmv_page_order(page);
1016 1048
1017 /* Take ownership for orders >= pageblock_order */ 1049 /* Take ownership for orders >= pageblock_order */
1018 if (current_order >= pageblock_order) 1050 if (current_order >= pageblock_order &&
1051 !is_migrate_cma(migratetype))
1019 change_pageblock_range(page, current_order, 1052 change_pageblock_range(page, current_order,
1020 start_migratetype); 1053 start_migratetype);
1021 1054
1022 expand(zone, page, order, current_order, area, migratetype); 1055 expand(zone, page, order, current_order, area,
1056 is_migrate_cma(migratetype)
1057 ? migratetype : start_migratetype);
1023 1058
1024 trace_mm_page_alloc_extfrag(page, order, current_order, 1059 trace_mm_page_alloc_extfrag(page, order, current_order,
1025 start_migratetype, migratetype); 1060 start_migratetype, migratetype);
@@ -1061,17 +1096,17 @@ retry_reserve:
1061 return page; 1096 return page;
1062} 1097}
1063 1098
1064/* 1099/*
1065 * Obtain a specified number of elements from the buddy allocator, all under 1100 * Obtain a specified number of elements from the buddy allocator, all under
1066 * a single hold of the lock, for efficiency. Add them to the supplied list. 1101 * a single hold of the lock, for efficiency. Add them to the supplied list.
1067 * Returns the number of new pages which were placed at *list. 1102 * Returns the number of new pages which were placed at *list.
1068 */ 1103 */
1069static int rmqueue_bulk(struct zone *zone, unsigned int order, 1104static int rmqueue_bulk(struct zone *zone, unsigned int order,
1070 unsigned long count, struct list_head *list, 1105 unsigned long count, struct list_head *list,
1071 int migratetype, int cold) 1106 int migratetype, int cold)
1072{ 1107{
1073 int i; 1108 int mt = migratetype, i;
1074 1109
1075 spin_lock(&zone->lock); 1110 spin_lock(&zone->lock);
1076 for (i = 0; i < count; ++i) { 1111 for (i = 0; i < count; ++i) {
1077 struct page *page = __rmqueue(zone, order, migratetype); 1112 struct page *page = __rmqueue(zone, order, migratetype);
@@ -1091,7 +1126,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
1091 list_add(&page->lru, list); 1126 list_add(&page->lru, list);
1092 else 1127 else
1093 list_add_tail(&page->lru, list); 1128 list_add_tail(&page->lru, list);
1094 set_page_private(page, migratetype); 1129 if (IS_ENABLED(CONFIG_CMA)) {
1130 mt = get_pageblock_migratetype(page);
1131 if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE)
1132 mt = migratetype;
1133 }
1134 set_page_private(page, mt);
1095 list = &page->lru; 1135 list = &page->lru;
1096 } 1136 }
1097 __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); 1137 __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
@@ -1371,8 +1411,12 @@ int split_free_page(struct page *page)
1371 1411
1372 if (order >= pageblock_order - 1) { 1412 if (order >= pageblock_order - 1) {
1373 struct page *endpage = page + (1 << order) - 1; 1413 struct page *endpage = page + (1 << order) - 1;
1374 for (; page < endpage; page += pageblock_nr_pages) 1414 for (; page < endpage; page += pageblock_nr_pages) {
1375 set_pageblock_migratetype(page, MIGRATE_MOVABLE); 1415 int mt = get_pageblock_migratetype(page);
1416 if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt))
1417 set_pageblock_migratetype(page,
1418 MIGRATE_MOVABLE);
1419 }
1376 } 1420 }
1377 1421
1378 return 1 << order; 1422 return 1 << order;
@@ -2086,16 +2130,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
2086} 2130}
2087#endif /* CONFIG_COMPACTION */ 2131#endif /* CONFIG_COMPACTION */
2088 2132
2089/* The really slow allocator path where we enter direct reclaim */ 2133/* Perform direct synchronous page reclaim */
2090static inline struct page * 2134static int
2091__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, 2135__perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist,
2092 struct zonelist *zonelist, enum zone_type high_zoneidx, 2136 nodemask_t *nodemask)
2093 nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
2094 int migratetype, unsigned long *did_some_progress)
2095{ 2137{
2096 struct page *page = NULL;
2097 struct reclaim_state reclaim_state; 2138 struct reclaim_state reclaim_state;
2098 bool drained = false; 2139 int progress;
2099 2140
2100 cond_resched(); 2141 cond_resched();
2101 2142
@@ -2106,7 +2147,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
2106 reclaim_state.reclaimed_slab = 0; 2147 reclaim_state.reclaimed_slab = 0;
2107 current->reclaim_state = &reclaim_state; 2148 current->reclaim_state = &reclaim_state;
2108 2149
2109 *did_some_progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); 2150 progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask);
2110 2151
2111 current->reclaim_state = NULL; 2152 current->reclaim_state = NULL;
2112 lockdep_clear_current_reclaim_state(); 2153 lockdep_clear_current_reclaim_state();
@@ -2114,6 +2155,21 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
2114 2155
2115 cond_resched(); 2156 cond_resched();
2116 2157
2158 return progress;
2159}
2160
2161/* The really slow allocator path where we enter direct reclaim */
2162static inline struct page *
2163__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
2164 struct zonelist *zonelist, enum zone_type high_zoneidx,
2165 nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
2166 int migratetype, unsigned long *did_some_progress)
2167{
2168 struct page *page = NULL;
2169 bool drained = false;
2170
2171 *did_some_progress = __perform_reclaim(gfp_mask, order, zonelist,
2172 nodemask);
2117 if (unlikely(!(*did_some_progress))) 2173 if (unlikely(!(*did_some_progress)))
2118 return NULL; 2174 return NULL;
2119 2175
@@ -4301,7 +4357,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
4301 init_waitqueue_head(&pgdat->kswapd_wait); 4357 init_waitqueue_head(&pgdat->kswapd_wait);
4302 pgdat->kswapd_max_order = 0; 4358 pgdat->kswapd_max_order = 0;
4303 pgdat_page_cgroup_init(pgdat); 4359 pgdat_page_cgroup_init(pgdat);
4304 4360
4305 for (j = 0; j < MAX_NR_ZONES; j++) { 4361 for (j = 0; j < MAX_NR_ZONES; j++) {
4306 struct zone *zone = pgdat->node_zones + j; 4362 struct zone *zone = pgdat->node_zones + j;
4307 unsigned long size, realsize, memmap_pages; 4363 unsigned long size, realsize, memmap_pages;
@@ -4976,14 +5032,7 @@ static void setup_per_zone_lowmem_reserve(void)
4976 calculate_totalreserve_pages(); 5032 calculate_totalreserve_pages();
4977} 5033}
4978 5034
4979/** 5035static void __setup_per_zone_wmarks(void)
4980 * setup_per_zone_wmarks - called when min_free_kbytes changes
4981 * or when memory is hot-{added|removed}
4982 *
4983 * Ensures that the watermark[min,low,high] values for each zone are set
4984 * correctly with respect to min_free_kbytes.
4985 */
4986void setup_per_zone_wmarks(void)
4987{ 5036{
4988 unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); 5037 unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
4989 unsigned long lowmem_pages = 0; 5038 unsigned long lowmem_pages = 0;
@@ -5030,6 +5079,11 @@ void setup_per_zone_wmarks(void)
5030 5079
5031 zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); 5080 zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2);
5032 zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); 5081 zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
5082
5083 zone->watermark[WMARK_MIN] += cma_wmark_pages(zone);
5084 zone->watermark[WMARK_LOW] += cma_wmark_pages(zone);
5085 zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone);
5086
5033 setup_zone_migrate_reserve(zone); 5087 setup_zone_migrate_reserve(zone);
5034 spin_unlock_irqrestore(&zone->lock, flags); 5088 spin_unlock_irqrestore(&zone->lock, flags);
5035 } 5089 }
@@ -5038,6 +5092,20 @@ void setup_per_zone_wmarks(void)
5038 calculate_totalreserve_pages(); 5092 calculate_totalreserve_pages();
5039} 5093}
5040 5094
5095/**
5096 * setup_per_zone_wmarks - called when min_free_kbytes changes
5097 * or when memory is hot-{added|removed}
5098 *
5099 * Ensures that the watermark[min,low,high] values for each zone are set
5100 * correctly with respect to min_free_kbytes.
5101 */
5102void setup_per_zone_wmarks(void)
5103{
5104 mutex_lock(&zonelists_mutex);
5105 __setup_per_zone_wmarks();
5106 mutex_unlock(&zonelists_mutex);
5107}
5108
5041/* 5109/*
5042 * The inactive anon list should be small enough that the VM never has to 5110 * The inactive anon list should be small enough that the VM never has to
5043 * do too much work, but large enough that each inactive page has a chance 5111 * do too much work, but large enough that each inactive page has a chance
@@ -5415,14 +5483,16 @@ static int
5415__count_immobile_pages(struct zone *zone, struct page *page, int count) 5483__count_immobile_pages(struct zone *zone, struct page *page, int count)
5416{ 5484{
5417 unsigned long pfn, iter, found; 5485 unsigned long pfn, iter, found;
5486 int mt;
5487
5418 /* 5488 /*
5419 * For avoiding noise data, lru_add_drain_all() should be called 5489 * For avoiding noise data, lru_add_drain_all() should be called
5420 * If ZONE_MOVABLE, the zone never contains immobile pages 5490 * If ZONE_MOVABLE, the zone never contains immobile pages
5421 */ 5491 */
5422 if (zone_idx(zone) == ZONE_MOVABLE) 5492 if (zone_idx(zone) == ZONE_MOVABLE)
5423 return true; 5493 return true;
5424 5494 mt = get_pageblock_migratetype(page);
5425 if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE) 5495 if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt))
5426 return true; 5496 return true;
5427 5497
5428 pfn = page_to_pfn(page); 5498 pfn = page_to_pfn(page);
@@ -5539,7 +5609,7 @@ out:
5539 return ret; 5609 return ret;
5540} 5610}
5541 5611
5542void unset_migratetype_isolate(struct page *page) 5612void unset_migratetype_isolate(struct page *page, unsigned migratetype)
5543{ 5613{
5544 struct zone *zone; 5614 struct zone *zone;
5545 unsigned long flags; 5615 unsigned long flags;
@@ -5547,12 +5617,259 @@ void unset_migratetype_isolate(struct page *page)
5547 spin_lock_irqsave(&zone->lock, flags); 5617 spin_lock_irqsave(&zone->lock, flags);
5548 if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) 5618 if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
5549 goto out; 5619 goto out;
5550 set_pageblock_migratetype(page, MIGRATE_MOVABLE); 5620 set_pageblock_migratetype(page, migratetype);
5551 move_freepages_block(zone, page, MIGRATE_MOVABLE); 5621 move_freepages_block(zone, page, migratetype);
5552out: 5622out:
5553 spin_unlock_irqrestore(&zone->lock, flags); 5623 spin_unlock_irqrestore(&zone->lock, flags);
5554} 5624}
5555 5625
5626#ifdef CONFIG_CMA
5627
5628static unsigned long pfn_max_align_down(unsigned long pfn)
5629{
5630 return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES,
5631 pageblock_nr_pages) - 1);
5632}
5633
5634static unsigned long pfn_max_align_up(unsigned long pfn)
5635{
5636 return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES,
5637 pageblock_nr_pages));
5638}
5639
5640static struct page *
5641__alloc_contig_migrate_alloc(struct page *page, unsigned long private,
5642 int **resultp)
5643{
5644 return alloc_page(GFP_HIGHUSER_MOVABLE);
5645}
5646
5647/* [start, end) must belong to a single zone. */
5648static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
5649{
5650 /* This function is based on compact_zone() from compaction.c. */
5651
5652 unsigned long pfn = start;
5653 unsigned int tries = 0;
5654 int ret = 0;
5655
5656 struct compact_control cc = {
5657 .nr_migratepages = 0,
5658 .order = -1,
5659 .zone = page_zone(pfn_to_page(start)),
5660 .sync = true,
5661 };
5662 INIT_LIST_HEAD(&cc.migratepages);
5663
5664 migrate_prep_local();
5665
5666 while (pfn < end || !list_empty(&cc.migratepages)) {
5667 if (fatal_signal_pending(current)) {
5668 ret = -EINTR;
5669 break;
5670 }
5671
5672 if (list_empty(&cc.migratepages)) {
5673 cc.nr_migratepages = 0;
5674 pfn = isolate_migratepages_range(cc.zone, &cc,
5675 pfn, end);
5676 if (!pfn) {
5677 ret = -EINTR;
5678 break;
5679 }
5680 tries = 0;
5681 } else if (++tries == 5) {
5682 ret = ret < 0 ? ret : -EBUSY;
5683 break;
5684 }
5685
5686 ret = migrate_pages(&cc.migratepages,
5687 __alloc_contig_migrate_alloc,
5688 0, false, MIGRATE_SYNC);
5689 }
5690
5691 putback_lru_pages(&cc.migratepages);
5692 return ret > 0 ? 0 : ret;
5693}
5694
5695/*
5696 * Update zone's cma pages counter used for watermark level calculation.
5697 */
5698static inline void __update_cma_watermarks(struct zone *zone, int count)
5699{
5700 unsigned long flags;
5701 spin_lock_irqsave(&zone->lock, flags);
5702 zone->min_cma_pages += count;
5703 spin_unlock_irqrestore(&zone->lock, flags);
5704 setup_per_zone_wmarks();
5705}
5706
5707/*
5708 * Trigger memory pressure bump to reclaim some pages in order to be able to
5709 * allocate 'count' pages in single page units. Does similar work as
5710 *__alloc_pages_slowpath() function.
5711 */
5712static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count)
5713{
5714 enum zone_type high_zoneidx = gfp_zone(gfp_mask);
5715 struct zonelist *zonelist = node_zonelist(0, gfp_mask);
5716 int did_some_progress = 0;
5717 int order = 1;
5718
5719 /*
5720 * Increase level of watermarks to force kswapd do his job
5721 * to stabilise at new watermark level.
5722 */
5723 __update_cma_watermarks(zone, count);
5724
5725 /* Obey watermarks as if the page was being allocated */
5726 while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) {
5727 wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone));
5728
5729 did_some_progress = __perform_reclaim(gfp_mask, order, zonelist,
5730 NULL);
5731 if (!did_some_progress) {
5732 /* Exhausted what can be done so it's blamo time */
5733 out_of_memory(zonelist, gfp_mask, order, NULL, false);
5734 }
5735 }
5736
5737 /* Restore original watermark levels. */
5738 __update_cma_watermarks(zone, -count);
5739
5740 return count;
5741}
5742
5743/**
5744 * alloc_contig_range() -- tries to allocate given range of pages
5745 * @start: start PFN to allocate
5746 * @end: one-past-the-last PFN to allocate
5747 * @migratetype: migratetype of the underlaying pageblocks (either
5748 * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks
5749 * in range must have the same migratetype and it must
5750 * be either of the two.
5751 *
5752 * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES
5753 * aligned, however it's the caller's responsibility to guarantee that
5754 * we are the only thread that changes migrate type of pageblocks the
5755 * pages fall in.
5756 *
5757 * The PFN range must belong to a single zone.
5758 *
5759 * Returns zero on success or negative error code. On success all
5760 * pages which PFN is in [start, end) are allocated for the caller and
5761 * need to be freed with free_contig_range().
5762 */
5763int alloc_contig_range(unsigned long start, unsigned long end,
5764 unsigned migratetype)
5765{
5766 struct zone *zone = page_zone(pfn_to_page(start));
5767 unsigned long outer_start, outer_end;
5768 int ret = 0, order;
5769
5770 /*
5771 * What we do here is we mark all pageblocks in range as
5772 * MIGRATE_ISOLATE. Because pageblock and max order pages may
5773 * have different sizes, and due to the way page allocator
5774 * work, we align the range to biggest of the two pages so
5775 * that page allocator won't try to merge buddies from
5776 * different pageblocks and change MIGRATE_ISOLATE to some
5777 * other migration type.
5778 *
5779 * Once the pageblocks are marked as MIGRATE_ISOLATE, we
5780 * migrate the pages from an unaligned range (ie. pages that
5781 * we are interested in). This will put all the pages in
5782 * range back to page allocator as MIGRATE_ISOLATE.
5783 *
5784 * When this is done, we take the pages in range from page
5785 * allocator removing them from the buddy system. This way
5786 * page allocator will never consider using them.
5787 *
5788 * This lets us mark the pageblocks back as
5789 * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the
5790 * aligned range but not in the unaligned, original range are
5791 * put back to page allocator so that buddy can use them.
5792 */
5793
5794 ret = start_isolate_page_range(pfn_max_align_down(start),
5795 pfn_max_align_up(end), migratetype);
5796 if (ret)
5797 goto done;
5798
5799 ret = __alloc_contig_migrate_range(start, end);
5800 if (ret)
5801 goto done;
5802
5803 /*
5804 * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
5805 * aligned blocks that are marked as MIGRATE_ISOLATE. What's
5806 * more, all pages in [start, end) are free in page allocator.
5807 * What we are going to do is to allocate all pages from
5808 * [start, end) (that is remove them from page allocator).
5809 *
5810 * The only problem is that pages at the beginning and at the
5811 * end of interesting range may be not aligned with pages that
5812 * page allocator holds, ie. they can be part of higher order
5813 * pages. Because of this, we reserve the bigger range and
5814 * once this is done free the pages we are not interested in.
5815 *
5816 * We don't have to hold zone->lock here because the pages are
5817 * isolated thus they won't get removed from buddy.
5818 */
5819
5820 lru_add_drain_all();
5821 drain_all_pages();
5822
5823 order = 0;
5824 outer_start = start;
5825 while (!PageBuddy(pfn_to_page(outer_start))) {
5826 if (++order >= MAX_ORDER) {
5827 ret = -EBUSY;
5828 goto done;
5829 }
5830 outer_start &= ~0UL << order;
5831 }
5832
5833 /* Make sure the range is really isolated. */
5834 if (test_pages_isolated(outer_start, end)) {
5835 pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n",
5836 outer_start, end);
5837 ret = -EBUSY;
5838 goto done;
5839 }
5840
5841 /*
5842 * Reclaim enough pages to make sure that contiguous allocation
5843 * will not starve the system.
5844 */
5845 __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
5846
5847 /* Grab isolated pages from freelists. */
5848 outer_end = isolate_freepages_range(outer_start, end);
5849 if (!outer_end) {
5850 ret = -EBUSY;
5851 goto done;
5852 }
5853
5854 /* Free head and tail (if any) */
5855 if (start != outer_start)
5856 free_contig_range(outer_start, start - outer_start);
5857 if (end != outer_end)
5858 free_contig_range(end, outer_end - end);
5859
5860done:
5861 undo_isolate_page_range(pfn_max_align_down(start),
5862 pfn_max_align_up(end), migratetype);
5863 return ret;
5864}
5865
5866void free_contig_range(unsigned long pfn, unsigned nr_pages)
5867{
5868 for (; nr_pages--; ++pfn)
5869 __free_page(pfn_to_page(pfn));
5870}
5871#endif
5872
5556#ifdef CONFIG_MEMORY_HOTREMOVE 5873#ifdef CONFIG_MEMORY_HOTREMOVE
5557/* 5874/*
5558 * All pages in the range must be isolated before calling this. 5875 * All pages in the range must be isolated before calling this.
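alloc_contig_range() and free_contig_range() are the building blocks the CMA allocator proper (added by the "drivers: add Contiguous Memory Allocator" commit elsewhere in this series) is layered on. A hedged caller-side sketch, with the base PFN and page count invented for illustration:

	/* Take 1024 physically contiguous pages (4 MiB with 4 KiB pages)
	 * from an area whose pageblocks were handed to the allocator as
	 * MIGRATE_CMA by init_cma_reserved_pageblock() at boot. */
	unsigned long base = cma_region_base_pfn;	/* illustrative name */
	int ret;

	ret = alloc_contig_range(base, base + 1024, MIGRATE_CMA);
	if (ret)			/* -EINTR, -EBUSY, ... */
		return ret;

	/* pages pfn_to_page(base) .. pfn_to_page(base + 1023) now belong
	 * to the caller */

	free_contig_range(base, 1024);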
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 4ae42bb40892..c9f04774f2b8 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -24,6 +24,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
24 * to be MIGRATE_ISOLATE. 24 * to be MIGRATE_ISOLATE.
25 * @start_pfn: The lower PFN of the range to be isolated. 25 * @start_pfn: The lower PFN of the range to be isolated.
26 * @end_pfn: The upper PFN of the range to be isolated. 26 * @end_pfn: The upper PFN of the range to be isolated.
27 * @migratetype: migrate type to set in error recovery.
27 * 28 *
28 * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in 29 * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
29 * the range will never be allocated. Any free pages and pages freed in the 30 * the range will never be allocated. Any free pages and pages freed in the
@@ -32,8 +33,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
32 * start_pfn/end_pfn must be aligned to pageblock_order. 33 * start_pfn/end_pfn must be aligned to pageblock_order.
33 * Returns 0 on success and -EBUSY if any part of range cannot be isolated. 34 * Returns 0 on success and -EBUSY if any part of range cannot be isolated.
34 */ 35 */
35int 36int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
36start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) 37 unsigned migratetype)
37{ 38{
38 unsigned long pfn; 39 unsigned long pfn;
39 unsigned long undo_pfn; 40 unsigned long undo_pfn;
@@ -56,7 +57,7 @@ undo:
56 for (pfn = start_pfn; 57 for (pfn = start_pfn;
57 pfn < undo_pfn; 58 pfn < undo_pfn;
58 pfn += pageblock_nr_pages) 59 pfn += pageblock_nr_pages)
59 unset_migratetype_isolate(pfn_to_page(pfn)); 60 unset_migratetype_isolate(pfn_to_page(pfn), migratetype);
60 61
61 return -EBUSY; 62 return -EBUSY;
62} 63}
@@ -64,8 +65,8 @@ undo:
64/* 65/*
65 * Make isolated pages available again. 66 * Make isolated pages available again.
66 */ 67 */
67int 68int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
68undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) 69 unsigned migratetype)
69{ 70{
70 unsigned long pfn; 71 unsigned long pfn;
71 struct page *page; 72 struct page *page;
@@ -77,7 +78,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn)
77 page = __first_valid_page(pfn, pageblock_nr_pages); 78 page = __first_valid_page(pfn, pageblock_nr_pages);
78 if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE) 79 if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
79 continue; 80 continue;
80 unset_migratetype_isolate(page); 81 unset_migratetype_isolate(page, migratetype);
81 } 82 }
82 return 0; 83 return 0;
83} 84}
@@ -86,7 +87,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn)
86 * all pages in [start_pfn...end_pfn) must be in the same zone. 87 * all pages in [start_pfn...end_pfn) must be in the same zone.
87 * zone->lock must be held before call this. 88 * zone->lock must be held before call this.
88 * 89 *
89 * Returns 1 if all pages in the range is isolated. 90 * Returns 1 if all pages in the range are isolated.
90 */ 91 */
91static int 92static int
92__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn) 93__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 7db1b9bab492..0dad31dc1618 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -613,6 +613,9 @@ static char * const migratetype_names[MIGRATE_TYPES] = {
613 "Reclaimable", 613 "Reclaimable",
614 "Movable", 614 "Movable",
615 "Reserve", 615 "Reserve",
616#ifdef CONFIG_CMA
617 "CMA",
618#endif
616 "Isolate", 619 "Isolate",
617}; 620};
618 621
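The "CMA" entry above is only compiled in with CONFIG_CMA, matching how the migratetype itself is defined. The test used throughout this merge is the is_migrate_cma() helper from the "mm: mmzone: MIGRATE_CMA migration type added" patch; from memory it boils down to roughly the following, so treat the exact spelling as an assumption:

	/* include/linux/mmzone.h, approximately: */
	#ifdef CONFIG_CMA
	#  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
	#else
	#  define is_migrate_cma(migratetype) false
	#endif

	/* typical use, as in __count_immobile_pages() above: */
	int mt = get_pageblock_migratetype(page);
	if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt))
		/* the block may host movable/CMA allocations */;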