author     Linus Torvalds <torvalds@linux-foundation.org>  2012-05-25 12:18:59 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-05-25 12:18:59 -0400
commit     d484864dd96e1830e7689510597707c1df8cd681 (patch)
tree       51551708ba3f26d05575fa91daaf0c0d970a77c3
parent     be87cfb47c5c740f7b17929bcd7c480b228513e0 (diff)
parent     0f51596bd39a5c928307ffcffc9ba07f90f42a8b (diff)
Merge branch 'for-linus' of git://git.linaro.org/people/mszyprowski/linux-dma-mapping
Pull CMA and ARM DMA-mapping updates from Marek Szyprowski:
 "These patches contain two major updates for the DMA mapping subsystem
  (mainly for the ARM architecture).

  The first one is the Contiguous Memory Allocator (CMA), which makes it
  possible for device drivers to allocate big contiguous chunks of memory
  after the system has booted.  The main difference from similar
  frameworks is that CMA allows the memory region reserved for big chunk
  allocations to be transparently reused as system memory, so no memory
  is wasted when no big chunk is allocated.  Once an allocation request
  is issued, the framework migrates system pages to create space for the
  required big chunk of physically contiguous memory.

  For more information one can refer to these LWN articles:

   - 'A reworked contiguous memory allocator': http://lwn.net/Articles/447405/
   - 'CMA and ARM': http://lwn.net/Articles/450286/
   - 'A deep dive into CMA': http://lwn.net/Articles/486301/
   - and the following thread with the patches and links to all previous
     versions: https://lkml.org/lkml/2012/4/3/204

  The main client of this new framework is the ARM DMA-mapping subsystem.

  The second part is a complete redesign of the ARM DMA-mapping
  subsystem.  The core implementation has been changed to use the common
  struct dma_map_ops based infrastructure, with the recent updates for
  new DMA attributes merged in v3.4-rc2.  This allows more than one
  implementation of the dma-mapping calls to be used and changed/selected
  on a per struct device basis.  The first client of this new
  infrastructure is the dmabounce implementation, which has been
  completely cut out of the core, common code.

  The last patch of this redesign introduces a new, experimental
  implementation of the dma-mapping calls on top of the generic IOMMU
  framework.  This lets an ARM sub-platform transparently use an IOMMU
  for DMA-mapping calls if the required IOMMU hardware is provided.  For
  more information please refer to the following thread:
  http://www.spinics.net/lists/arm-kernel/msg175729.html

  The last patch merges changes from both updates and provides a
  resolution for the conflicts which cannot be avoided when patches have
  been applied to the same files (mainly arch/arm/mm/dma-mapping.c)."

Acked by Andrew Morton <akpm@linux-foundation.org>:
 "Yup, this one please.  It's had much work, plenty of review and I
  think even Russell is happy with it."

* 'for-linus' of git://git.linaro.org/people/mszyprowski/linux-dma-mapping: (28 commits)
  ARM: dma-mapping: use PMD size for section unmap
  cma: fix migration mode
  ARM: integrate CMA with DMA-mapping subsystem
  X86: integrate CMA with DMA-mapping subsystem
  drivers: add Contiguous Memory Allocator
  mm: trigger page reclaim in alloc_contig_range() to stabilise watermarks
  mm: extract reclaim code from __alloc_pages_direct_reclaim()
  mm: Serialize access to min_free_kbytes
  mm: page_isolation: MIGRATE_CMA isolation functions added
  mm: mmzone: MIGRATE_CMA migration type added
  mm: page_alloc: change fallbacks array handling
  mm: page_alloc: introduce alloc_contig_range()
  mm: compaction: export some of the functions
  mm: compaction: introduce isolate_freepages_range()
  mm: compaction: introduce map_pages()
  mm: compaction: introduce isolate_migratepages_range()
  mm: page_alloc: remove trailing whitespace
  ARM: dma-mapping: add support for IOMMU mapper
  ARM: dma-mapping: use alloc, mmap, free from dma_ops
  ARM: dma-mapping: remove redundant code and do the cleanup
  ...

Conflicts:
  arch/x86/include/asm/dma-mapping.h
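As a driver-visible illustration of the CMA point above: the allocation API itself does not change, only what can back it. A minimal, hypothetical sketch (my_probe, the platform device and the 8 MiB size are made up for illustration):

  #include <linux/dma-mapping.h>
  #include <linux/platform_device.h>
  #include <asm/sizes.h>

  /* Hypothetical probe: with CMA enabled, a large coherent allocation like
   * this can be satisfied from the shared contiguous area by migrating the
   * movable pages that currently occupy it. */
  static int my_probe(struct platform_device *pdev)
  {
  	dma_addr_t dma_handle;
  	void *cpu_addr;

  	cpu_addr = dma_alloc_coherent(&pdev->dev, SZ_8M, &dma_handle, GFP_KERNEL);
  	if (!cpu_addr)
  		return -ENOMEM;

  	/* ... hand dma_handle to the device ... */

  	dma_free_coherent(&pdev->dev, SZ_8M, cpu_addr, dma_handle);
  	return 0;
  }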
-rw-r--r--  Documentation/kernel-parameters.txt      9
-rw-r--r--  arch/Kconfig                             3
-rw-r--r--  arch/arm/Kconfig                         11
-rw-r--r--  arch/arm/common/dmabounce.c              84
-rw-r--r--  arch/arm/include/asm/device.h            4
-rw-r--r--  arch/arm/include/asm/dma-contiguous.h    15
-rw-r--r--  arch/arm/include/asm/dma-iommu.h         34
-rw-r--r--  arch/arm/include/asm/dma-mapping.h       407
-rw-r--r--  arch/arm/include/asm/mach/map.h          1
-rw-r--r--  arch/arm/kernel/setup.c                  9
-rw-r--r--  arch/arm/mm/dma-mapping.c                1348
-rw-r--r--  arch/arm/mm/init.c                       23
-rw-r--r--  arch/arm/mm/mm.h                         3
-rw-r--r--  arch/arm/mm/mmu.c                        31
-rw-r--r--  arch/arm/mm/vmregion.h                   2
-rw-r--r--  arch/x86/Kconfig                         1
-rw-r--r--  arch/x86/include/asm/dma-contiguous.h    13
-rw-r--r--  arch/x86/include/asm/dma-mapping.h       5
-rw-r--r--  arch/x86/kernel/pci-dma.c                18
-rw-r--r--  arch/x86/kernel/pci-nommu.c              8
-rw-r--r--  arch/x86/kernel/setup.c                  2
-rw-r--r--  drivers/base/Kconfig                     89
-rw-r--r--  drivers/base/Makefile                    1
-rw-r--r--  drivers/base/dma-coherent.c              42
-rw-r--r--  drivers/base/dma-contiguous.c            401
-rw-r--r--  include/asm-generic/dma-coherent.h       4
-rw-r--r--  include/asm-generic/dma-contiguous.h     28
-rw-r--r--  include/linux/device.h                   4
-rw-r--r--  include/linux/dma-contiguous.h           110
-rw-r--r--  include/linux/gfp.h                      12
-rw-r--r--  include/linux/mmzone.h                   47
-rw-r--r--  include/linux/page-isolation.h           18
-rw-r--r--  mm/Kconfig                               2
-rw-r--r--  mm/Makefile                              3
-rw-r--r--  mm/compaction.c                          418
-rw-r--r--  mm/internal.h                            33
-rw-r--r--  mm/memory-failure.c                      2
-rw-r--r--  mm/memory_hotplug.c                      6
-rw-r--r--  mm/page_alloc.c                          409
-rw-r--r--  mm/page_isolation.c                      15
-rw-r--r--  mm/vmstat.c                              3
41 files changed, 2898 insertions, 780 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b69cfdc12112..f1959b7d13d0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -508,6 +508,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
508 Also note the kernel might malfunction if you disable 508 Also note the kernel might malfunction if you disable
509 some critical bits. 509 some critical bits.
510 510
511 cma=nn[MG] [ARM,KNL]
512 Sets the size of kernel global memory area for contiguous
513 memory allocations. For more information, see
514 include/linux/dma-contiguous.h
515
511 cmo_free_hint= [PPC] Format: { yes | no } 516 cmo_free_hint= [PPC] Format: { yes | no }
512 Specify whether pages are marked as being inactive 517 Specify whether pages are marked as being inactive
513 when they are freed. This is used in CMO environments 518 when they are freed. This is used in CMO environments
@@ -515,6 +520,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
515 a hypervisor. 520 a hypervisor.
516 Default: yes 521 Default: yes
517 522
523 coherent_pool=nn[KMG] [ARM,KNL]
524 Sets the size of memory pool for coherent, atomic dma
525 allocations if Contiguous Memory Allocator (CMA) is used.
526
518 code_bytes [X86] How many bytes of object code to print 527 code_bytes [X86] How many bytes of object code to print
519 in an oops report. 528 in an oops report.
520 Range: 0 - 8192 529 Range: 0 - 8192
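For example (sizes are arbitrary, chosen only to show the syntax documented above), a board that wants a 64 MiB CMA region and a 1 MiB atomic coherent pool would boot with:

  cma=64M coherent_pool=1M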
diff --git a/arch/Kconfig b/arch/Kconfig
index e9a910876cda..8c3d957fa8e2 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -159,6 +159,9 @@ config HAVE_ARCH_TRACEHOOK
159config HAVE_DMA_ATTRS 159config HAVE_DMA_ATTRS
160 bool 160 bool
161 161
162config HAVE_DMA_CONTIGUOUS
163 bool
164
162config USE_GENERIC_SMP_HELPERS 165config USE_GENERIC_SMP_HELPERS
163 bool 166 bool
164 167
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 5458aa9db067..3ca1ba981efb 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -5,6 +5,9 @@ config ARM
5 select HAVE_AOUT 5 select HAVE_AOUT
6 select HAVE_DMA_API_DEBUG 6 select HAVE_DMA_API_DEBUG
7 select HAVE_IDE if PCI || ISA || PCMCIA 7 select HAVE_IDE if PCI || ISA || PCMCIA
8 select HAVE_DMA_ATTRS
9 select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7)
10 select CMA if (CPU_V6 || CPU_V6K || CPU_V7)
8 select HAVE_MEMBLOCK 11 select HAVE_MEMBLOCK
9 select RTC_LIB 12 select RTC_LIB
10 select SYS_SUPPORTS_APM_EMULATION 13 select SYS_SUPPORTS_APM_EMULATION
@@ -54,6 +57,14 @@ config ARM
54config ARM_HAS_SG_CHAIN 57config ARM_HAS_SG_CHAIN
55 bool 58 bool
56 59
60config NEED_SG_DMA_LENGTH
61 bool
62
63config ARM_DMA_USE_IOMMU
64 select NEED_SG_DMA_LENGTH
65 select ARM_HAS_SG_CHAIN
66 bool
67
57config HAVE_PWM 68config HAVE_PWM
58 bool 69 bool
59 70
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
index 595ecd290ebf..9d7eb530f95f 100644
--- a/arch/arm/common/dmabounce.c
+++ b/arch/arm/common/dmabounce.c
@@ -173,7 +173,8 @@ find_safe_buffer(struct dmabounce_device_info *device_info, dma_addr_t safe_dma_
173 read_lock_irqsave(&device_info->lock, flags); 173 read_lock_irqsave(&device_info->lock, flags);
174 174
175 list_for_each_entry(b, &device_info->safe_buffers, node) 175 list_for_each_entry(b, &device_info->safe_buffers, node)
176 if (b->safe_dma_addr == safe_dma_addr) { 176 if (b->safe_dma_addr <= safe_dma_addr &&
177 b->safe_dma_addr + b->size > safe_dma_addr) {
177 rb = b; 178 rb = b;
178 break; 179 break;
179 } 180 }
@@ -254,7 +255,7 @@ static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size,
254 if (buf == NULL) { 255 if (buf == NULL) {
255 dev_err(dev, "%s: unable to map unsafe buffer %p!\n", 256 dev_err(dev, "%s: unable to map unsafe buffer %p!\n",
256 __func__, ptr); 257 __func__, ptr);
257 return ~0; 258 return DMA_ERROR_CODE;
258 } 259 }
259 260
260 dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", 261 dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n",
@@ -307,8 +308,9 @@ static inline void unmap_single(struct device *dev, struct safe_buffer *buf,
307 * substitute the safe buffer for the unsafe one. 308 * substitute the safe buffer for the unsafe one.
308 * (basically move the buffer from an unsafe area to a safe one) 309 * (basically move the buffer from an unsafe area to a safe one)
309 */ 310 */
310dma_addr_t __dma_map_page(struct device *dev, struct page *page, 311static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page,
311 unsigned long offset, size_t size, enum dma_data_direction dir) 312 unsigned long offset, size_t size, enum dma_data_direction dir,
313 struct dma_attrs *attrs)
312{ 314{
313 dma_addr_t dma_addr; 315 dma_addr_t dma_addr;
314 int ret; 316 int ret;
@@ -320,21 +322,20 @@ dma_addr_t __dma_map_page(struct device *dev, struct page *page,
320 322
321 ret = needs_bounce(dev, dma_addr, size); 323 ret = needs_bounce(dev, dma_addr, size);
322 if (ret < 0) 324 if (ret < 0)
323 return ~0; 325 return DMA_ERROR_CODE;
324 326
325 if (ret == 0) { 327 if (ret == 0) {
326 __dma_page_cpu_to_dev(page, offset, size, dir); 328 arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir);
327 return dma_addr; 329 return dma_addr;
328 } 330 }
329 331
330 if (PageHighMem(page)) { 332 if (PageHighMem(page)) {
331 dev_err(dev, "DMA buffer bouncing of HIGHMEM pages is not supported\n"); 333 dev_err(dev, "DMA buffer bouncing of HIGHMEM pages is not supported\n");
332 return ~0; 334 return DMA_ERROR_CODE;
333 } 335 }
334 336
335 return map_single(dev, page_address(page) + offset, size, dir); 337 return map_single(dev, page_address(page) + offset, size, dir);
336} 338}
337EXPORT_SYMBOL(__dma_map_page);
338 339
339/* 340/*
340 * see if a mapped address was really a "safe" buffer and if so, copy 341 * see if a mapped address was really a "safe" buffer and if so, copy
@@ -342,8 +343,8 @@ EXPORT_SYMBOL(__dma_map_page);
342 * the safe buffer. (basically return things back to the way they 343 * the safe buffer. (basically return things back to the way they
343 * should be) 344 * should be)
344 */ 345 */
345void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, 346static void dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
346 enum dma_data_direction dir) 347 enum dma_data_direction dir, struct dma_attrs *attrs)
347{ 348{
348 struct safe_buffer *buf; 349 struct safe_buffer *buf;
349 350
@@ -352,19 +353,18 @@ void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
352 353
353 buf = find_safe_buffer_dev(dev, dma_addr, __func__); 354 buf = find_safe_buffer_dev(dev, dma_addr, __func__);
354 if (!buf) { 355 if (!buf) {
355 __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, dma_addr)), 356 arm_dma_ops.sync_single_for_cpu(dev, dma_addr, size, dir);
356 dma_addr & ~PAGE_MASK, size, dir);
357 return; 357 return;
358 } 358 }
359 359
360 unmap_single(dev, buf, size, dir); 360 unmap_single(dev, buf, size, dir);
361} 361}
362EXPORT_SYMBOL(__dma_unmap_page);
363 362
364int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, 363static int __dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr,
365 unsigned long off, size_t sz, enum dma_data_direction dir) 364 size_t sz, enum dma_data_direction dir)
366{ 365{
367 struct safe_buffer *buf; 366 struct safe_buffer *buf;
367 unsigned long off;
368 368
369 dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", 369 dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n",
370 __func__, addr, off, sz, dir); 370 __func__, addr, off, sz, dir);
@@ -373,6 +373,8 @@ int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr,
373 if (!buf) 373 if (!buf)
374 return 1; 374 return 1;
375 375
376 off = addr - buf->safe_dma_addr;
377
376 BUG_ON(buf->direction != dir); 378 BUG_ON(buf->direction != dir);
377 379
378 dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", 380 dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n",
@@ -388,12 +390,21 @@ int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr,
388 } 390 }
389 return 0; 391 return 0;
390} 392}
391EXPORT_SYMBOL(dmabounce_sync_for_cpu);
392 393
393int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, 394static void dmabounce_sync_for_cpu(struct device *dev,
394 unsigned long off, size_t sz, enum dma_data_direction dir) 395 dma_addr_t handle, size_t size, enum dma_data_direction dir)
396{
397 if (!__dmabounce_sync_for_cpu(dev, handle, size, dir))
398 return;
399
400 arm_dma_ops.sync_single_for_cpu(dev, handle, size, dir);
401}
402
403static int __dmabounce_sync_for_device(struct device *dev, dma_addr_t addr,
404 size_t sz, enum dma_data_direction dir)
395{ 405{
396 struct safe_buffer *buf; 406 struct safe_buffer *buf;
407 unsigned long off;
397 408
398 dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", 409 dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n",
399 __func__, addr, off, sz, dir); 410 __func__, addr, off, sz, dir);
@@ -402,6 +413,8 @@ int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr,
402 if (!buf) 413 if (!buf)
403 return 1; 414 return 1;
404 415
416 off = addr - buf->safe_dma_addr;
417
405 BUG_ON(buf->direction != dir); 418 BUG_ON(buf->direction != dir);
406 419
407 dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", 420 dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n",
@@ -417,7 +430,38 @@ int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr,
417 } 430 }
418 return 0; 431 return 0;
419} 432}
420EXPORT_SYMBOL(dmabounce_sync_for_device); 433
434static void dmabounce_sync_for_device(struct device *dev,
435 dma_addr_t handle, size_t size, enum dma_data_direction dir)
436{
437 if (!__dmabounce_sync_for_device(dev, handle, size, dir))
438 return;
439
440 arm_dma_ops.sync_single_for_device(dev, handle, size, dir);
441}
442
443static int dmabounce_set_mask(struct device *dev, u64 dma_mask)
444{
445 if (dev->archdata.dmabounce)
446 return 0;
447
448 return arm_dma_ops.set_dma_mask(dev, dma_mask);
449}
450
451static struct dma_map_ops dmabounce_ops = {
452 .alloc = arm_dma_alloc,
453 .free = arm_dma_free,
454 .mmap = arm_dma_mmap,
455 .map_page = dmabounce_map_page,
456 .unmap_page = dmabounce_unmap_page,
457 .sync_single_for_cpu = dmabounce_sync_for_cpu,
458 .sync_single_for_device = dmabounce_sync_for_device,
459 .map_sg = arm_dma_map_sg,
460 .unmap_sg = arm_dma_unmap_sg,
461 .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu,
462 .sync_sg_for_device = arm_dma_sync_sg_for_device,
463 .set_dma_mask = dmabounce_set_mask,
464};
421 465
422static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev, 466static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev,
423 const char *name, unsigned long size) 467 const char *name, unsigned long size)
@@ -479,6 +523,7 @@ int dmabounce_register_dev(struct device *dev, unsigned long small_buffer_size,
479#endif 523#endif
480 524
481 dev->archdata.dmabounce = device_info; 525 dev->archdata.dmabounce = device_info;
526 set_dma_ops(dev, &dmabounce_ops);
482 527
483 dev_info(dev, "dmabounce: registered device\n"); 528 dev_info(dev, "dmabounce: registered device\n");
484 529
@@ -497,6 +542,7 @@ void dmabounce_unregister_dev(struct device *dev)
497 struct dmabounce_device_info *device_info = dev->archdata.dmabounce; 542 struct dmabounce_device_info *device_info = dev->archdata.dmabounce;
498 543
499 dev->archdata.dmabounce = NULL; 544 dev->archdata.dmabounce = NULL;
545 set_dma_ops(dev, NULL);
500 546
501 if (!device_info) { 547 if (!device_info) {
502 dev_warn(dev, 548 dev_warn(dev,
diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
index 7aa368003b05..b69c0d3285f8 100644
--- a/arch/arm/include/asm/device.h
+++ b/arch/arm/include/asm/device.h
@@ -7,12 +7,16 @@
7#define ASMARM_DEVICE_H 7#define ASMARM_DEVICE_H
8 8
9struct dev_archdata { 9struct dev_archdata {
10 struct dma_map_ops *dma_ops;
10#ifdef CONFIG_DMABOUNCE 11#ifdef CONFIG_DMABOUNCE
11 struct dmabounce_device_info *dmabounce; 12 struct dmabounce_device_info *dmabounce;
12#endif 13#endif
13#ifdef CONFIG_IOMMU_API 14#ifdef CONFIG_IOMMU_API
14 void *iommu; /* private IOMMU data */ 15 void *iommu; /* private IOMMU data */
15#endif 16#endif
17#ifdef CONFIG_ARM_DMA_USE_IOMMU
18 struct dma_iommu_mapping *mapping;
19#endif
16}; 20};
17 21
18struct omap_device; 22struct omap_device;
diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h
new file mode 100644
index 000000000000..3ed37b4d93da
--- /dev/null
+++ b/arch/arm/include/asm/dma-contiguous.h
@@ -0,0 +1,15 @@
1#ifndef ASMARM_DMA_CONTIGUOUS_H
2#define ASMARM_DMA_CONTIGUOUS_H
3
4#ifdef __KERNEL__
5#ifdef CONFIG_CMA
6
7#include <linux/types.h>
8#include <asm-generic/dma-contiguous.h>
9
10void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size);
11
12#endif
13#endif
14
15#endif
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
new file mode 100644
index 000000000000..799b09409fad
--- /dev/null
+++ b/arch/arm/include/asm/dma-iommu.h
@@ -0,0 +1,34 @@
1#ifndef ASMARM_DMA_IOMMU_H
2#define ASMARM_DMA_IOMMU_H
3
4#ifdef __KERNEL__
5
6#include <linux/mm_types.h>
7#include <linux/scatterlist.h>
8#include <linux/dma-debug.h>
9#include <linux/kmemcheck.h>
10
11struct dma_iommu_mapping {
12 /* iommu specific data */
13 struct iommu_domain *domain;
14
15 void *bitmap;
16 size_t bits;
17 unsigned int order;
18 dma_addr_t base;
19
20 spinlock_t lock;
21 struct kref kref;
22};
23
24struct dma_iommu_mapping *
25arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
26 int order);
27
28void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
29
30int arm_iommu_attach_device(struct device *dev,
31 struct dma_iommu_mapping *mapping);
32
33#endif /* __KERNEL__ */
34#endif
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index cb3b7c981c4b..bbef15d04890 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -5,11 +5,35 @@
5 5
6#include <linux/mm_types.h> 6#include <linux/mm_types.h>
7#include <linux/scatterlist.h> 7#include <linux/scatterlist.h>
8#include <linux/dma-attrs.h>
8#include <linux/dma-debug.h> 9#include <linux/dma-debug.h>
9 10
10#include <asm-generic/dma-coherent.h> 11#include <asm-generic/dma-coherent.h>
11#include <asm/memory.h> 12#include <asm/memory.h>
12 13
14#define DMA_ERROR_CODE (~0)
15extern struct dma_map_ops arm_dma_ops;
16
17static inline struct dma_map_ops *get_dma_ops(struct device *dev)
18{
19 if (dev && dev->archdata.dma_ops)
20 return dev->archdata.dma_ops;
21 return &arm_dma_ops;
22}
23
24static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
25{
26 BUG_ON(!dev);
27 dev->archdata.dma_ops = ops;
28}
29
30#include <asm-generic/dma-mapping-common.h>
31
32static inline int dma_set_mask(struct device *dev, u64 mask)
33{
34 return get_dma_ops(dev)->set_dma_mask(dev, mask);
35}
36
13#ifdef __arch_page_to_dma 37#ifdef __arch_page_to_dma
14#error Please update to __arch_pfn_to_dma 38#error Please update to __arch_pfn_to_dma
15#endif 39#endif
@@ -62,68 +86,11 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
62#endif 86#endif
63 87
64/* 88/*
65 * The DMA API is built upon the notion of "buffer ownership". A buffer
66 * is either exclusively owned by the CPU (and therefore may be accessed
67 * by it) or exclusively owned by the DMA device. These helper functions
68 * represent the transitions between these two ownership states.
69 *
70 * Note, however, that on later ARMs, this notion does not work due to
71 * speculative prefetches. We model our approach on the assumption that
72 * the CPU does do speculative prefetches, which means we clean caches
73 * before transfers and delay cache invalidation until transfer completion.
74 *
75 * Private support functions: these are not part of the API and are
76 * liable to change. Drivers must not use these.
77 */
78static inline void __dma_single_cpu_to_dev(const void *kaddr, size_t size,
79 enum dma_data_direction dir)
80{
81 extern void ___dma_single_cpu_to_dev(const void *, size_t,
82 enum dma_data_direction);
83
84 if (!arch_is_coherent())
85 ___dma_single_cpu_to_dev(kaddr, size, dir);
86}
87
88static inline void __dma_single_dev_to_cpu(const void *kaddr, size_t size,
89 enum dma_data_direction dir)
90{
91 extern void ___dma_single_dev_to_cpu(const void *, size_t,
92 enum dma_data_direction);
93
94 if (!arch_is_coherent())
95 ___dma_single_dev_to_cpu(kaddr, size, dir);
96}
97
98static inline void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
99 size_t size, enum dma_data_direction dir)
100{
101 extern void ___dma_page_cpu_to_dev(struct page *, unsigned long,
102 size_t, enum dma_data_direction);
103
104 if (!arch_is_coherent())
105 ___dma_page_cpu_to_dev(page, off, size, dir);
106}
107
108static inline void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
109 size_t size, enum dma_data_direction dir)
110{
111 extern void ___dma_page_dev_to_cpu(struct page *, unsigned long,
112 size_t, enum dma_data_direction);
113
114 if (!arch_is_coherent())
115 ___dma_page_dev_to_cpu(page, off, size, dir);
116}
117
118extern int dma_supported(struct device *, u64);
119extern int dma_set_mask(struct device *, u64);
120
121/*
122 * DMA errors are defined by all-bits-set in the DMA address. 89 * DMA errors are defined by all-bits-set in the DMA address.
123 */ 90 */
124static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 91static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
125{ 92{
126 return dma_addr == ~0; 93 return dma_addr == DMA_ERROR_CODE;
127} 94}
128 95
129/* 96/*
@@ -141,69 +108,118 @@ static inline void dma_free_noncoherent(struct device *dev, size_t size,
141{ 108{
142} 109}
143 110
111extern int dma_supported(struct device *dev, u64 mask);
112
144/** 113/**
145 * dma_alloc_coherent - allocate consistent memory for DMA 114 * arm_dma_alloc - allocate consistent memory for DMA
146 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 115 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
147 * @size: required memory size 116 * @size: required memory size
148 * @handle: bus-specific DMA address 117 * @handle: bus-specific DMA address
118 * @attrs: optinal attributes that specific mapping properties
149 * 119 *
150 * Allocate some uncached, unbuffered memory for a device for 120 * Allocate some memory for a device for performing DMA. This function
151 * performing DMA. This function allocates pages, and will 121 * allocates pages, and will return the CPU-viewed address, and sets @handle
152 * return the CPU-viewed address, and sets @handle to be the 122 * to be the device-viewed address.
153 * device-viewed address.
154 */ 123 */
155extern void *dma_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); 124extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
125 gfp_t gfp, struct dma_attrs *attrs);
126
127#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
128
129static inline void *dma_alloc_attrs(struct device *dev, size_t size,
130 dma_addr_t *dma_handle, gfp_t flag,
131 struct dma_attrs *attrs)
132{
133 struct dma_map_ops *ops = get_dma_ops(dev);
134 void *cpu_addr;
135 BUG_ON(!ops);
136
137 cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
138 debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
139 return cpu_addr;
140}
156 141
157/** 142/**
158 * dma_free_coherent - free memory allocated by dma_alloc_coherent 143 * arm_dma_free - free memory allocated by arm_dma_alloc
159 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 144 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
160 * @size: size of memory originally requested in dma_alloc_coherent 145 * @size: size of memory originally requested in dma_alloc_coherent
161 * @cpu_addr: CPU-view address returned from dma_alloc_coherent 146 * @cpu_addr: CPU-view address returned from dma_alloc_coherent
162 * @handle: device-view address returned from dma_alloc_coherent 147 * @handle: device-view address returned from dma_alloc_coherent
148 * @attrs: optinal attributes that specific mapping properties
163 * 149 *
164 * Free (and unmap) a DMA buffer previously allocated by 150 * Free (and unmap) a DMA buffer previously allocated by
165 * dma_alloc_coherent(). 151 * arm_dma_alloc().
166 * 152 *
167 * References to memory and mappings associated with cpu_addr/handle 153 * References to memory and mappings associated with cpu_addr/handle
168 * during and after this call executing are illegal. 154 * during and after this call executing are illegal.
169 */ 155 */
170extern void dma_free_coherent(struct device *, size_t, void *, dma_addr_t); 156extern void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
157 dma_addr_t handle, struct dma_attrs *attrs);
158
159#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL)
160
161static inline void dma_free_attrs(struct device *dev, size_t size,
162 void *cpu_addr, dma_addr_t dma_handle,
163 struct dma_attrs *attrs)
164{
165 struct dma_map_ops *ops = get_dma_ops(dev);
166 BUG_ON(!ops);
167
168 debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
169 ops->free(dev, size, cpu_addr, dma_handle, attrs);
170}
171 171
172/** 172/**
173 * dma_mmap_coherent - map a coherent DMA allocation into user space 173 * arm_dma_mmap - map a coherent DMA allocation into user space
174 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 174 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
175 * @vma: vm_area_struct describing requested user mapping 175 * @vma: vm_area_struct describing requested user mapping
176 * @cpu_addr: kernel CPU-view address returned from dma_alloc_coherent 176 * @cpu_addr: kernel CPU-view address returned from dma_alloc_coherent
177 * @handle: device-view address returned from dma_alloc_coherent 177 * @handle: device-view address returned from dma_alloc_coherent
178 * @size: size of memory originally requested in dma_alloc_coherent 178 * @size: size of memory originally requested in dma_alloc_coherent
179 * @attrs: optinal attributes that specific mapping properties
179 * 180 *
180 * Map a coherent DMA buffer previously allocated by dma_alloc_coherent 181 * Map a coherent DMA buffer previously allocated by dma_alloc_coherent
181 * into user space. The coherent DMA buffer must not be freed by the 182 * into user space. The coherent DMA buffer must not be freed by the
182 * driver until the user space mapping has been released. 183 * driver until the user space mapping has been released.
183 */ 184 */
184int dma_mmap_coherent(struct device *, struct vm_area_struct *, 185extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
185 void *, dma_addr_t, size_t); 186 void *cpu_addr, dma_addr_t dma_addr, size_t size,
187 struct dma_attrs *attrs);
186 188
189#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL)
187 190
188/** 191static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
189 * dma_alloc_writecombine - allocate writecombining memory for DMA 192 void *cpu_addr, dma_addr_t dma_addr,
190 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 193 size_t size, struct dma_attrs *attrs)
191 * @size: required memory size 194{
192 * @handle: bus-specific DMA address 195 struct dma_map_ops *ops = get_dma_ops(dev);
193 * 196 BUG_ON(!ops);
194 * Allocate some uncached, buffered memory for a device for 197 return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
195 * performing DMA. This function allocates pages, and will 198}
196 * return the CPU-viewed address, and sets @handle to be the 199
197 * device-viewed address. 200static inline void *dma_alloc_writecombine(struct device *dev, size_t size,
198 */ 201 dma_addr_t *dma_handle, gfp_t flag)
199extern void *dma_alloc_writecombine(struct device *, size_t, dma_addr_t *, 202{
200 gfp_t); 203 DEFINE_DMA_ATTRS(attrs);
204 dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
205 return dma_alloc_attrs(dev, size, dma_handle, flag, &attrs);
206}
201 207
202#define dma_free_writecombine(dev,size,cpu_addr,handle) \ 208static inline void dma_free_writecombine(struct device *dev, size_t size,
203 dma_free_coherent(dev,size,cpu_addr,handle) 209 void *cpu_addr, dma_addr_t dma_handle)
210{
211 DEFINE_DMA_ATTRS(attrs);
212 dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
213 return dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs);
214}
204 215
205int dma_mmap_writecombine(struct device *, struct vm_area_struct *, 216static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
206 void *, dma_addr_t, size_t); 217 void *cpu_addr, dma_addr_t dma_addr, size_t size)
218{
219 DEFINE_DMA_ATTRS(attrs);
220 dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
221 return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs);
222}
207 223
208/* 224/*
209 * This can be called during boot to increase the size of the consistent 225 * This can be called during boot to increase the size of the consistent
@@ -212,8 +228,6 @@ int dma_mmap_writecombine(struct device *, struct vm_area_struct *,
212 */ 228 */
213extern void __init init_consistent_dma_size(unsigned long size); 229extern void __init init_consistent_dma_size(unsigned long size);
214 230
215
216#ifdef CONFIG_DMABOUNCE
217/* 231/*
218 * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic" 232 * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic"
219 * and utilize bounce buffers as needed to work around limited DMA windows. 233 * and utilize bounce buffers as needed to work around limited DMA windows.
@@ -253,222 +267,19 @@ extern int dmabounce_register_dev(struct device *, unsigned long,
253 */ 267 */
254extern void dmabounce_unregister_dev(struct device *); 268extern void dmabounce_unregister_dev(struct device *);
255 269
256/*
257 * The DMA API, implemented by dmabounce.c. See below for descriptions.
258 */
259extern dma_addr_t __dma_map_page(struct device *, struct page *,
260 unsigned long, size_t, enum dma_data_direction);
261extern void __dma_unmap_page(struct device *, dma_addr_t, size_t,
262 enum dma_data_direction);
263
264/*
265 * Private functions
266 */
267int dmabounce_sync_for_cpu(struct device *, dma_addr_t, unsigned long,
268 size_t, enum dma_data_direction);
269int dmabounce_sync_for_device(struct device *, dma_addr_t, unsigned long,
270 size_t, enum dma_data_direction);
271#else
272static inline int dmabounce_sync_for_cpu(struct device *d, dma_addr_t addr,
273 unsigned long offset, size_t size, enum dma_data_direction dir)
274{
275 return 1;
276}
277 270
278static inline int dmabounce_sync_for_device(struct device *d, dma_addr_t addr,
279 unsigned long offset, size_t size, enum dma_data_direction dir)
280{
281 return 1;
282}
283
284
285static inline dma_addr_t __dma_map_page(struct device *dev, struct page *page,
286 unsigned long offset, size_t size, enum dma_data_direction dir)
287{
288 __dma_page_cpu_to_dev(page, offset, size, dir);
289 return pfn_to_dma(dev, page_to_pfn(page)) + offset;
290}
291
292static inline void __dma_unmap_page(struct device *dev, dma_addr_t handle,
293 size_t size, enum dma_data_direction dir)
294{
295 __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
296 handle & ~PAGE_MASK, size, dir);
297}
298#endif /* CONFIG_DMABOUNCE */
299
300/**
301 * dma_map_single - map a single buffer for streaming DMA
302 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
303 * @cpu_addr: CPU direct mapped address of buffer
304 * @size: size of buffer to map
305 * @dir: DMA transfer direction
306 *
307 * Ensure that any data held in the cache is appropriately discarded
308 * or written back.
309 *
310 * The device owns this memory once this call has completed. The CPU
311 * can regain ownership by calling dma_unmap_single() or
312 * dma_sync_single_for_cpu().
313 */
314static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
315 size_t size, enum dma_data_direction dir)
316{
317 unsigned long offset;
318 struct page *page;
319 dma_addr_t addr;
320
321 BUG_ON(!virt_addr_valid(cpu_addr));
322 BUG_ON(!virt_addr_valid(cpu_addr + size - 1));
323 BUG_ON(!valid_dma_direction(dir));
324
325 page = virt_to_page(cpu_addr);
326 offset = (unsigned long)cpu_addr & ~PAGE_MASK;
327 addr = __dma_map_page(dev, page, offset, size, dir);
328 debug_dma_map_page(dev, page, offset, size, dir, addr, true);
329
330 return addr;
331}
332
333/**
334 * dma_map_page - map a portion of a page for streaming DMA
335 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
336 * @page: page that buffer resides in
337 * @offset: offset into page for start of buffer
338 * @size: size of buffer to map
339 * @dir: DMA transfer direction
340 *
341 * Ensure that any data held in the cache is appropriately discarded
342 * or written back.
343 *
344 * The device owns this memory once this call has completed. The CPU
345 * can regain ownership by calling dma_unmap_page().
346 */
347static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
348 unsigned long offset, size_t size, enum dma_data_direction dir)
349{
350 dma_addr_t addr;
351
352 BUG_ON(!valid_dma_direction(dir));
353
354 addr = __dma_map_page(dev, page, offset, size, dir);
355 debug_dma_map_page(dev, page, offset, size, dir, addr, false);
356
357 return addr;
358}
359
360/**
361 * dma_unmap_single - unmap a single buffer previously mapped
362 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
363 * @handle: DMA address of buffer
364 * @size: size of buffer (same as passed to dma_map_single)
365 * @dir: DMA transfer direction (same as passed to dma_map_single)
366 *
367 * Unmap a single streaming mode DMA translation. The handle and size
368 * must match what was provided in the previous dma_map_single() call.
369 * All other usages are undefined.
370 *
371 * After this call, reads by the CPU to the buffer are guaranteed to see
372 * whatever the device wrote there.
373 */
374static inline void dma_unmap_single(struct device *dev, dma_addr_t handle,
375 size_t size, enum dma_data_direction dir)
376{
377 debug_dma_unmap_page(dev, handle, size, dir, true);
378 __dma_unmap_page(dev, handle, size, dir);
379}
380
381/**
382 * dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
383 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
384 * @handle: DMA address of buffer
385 * @size: size of buffer (same as passed to dma_map_page)
386 * @dir: DMA transfer direction (same as passed to dma_map_page)
387 *
388 * Unmap a page streaming mode DMA translation. The handle and size
389 * must match what was provided in the previous dma_map_page() call.
390 * All other usages are undefined.
391 *
392 * After this call, reads by the CPU to the buffer are guaranteed to see
393 * whatever the device wrote there.
394 */
395static inline void dma_unmap_page(struct device *dev, dma_addr_t handle,
396 size_t size, enum dma_data_direction dir)
397{
398 debug_dma_unmap_page(dev, handle, size, dir, false);
399 __dma_unmap_page(dev, handle, size, dir);
400}
401
402/**
403 * dma_sync_single_range_for_cpu
404 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
405 * @handle: DMA address of buffer
406 * @offset: offset of region to start sync
407 * @size: size of region to sync
408 * @dir: DMA transfer direction (same as passed to dma_map_single)
409 *
410 * Make physical memory consistent for a single streaming mode DMA
411 * translation after a transfer.
412 *
413 * If you perform a dma_map_single() but wish to interrogate the
414 * buffer using the cpu, yet do not wish to teardown the PCI dma
415 * mapping, you must call this function before doing so. At the
416 * next point you give the PCI dma address back to the card, you
417 * must first the perform a dma_sync_for_device, and then the
418 * device again owns the buffer.
419 */
420static inline void dma_sync_single_range_for_cpu(struct device *dev,
421 dma_addr_t handle, unsigned long offset, size_t size,
422 enum dma_data_direction dir)
423{
424 BUG_ON(!valid_dma_direction(dir));
425
426 debug_dma_sync_single_for_cpu(dev, handle + offset, size, dir);
427
428 if (!dmabounce_sync_for_cpu(dev, handle, offset, size, dir))
429 return;
430
431 __dma_single_dev_to_cpu(dma_to_virt(dev, handle) + offset, size, dir);
432}
433
434static inline void dma_sync_single_range_for_device(struct device *dev,
435 dma_addr_t handle, unsigned long offset, size_t size,
436 enum dma_data_direction dir)
437{
438 BUG_ON(!valid_dma_direction(dir));
439
440 debug_dma_sync_single_for_device(dev, handle + offset, size, dir);
441
442 if (!dmabounce_sync_for_device(dev, handle, offset, size, dir))
443 return;
444
445 __dma_single_cpu_to_dev(dma_to_virt(dev, handle) + offset, size, dir);
446}
447
448static inline void dma_sync_single_for_cpu(struct device *dev,
449 dma_addr_t handle, size_t size, enum dma_data_direction dir)
450{
451 dma_sync_single_range_for_cpu(dev, handle, 0, size, dir);
452}
453
454static inline void dma_sync_single_for_device(struct device *dev,
455 dma_addr_t handle, size_t size, enum dma_data_direction dir)
456{
457 dma_sync_single_range_for_device(dev, handle, 0, size, dir);
458}
459 271
460/* 272/*
461 * The scatter list versions of the above methods. 273 * The scatter list versions of the above methods.
462 */ 274 */
463extern int dma_map_sg(struct device *, struct scatterlist *, int, 275extern int arm_dma_map_sg(struct device *, struct scatterlist *, int,
464 enum dma_data_direction); 276 enum dma_data_direction, struct dma_attrs *attrs);
465extern void dma_unmap_sg(struct device *, struct scatterlist *, int, 277extern void arm_dma_unmap_sg(struct device *, struct scatterlist *, int,
278 enum dma_data_direction, struct dma_attrs *attrs);
279extern void arm_dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int,
466 enum dma_data_direction); 280 enum dma_data_direction);
467extern void dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int, 281extern void arm_dma_sync_sg_for_device(struct device *, struct scatterlist *, int,
468 enum dma_data_direction); 282 enum dma_data_direction);
469extern void dma_sync_sg_for_device(struct device *, struct scatterlist *, int,
470 enum dma_data_direction);
471
472 283
473#endif /* __KERNEL__ */ 284#endif /* __KERNEL__ */
474#endif 285#endif
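A small sketch of what the write-combine conversion above means for callers (the helper name is hypothetical; the calls are the ones defined in this header): dma_alloc_writecombine() is no longer a separate allocator entry point but a thin wrapper that sets DMA_ATTR_WRITE_COMBINE and goes through dma_alloc_attrs(), i.e. through the device's dma_map_ops:

  #include <linux/dma-mapping.h>

  /* Hypothetical helper: equivalent to
   * dma_alloc_writecombine(dev, size, handle, GFP_KERNEL),
   * spelled out via the generic attrs path. */
  static void *my_alloc_wc(struct device *dev, size_t size, dma_addr_t *handle)
  {
  	DEFINE_DMA_ATTRS(attrs);

  	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
  	return dma_alloc_attrs(dev, size, handle, GFP_KERNEL, &attrs);
  }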
diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
index b36f3654bf54..a6efcdd6fd25 100644
--- a/arch/arm/include/asm/mach/map.h
+++ b/arch/arm/include/asm/mach/map.h
@@ -30,6 +30,7 @@ struct map_desc {
30#define MT_MEMORY_DTCM 12 30#define MT_MEMORY_DTCM 12
31#define MT_MEMORY_ITCM 13 31#define MT_MEMORY_ITCM 13
32#define MT_MEMORY_SO 14 32#define MT_MEMORY_SO 14
33#define MT_MEMORY_DMA_READY 15
33 34
34#ifdef CONFIG_MMU 35#ifdef CONFIG_MMU
35extern void iotable_init(struct map_desc *, int); 36extern void iotable_init(struct map_desc *, int);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index ebfac782593f..1b3096dfb964 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -81,6 +81,7 @@ __setup("fpe=", fpe_setup);
81extern void paging_init(struct machine_desc *desc); 81extern void paging_init(struct machine_desc *desc);
82extern void sanity_check_meminfo(void); 82extern void sanity_check_meminfo(void);
83extern void reboot_setup(char *str); 83extern void reboot_setup(char *str);
84extern void setup_dma_zone(struct machine_desc *desc);
84 85
85unsigned int processor_id; 86unsigned int processor_id;
86EXPORT_SYMBOL(processor_id); 87EXPORT_SYMBOL(processor_id);
@@ -939,12 +940,8 @@ void __init setup_arch(char **cmdline_p)
939 machine_desc = mdesc; 940 machine_desc = mdesc;
940 machine_name = mdesc->name; 941 machine_name = mdesc->name;
941 942
942#ifdef CONFIG_ZONE_DMA 943 setup_dma_zone(mdesc);
943 if (mdesc->dma_zone_size) { 944
944 extern unsigned long arm_dma_zone_size;
945 arm_dma_zone_size = mdesc->dma_zone_size;
946 }
947#endif
948 if (mdesc->restart_mode) 945 if (mdesc->restart_mode)
949 reboot_setup(&mdesc->restart_mode); 946 reboot_setup(&mdesc->restart_mode);
950 947
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index db23ae4aaaab..ea6b43154090 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -17,8 +17,12 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/device.h> 18#include <linux/device.h>
19#include <linux/dma-mapping.h> 19#include <linux/dma-mapping.h>
20#include <linux/dma-contiguous.h>
20#include <linux/highmem.h> 21#include <linux/highmem.h>
22#include <linux/memblock.h>
21#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/iommu.h>
25#include <linux/vmalloc.h>
22 26
23#include <asm/memory.h> 27#include <asm/memory.h>
24#include <asm/highmem.h> 28#include <asm/highmem.h>
@@ -26,9 +30,112 @@
26#include <asm/tlbflush.h> 30#include <asm/tlbflush.h>
27#include <asm/sizes.h> 31#include <asm/sizes.h>
28#include <asm/mach/arch.h> 32#include <asm/mach/arch.h>
33#include <asm/dma-iommu.h>
34#include <asm/mach/map.h>
35#include <asm/system_info.h>
36#include <asm/dma-contiguous.h>
29 37
30#include "mm.h" 38#include "mm.h"
31 39
40/*
41 * The DMA API is built upon the notion of "buffer ownership". A buffer
42 * is either exclusively owned by the CPU (and therefore may be accessed
43 * by it) or exclusively owned by the DMA device. These helper functions
44 * represent the transitions between these two ownership states.
45 *
46 * Note, however, that on later ARMs, this notion does not work due to
47 * speculative prefetches. We model our approach on the assumption that
48 * the CPU does do speculative prefetches, which means we clean caches
49 * before transfers and delay cache invalidation until transfer completion.
50 *
51 */
52static void __dma_page_cpu_to_dev(struct page *, unsigned long,
53 size_t, enum dma_data_direction);
54static void __dma_page_dev_to_cpu(struct page *, unsigned long,
55 size_t, enum dma_data_direction);
56
57/**
58 * arm_dma_map_page - map a portion of a page for streaming DMA
59 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
60 * @page: page that buffer resides in
61 * @offset: offset into page for start of buffer
62 * @size: size of buffer to map
63 * @dir: DMA transfer direction
64 *
65 * Ensure that any data held in the cache is appropriately discarded
66 * or written back.
67 *
68 * The device owns this memory once this call has completed. The CPU
69 * can regain ownership by calling dma_unmap_page().
70 */
71static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
72 unsigned long offset, size_t size, enum dma_data_direction dir,
73 struct dma_attrs *attrs)
74{
75 if (!arch_is_coherent())
76 __dma_page_cpu_to_dev(page, offset, size, dir);
77 return pfn_to_dma(dev, page_to_pfn(page)) + offset;
78}
79
80/**
81 * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
82 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
83 * @handle: DMA address of buffer
84 * @size: size of buffer (same as passed to dma_map_page)
85 * @dir: DMA transfer direction (same as passed to dma_map_page)
86 *
87 * Unmap a page streaming mode DMA translation. The handle and size
88 * must match what was provided in the previous dma_map_page() call.
89 * All other usages are undefined.
90 *
91 * After this call, reads by the CPU to the buffer are guaranteed to see
92 * whatever the device wrote there.
93 */
94static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
95 size_t size, enum dma_data_direction dir,
96 struct dma_attrs *attrs)
97{
98 if (!arch_is_coherent())
99 __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
100 handle & ~PAGE_MASK, size, dir);
101}
102
103static void arm_dma_sync_single_for_cpu(struct device *dev,
104 dma_addr_t handle, size_t size, enum dma_data_direction dir)
105{
106 unsigned int offset = handle & (PAGE_SIZE - 1);
107 struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
108 if (!arch_is_coherent())
109 __dma_page_dev_to_cpu(page, offset, size, dir);
110}
111
112static void arm_dma_sync_single_for_device(struct device *dev,
113 dma_addr_t handle, size_t size, enum dma_data_direction dir)
114{
115 unsigned int offset = handle & (PAGE_SIZE - 1);
116 struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
117 if (!arch_is_coherent())
118 __dma_page_cpu_to_dev(page, offset, size, dir);
119}
120
121static int arm_dma_set_mask(struct device *dev, u64 dma_mask);
122
123struct dma_map_ops arm_dma_ops = {
124 .alloc = arm_dma_alloc,
125 .free = arm_dma_free,
126 .mmap = arm_dma_mmap,
127 .map_page = arm_dma_map_page,
128 .unmap_page = arm_dma_unmap_page,
129 .map_sg = arm_dma_map_sg,
130 .unmap_sg = arm_dma_unmap_sg,
131 .sync_single_for_cpu = arm_dma_sync_single_for_cpu,
132 .sync_single_for_device = arm_dma_sync_single_for_device,
133 .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu,
134 .sync_sg_for_device = arm_dma_sync_sg_for_device,
135 .set_dma_mask = arm_dma_set_mask,
136};
137EXPORT_SYMBOL(arm_dma_ops);
138
32static u64 get_coherent_dma_mask(struct device *dev) 139static u64 get_coherent_dma_mask(struct device *dev)
33{ 140{
34 u64 mask = (u64)arm_dma_limit; 141 u64 mask = (u64)arm_dma_limit;
@@ -56,6 +163,21 @@ static u64 get_coherent_dma_mask(struct device *dev)
56 return mask; 163 return mask;
57} 164}
58 165
166static void __dma_clear_buffer(struct page *page, size_t size)
167{
168 void *ptr;
169 /*
170 * Ensure that the allocated pages are zeroed, and that any data
171 * lurking in the kernel direct-mapped region is invalidated.
172 */
173 ptr = page_address(page);
174 if (ptr) {
175 memset(ptr, 0, size);
176 dmac_flush_range(ptr, ptr + size);
177 outer_flush_range(__pa(ptr), __pa(ptr) + size);
178 }
179}
180
59/* 181/*
60 * Allocate a DMA buffer for 'dev' of size 'size' using the 182 * Allocate a DMA buffer for 'dev' of size 'size' using the
61 * specified gfp mask. Note that 'size' must be page aligned. 183 * specified gfp mask. Note that 'size' must be page aligned.
@@ -64,23 +186,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
64{ 186{
65 unsigned long order = get_order(size); 187 unsigned long order = get_order(size);
66 struct page *page, *p, *e; 188 struct page *page, *p, *e;
67 void *ptr;
68 u64 mask = get_coherent_dma_mask(dev);
69
70#ifdef CONFIG_DMA_API_DEBUG
71 u64 limit = (mask + 1) & ~mask;
72 if (limit && size >= limit) {
73 dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
74 size, mask);
75 return NULL;
76 }
77#endif
78
79 if (!mask)
80 return NULL;
81
82 if (mask < 0xffffffffULL)
83 gfp |= GFP_DMA;
84 189
85 page = alloc_pages(gfp, order); 190 page = alloc_pages(gfp, order);
86 if (!page) 191 if (!page)
@@ -93,14 +198,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
93 for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) 198 for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
94 __free_page(p); 199 __free_page(p);
95 200
96 /* 201 __dma_clear_buffer(page, size);
97 * Ensure that the allocated pages are zeroed, and that any data
98 * lurking in the kernel direct-mapped region is invalidated.
99 */
100 ptr = page_address(page);
101 memset(ptr, 0, size);
102 dmac_flush_range(ptr, ptr + size);
103 outer_flush_range(__pa(ptr), __pa(ptr) + size);
104 202
105 return page; 203 return page;
106} 204}
@@ -170,6 +268,11 @@ static int __init consistent_init(void)
170 unsigned long base = consistent_base; 268 unsigned long base = consistent_base;
171 unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; 269 unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;
172 270
271#ifndef CONFIG_ARM_DMA_USE_IOMMU
272 if (cpu_architecture() >= CPU_ARCH_ARMv6)
273 return 0;
274#endif
275
173 consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); 276 consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
174 if (!consistent_pte) { 277 if (!consistent_pte) {
175 pr_err("%s: no memory\n", __func__); 278 pr_err("%s: no memory\n", __func__);
@@ -184,14 +287,14 @@ static int __init consistent_init(void)
184 287
185 pud = pud_alloc(&init_mm, pgd, base); 288 pud = pud_alloc(&init_mm, pgd, base);
186 if (!pud) { 289 if (!pud) {
187 printk(KERN_ERR "%s: no pud tables\n", __func__); 290 pr_err("%s: no pud tables\n", __func__);
188 ret = -ENOMEM; 291 ret = -ENOMEM;
189 break; 292 break;
190 } 293 }
191 294
192 pmd = pmd_alloc(&init_mm, pud, base); 295 pmd = pmd_alloc(&init_mm, pud, base);
193 if (!pmd) { 296 if (!pmd) {
194 printk(KERN_ERR "%s: no pmd tables\n", __func__); 297 pr_err("%s: no pmd tables\n", __func__);
195 ret = -ENOMEM; 298 ret = -ENOMEM;
196 break; 299 break;
197 } 300 }
@@ -199,7 +302,7 @@ static int __init consistent_init(void)
199 302
200 pte = pte_alloc_kernel(pmd, base); 303 pte = pte_alloc_kernel(pmd, base);
201 if (!pte) { 304 if (!pte) {
202 printk(KERN_ERR "%s: no pte tables\n", __func__); 305 pr_err("%s: no pte tables\n", __func__);
203 ret = -ENOMEM; 306 ret = -ENOMEM;
204 break; 307 break;
205 } 308 }
@@ -210,9 +313,101 @@ static int __init consistent_init(void)
210 313
211 return ret; 314 return ret;
212} 315}
213
214core_initcall(consistent_init); 316core_initcall(consistent_init);
215 317
318static void *__alloc_from_contiguous(struct device *dev, size_t size,
319 pgprot_t prot, struct page **ret_page);
320
321static struct arm_vmregion_head coherent_head = {
322 .vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock),
323 .vm_list = LIST_HEAD_INIT(coherent_head.vm_list),
324};
325
326size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8;
327
328static int __init early_coherent_pool(char *p)
329{
330 coherent_pool_size = memparse(p, &p);
331 return 0;
332}
333early_param("coherent_pool", early_coherent_pool);
334
335/*
336 * Initialise the coherent pool for atomic allocations.
337 */
338static int __init coherent_init(void)
339{
340 pgprot_t prot = pgprot_dmacoherent(pgprot_kernel);
341 size_t size = coherent_pool_size;
342 struct page *page;
343 void *ptr;
344
345 if (cpu_architecture() < CPU_ARCH_ARMv6)
346 return 0;
347
348 ptr = __alloc_from_contiguous(NULL, size, prot, &page);
349 if (ptr) {
350 coherent_head.vm_start = (unsigned long) ptr;
351 coherent_head.vm_end = (unsigned long) ptr + size;
352 printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n",
353 (unsigned)size / 1024);
354 return 0;
355 }
356 printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
357 (unsigned)size / 1024);
358 return -ENOMEM;
359}
360/*
361 * CMA is activated by core_initcall, so we must be called after it.
362 */
363postcore_initcall(coherent_init);
364
365struct dma_contig_early_reserve {
366 phys_addr_t base;
367 unsigned long size;
368};
369
370static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata;
371
372static int dma_mmu_remap_num __initdata;
373
374void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
375{
376 dma_mmu_remap[dma_mmu_remap_num].base = base;
377 dma_mmu_remap[dma_mmu_remap_num].size = size;
378 dma_mmu_remap_num++;
379}
380
381void __init dma_contiguous_remap(void)
382{
383 int i;
384 for (i = 0; i < dma_mmu_remap_num; i++) {
385 phys_addr_t start = dma_mmu_remap[i].base;
386 phys_addr_t end = start + dma_mmu_remap[i].size;
387 struct map_desc map;
388 unsigned long addr;
389
390 if (end > arm_lowmem_limit)
391 end = arm_lowmem_limit;
392 if (start >= end)
393 return;
394
395 map.pfn = __phys_to_pfn(start);
396 map.virtual = __phys_to_virt(start);
397 map.length = end - start;
398 map.type = MT_MEMORY_DMA_READY;
399
400 /*
401 * Clear previous low-memory mapping
402 */
403 for (addr = __phys_to_virt(start); addr < __phys_to_virt(end);
404 addr += PMD_SIZE)
405 pmd_clear(pmd_off_k(addr));
406
407 iotable_init(&map, 1);
408 }
409}
410
216static void * 411static void *
217__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, 412__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
218 const void *caller) 413 const void *caller)
@@ -222,7 +417,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
222 int bit; 417 int bit;
223 418
224 if (!consistent_pte) { 419 if (!consistent_pte) {
225 printk(KERN_ERR "%s: not initialised\n", __func__); 420 pr_err("%s: not initialised\n", __func__);
226 dump_stack(); 421 dump_stack();
227 return NULL; 422 return NULL;
228 } 423 }
@@ -249,7 +444,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
249 u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); 444 u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
250 445
251 pte = consistent_pte[idx] + off; 446 pte = consistent_pte[idx] + off;
252 c->vm_pages = page; 447 c->priv = page;
253 448
254 do { 449 do {
255 BUG_ON(!pte_none(*pte)); 450 BUG_ON(!pte_none(*pte));
@@ -281,14 +476,14 @@ static void __dma_free_remap(void *cpu_addr, size_t size)
281 476
282 c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); 477 c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr);
283 if (!c) { 478 if (!c) {
284 printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", 479 pr_err("%s: trying to free invalid coherent area: %p\n",
285 __func__, cpu_addr); 480 __func__, cpu_addr);
286 dump_stack(); 481 dump_stack();
287 return; 482 return;
288 } 483 }
289 484
290 if ((c->vm_end - c->vm_start) != size) { 485 if ((c->vm_end - c->vm_start) != size) {
291 printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", 486 pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
292 __func__, c->vm_end - c->vm_start, size); 487 __func__, c->vm_end - c->vm_start, size);
293 dump_stack(); 488 dump_stack();
294 size = c->vm_end - c->vm_start; 489 size = c->vm_end - c->vm_start;
@@ -310,8 +505,8 @@ static void __dma_free_remap(void *cpu_addr, size_t size)
310 } 505 }
311 506
312 if (pte_none(pte) || !pte_present(pte)) 507 if (pte_none(pte) || !pte_present(pte))
313 printk(KERN_CRIT "%s: bad page in kernel page table\n", 508 pr_crit("%s: bad page in kernel page table\n",
314 __func__); 509 __func__);
315 } while (size -= PAGE_SIZE); 510 } while (size -= PAGE_SIZE);
316 511
317 flush_tlb_kernel_range(c->vm_start, c->vm_end); 512 flush_tlb_kernel_range(c->vm_start, c->vm_end);
@@ -319,20 +514,182 @@ static void __dma_free_remap(void *cpu_addr, size_t size)
319 arm_vmregion_free(&consistent_head, c); 514 arm_vmregion_free(&consistent_head, c);
320} 515}
321 516
517static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
518 void *data)
519{
520 struct page *page = virt_to_page(addr);
521 pgprot_t prot = *(pgprot_t *)data;
522
523 set_pte_ext(pte, mk_pte(page, prot), 0);
524 return 0;
525}
526
527static void __dma_remap(struct page *page, size_t size, pgprot_t prot)
528{
529 unsigned long start = (unsigned long) page_address(page);
530 unsigned end = start + size;
531
532 apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot);
533 dsb();
534 flush_tlb_kernel_range(start, end);
535}
536
537static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
538 pgprot_t prot, struct page **ret_page,
539 const void *caller)
540{
541 struct page *page;
542 void *ptr;
543 page = __dma_alloc_buffer(dev, size, gfp);
544 if (!page)
545 return NULL;
546
547 ptr = __dma_alloc_remap(page, size, gfp, prot, caller);
548 if (!ptr) {
549 __dma_free_buffer(page, size);
550 return NULL;
551 }
552
553 *ret_page = page;
554 return ptr;
555}
556
557static void *__alloc_from_pool(struct device *dev, size_t size,
558 struct page **ret_page, const void *caller)
559{
560 struct arm_vmregion *c;
561 size_t align;
562
563 if (!coherent_head.vm_start) {
564 printk(KERN_ERR "%s: coherent pool not initialised!\n",
565 __func__);
566 dump_stack();
567 return NULL;
568 }
569
570 /*
571 * Align the region allocation - allocations from pool are rather
572 * small, so align them to their order in pages, minimum is a page
573 * size. This helps reduce fragmentation of the DMA space.
574 */
575 align = PAGE_SIZE << get_order(size);
576 c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller);
577 if (c) {
578 void *ptr = (void *)c->vm_start;
579 struct page *page = virt_to_page(ptr);
580 *ret_page = page;
581 return ptr;
582 }
583 return NULL;
584}
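For reference, a minimal sketch of what the alignment rule in __alloc_from_pool() works out to, assuming 4 KiB pages; the request sizes below are hypothetical and only illustrate the comment above:

/*
 * Illustration only (4 KiB pages assumed):
 *
 *   size =  1 KiB -> get_order(size) = 0 -> align = PAGE_SIZE      =  4 KiB
 *   size =  8 KiB -> get_order(size) = 1 -> align = PAGE_SIZE << 1 =  8 KiB
 *   size = 20 KiB -> get_order(size) = 3 -> align = PAGE_SIZE << 3 = 32 KiB
 *
 * Every pool allocation is therefore aligned to its own power-of-two
 * footprint, which keeps the small atomic pool from fragmenting.
 */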
585
586static int __free_from_pool(void *cpu_addr, size_t size)
587{
588 unsigned long start = (unsigned long)cpu_addr;
589 unsigned long end = start + size;
590 struct arm_vmregion *c;
591
592 if (start < coherent_head.vm_start || end > coherent_head.vm_end)
593 return 0;
594
595 c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start);
596
597 if ((c->vm_end - c->vm_start) != size) {
598 printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
599 __func__, c->vm_end - c->vm_start, size);
600 dump_stack();
601 size = c->vm_end - c->vm_start;
602 }
603
604 arm_vmregion_free(&coherent_head, c);
605 return 1;
606}
607
608static void *__alloc_from_contiguous(struct device *dev, size_t size,
609 pgprot_t prot, struct page **ret_page)
610{
611 unsigned long order = get_order(size);
612 size_t count = size >> PAGE_SHIFT;
613 struct page *page;
614
615 page = dma_alloc_from_contiguous(dev, count, order);
616 if (!page)
617 return NULL;
618
619 __dma_clear_buffer(page, size);
620 __dma_remap(page, size, prot);
621
622 *ret_page = page;
623 return page_address(page);
624}
625
626static void __free_from_contiguous(struct device *dev, struct page *page,
627 size_t size)
628{
629 __dma_remap(page, size, pgprot_kernel);
630 dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
631}
632
633static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
634{
635 prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
636 pgprot_writecombine(prot) :
637 pgprot_dmacoherent(prot);
638 return prot;
639}
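Since __get_dma_pgprot() above honours DMA_ATTR_WRITE_COMBINE, a driver can request a write-combined buffer through the generic attrs interface. A hedged sketch, assuming DEFINE_DMA_ATTRS(), dma_set_attr() and dma_alloc_attrs() are available to the caller; the helper name is hypothetical:

#include <linux/dma-mapping.h>
#include <linux/dma-attrs.h>

/* Hypothetical helper: allocate a write-combined coherent buffer. */
static void *example_alloc_wc(struct device *dev, size_t size, dma_addr_t *dma)
{
	DEFINE_DMA_ATTRS(attrs);

	/* __get_dma_pgprot() turns this attribute into pgprot_writecombine(). */
	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
	return dma_alloc_attrs(dev, size, dma, GFP_KERNEL, &attrs);
}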
640
641#define nommu() 0
642
322#else /* !CONFIG_MMU */ 643#else /* !CONFIG_MMU */
323 644
324#define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page) 645#define nommu() 1
325#define __dma_free_remap(addr, size) do { } while (0) 646
647#define __get_dma_pgprot(attrs, prot) __pgprot(0)
648#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL
649#define __alloc_from_pool(dev, size, ret_page, c) NULL
650#define __alloc_from_contiguous(dev, size, prot, ret) NULL
651#define __free_from_pool(cpu_addr, size) 0
652#define __free_from_contiguous(dev, page, size) do { } while (0)
653#define __dma_free_remap(cpu_addr, size) do { } while (0)
326 654
327#endif /* CONFIG_MMU */ 655#endif /* CONFIG_MMU */
328 656
329static void * 657static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
330__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, 658 struct page **ret_page)
331 pgprot_t prot, const void *caller) 659{
660 struct page *page;
661 page = __dma_alloc_buffer(dev, size, gfp);
662 if (!page)
663 return NULL;
664
665 *ret_page = page;
666 return page_address(page);
667}
668
669
670
671static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
672 gfp_t gfp, pgprot_t prot, const void *caller)
332{ 673{
674 u64 mask = get_coherent_dma_mask(dev);
333 struct page *page; 675 struct page *page;
334 void *addr; 676 void *addr;
335 677
678#ifdef CONFIG_DMA_API_DEBUG
679 u64 limit = (mask + 1) & ~mask;
680 if (limit && size >= limit) {
681 dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
682 size, mask);
683 return NULL;
684 }
685#endif
686
687 if (!mask)
688 return NULL;
689
690 if (mask < 0xffffffffULL)
691 gfp |= GFP_DMA;
692
336 /* 693 /*
337 * Following is a work-around (a.k.a. hack) to prevent pages 694 * Following is a work-around (a.k.a. hack) to prevent pages
338 * with __GFP_COMP being passed to split_page() which cannot 695 * with __GFP_COMP being passed to split_page() which cannot
@@ -342,22 +699,20 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
342 */ 699 */
343 gfp &= ~(__GFP_COMP); 700 gfp &= ~(__GFP_COMP);
344 701
345 *handle = ~0; 702 *handle = DMA_ERROR_CODE;
346 size = PAGE_ALIGN(size); 703 size = PAGE_ALIGN(size);
347 704
348 page = __dma_alloc_buffer(dev, size, gfp); 705 if (arch_is_coherent() || nommu())
349 if (!page) 706 addr = __alloc_simple_buffer(dev, size, gfp, &page);
350 return NULL; 707 else if (cpu_architecture() < CPU_ARCH_ARMv6)
351 708 addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
352 if (!arch_is_coherent()) 709 else if (gfp & GFP_ATOMIC)
353 addr = __dma_alloc_remap(page, size, gfp, prot, caller); 710 addr = __alloc_from_pool(dev, size, &page, caller);
354 else 711 else
355 addr = page_address(page); 712 addr = __alloc_from_contiguous(dev, size, prot, &page);
356 713
357 if (addr) 714 if (addr)
358 *handle = pfn_to_dma(dev, page_to_pfn(page)); 715 *handle = pfn_to_dma(dev, page_to_pfn(page));
359 else
360 __dma_free_buffer(page, size);
361 716
362 return addr; 717 return addr;
363} 718}
@@ -366,138 +721,71 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
366 * Allocate DMA-coherent memory space and return both the kernel remapped 721 * Allocate DMA-coherent memory space and return both the kernel remapped
367 * virtual and bus address for that space. 722 * virtual and bus address for that space.
368 */ 723 */
369void * 724void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
370dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) 725 gfp_t gfp, struct dma_attrs *attrs)
371{ 726{
727 pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
372 void *memory; 728 void *memory;
373 729
374 if (dma_alloc_from_coherent(dev, size, handle, &memory)) 730 if (dma_alloc_from_coherent(dev, size, handle, &memory))
375 return memory; 731 return memory;
376 732
377 return __dma_alloc(dev, size, handle, gfp, 733 return __dma_alloc(dev, size, handle, gfp, prot,
378 pgprot_dmacoherent(pgprot_kernel),
379 __builtin_return_address(0)); 734 __builtin_return_address(0));
380} 735}
381EXPORT_SYMBOL(dma_alloc_coherent);
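arm_dma_alloc() above, together with arm_dma_free() further down, is what a driver reaches through the usual dma_alloc_coherent()/dma_free_coherent() wrappers. A minimal, hypothetical driver-side sketch (device pointer, ring size and function names are illustrative only):

#include <linux/dma-mapping.h>
#include <linux/errno.h>

#define EXAMPLE_RING_SIZE	(64 * 1024)	/* hypothetical 64 KiB ring */

static int example_alloc_ring(struct device *dev, void **ring,
			      dma_addr_t *ring_dma)
{
	/* Returns a CPU virtual address and fills in the bus address. */
	*ring = dma_alloc_coherent(dev, EXAMPLE_RING_SIZE, ring_dma, GFP_KERNEL);
	return *ring ? 0 : -ENOMEM;
}

static void example_free_ring(struct device *dev, void *ring,
			      dma_addr_t ring_dma)
{
	/* Must not be called with IRQs disabled (see arm_dma_free()). */
	dma_free_coherent(dev, EXAMPLE_RING_SIZE, ring, ring_dma);
}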
382 736
383/* 737/*
384 * Allocate a writecombining region, in much the same way as 738 * Create userspace mapping for the DMA-coherent memory.
385 * dma_alloc_coherent above.
386 */ 739 */
387void * 740int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
388dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) 741 void *cpu_addr, dma_addr_t dma_addr, size_t size,
389{ 742 struct dma_attrs *attrs)
390 return __dma_alloc(dev, size, handle, gfp,
391 pgprot_writecombine(pgprot_kernel),
392 __builtin_return_address(0));
393}
394EXPORT_SYMBOL(dma_alloc_writecombine);
395
396static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
397 void *cpu_addr, dma_addr_t dma_addr, size_t size)
398{ 743{
399 int ret = -ENXIO; 744 int ret = -ENXIO;
400#ifdef CONFIG_MMU 745#ifdef CONFIG_MMU
401 unsigned long user_size, kern_size; 746 unsigned long pfn = dma_to_pfn(dev, dma_addr);
402 struct arm_vmregion *c; 747 vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
403 748
404 user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 749 if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
750 return ret;
405 751
406 c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); 752 ret = remap_pfn_range(vma, vma->vm_start,
407 if (c) { 753 pfn + vma->vm_pgoff,
408 unsigned long off = vma->vm_pgoff; 754 vma->vm_end - vma->vm_start,
409 755 vma->vm_page_prot);
410 kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
411
412 if (off < kern_size &&
413 user_size <= (kern_size - off)) {
414 ret = remap_pfn_range(vma, vma->vm_start,
415 page_to_pfn(c->vm_pages) + off,
416 user_size << PAGE_SHIFT,
417 vma->vm_page_prot);
418 }
419 }
420#endif /* CONFIG_MMU */ 756#endif /* CONFIG_MMU */
421 757
422 return ret; 758 return ret;
423} 759}
424 760
425int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
426 void *cpu_addr, dma_addr_t dma_addr, size_t size)
427{
428 vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot);
429 return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
430}
431EXPORT_SYMBOL(dma_mmap_coherent);
432
433int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
434 void *cpu_addr, dma_addr_t dma_addr, size_t size)
435{
436 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
437 return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
438}
439EXPORT_SYMBOL(dma_mmap_writecombine);
440
441/* 761/*
442 * free a page as defined by the above mapping. 762 * Free a buffer as defined by the above mapping.
443 * Must not be called with IRQs disabled.
444 */ 763 */
445void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) 764void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
765 dma_addr_t handle, struct dma_attrs *attrs)
446{ 766{
447 WARN_ON(irqs_disabled()); 767 struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
448 768
449 if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) 769 if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
450 return; 770 return;
451 771
452 size = PAGE_ALIGN(size); 772 size = PAGE_ALIGN(size);
453 773
454 if (!arch_is_coherent()) 774 if (arch_is_coherent() || nommu()) {
775 __dma_free_buffer(page, size);
776 } else if (cpu_architecture() < CPU_ARCH_ARMv6) {
455 __dma_free_remap(cpu_addr, size); 777 __dma_free_remap(cpu_addr, size);
456 778 __dma_free_buffer(page, size);
457 __dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size);
458}
459EXPORT_SYMBOL(dma_free_coherent);
460
461/*
462 * Make an area consistent for devices.
463 * Note: Drivers should NOT use this function directly, as it will break
464 * platforms with CONFIG_DMABOUNCE.
465 * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
466 */
467void ___dma_single_cpu_to_dev(const void *kaddr, size_t size,
468 enum dma_data_direction dir)
469{
470 unsigned long paddr;
471
472 BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1));
473
474 dmac_map_area(kaddr, size, dir);
475
476 paddr = __pa(kaddr);
477 if (dir == DMA_FROM_DEVICE) {
478 outer_inv_range(paddr, paddr + size);
479 } else { 779 } else {
480 outer_clean_range(paddr, paddr + size); 780 if (__free_from_pool(cpu_addr, size))
481 } 781 return;
482 /* FIXME: non-speculating: flush on bidirectional mappings? */ 782 /*
483} 783 * Non-atomic allocations cannot be freed with IRQs disabled
484EXPORT_SYMBOL(___dma_single_cpu_to_dev); 784 */
485 785 WARN_ON(irqs_disabled());
486void ___dma_single_dev_to_cpu(const void *kaddr, size_t size, 786 __free_from_contiguous(dev, page, size);
487 enum dma_data_direction dir)
488{
489 BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1));
490
491 /* FIXME: non-speculating: not required */
492 /* don't bother invalidating if DMA to device */
493 if (dir != DMA_TO_DEVICE) {
494 unsigned long paddr = __pa(kaddr);
495 outer_inv_range(paddr, paddr + size);
496 } 787 }
497
498 dmac_unmap_area(kaddr, size, dir);
499} 788}
500EXPORT_SYMBOL(___dma_single_dev_to_cpu);
501 789
502static void dma_cache_maint_page(struct page *page, unsigned long offset, 790static void dma_cache_maint_page(struct page *page, unsigned long offset,
503 size_t size, enum dma_data_direction dir, 791 size_t size, enum dma_data_direction dir,
@@ -543,7 +831,13 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset,
543 } while (left); 831 } while (left);
544} 832}
545 833
546void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, 834/*
835 * Make an area consistent for devices.
836 * Note: Drivers should NOT use this function directly, as it will break
837 * platforms with CONFIG_DMABOUNCE.
838 * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
839 */
840static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
547 size_t size, enum dma_data_direction dir) 841 size_t size, enum dma_data_direction dir)
548{ 842{
549 unsigned long paddr; 843 unsigned long paddr;
@@ -558,9 +852,8 @@ void ___dma_page_cpu_to_dev(struct page *page, unsigned long off,
558 } 852 }
559 /* FIXME: non-speculating: flush on bidirectional mappings? */ 853 /* FIXME: non-speculating: flush on bidirectional mappings? */
560} 854}
561EXPORT_SYMBOL(___dma_page_cpu_to_dev);
562 855
563void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, 856static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
564 size_t size, enum dma_data_direction dir) 857 size_t size, enum dma_data_direction dir)
565{ 858{
566 unsigned long paddr = page_to_phys(page) + off; 859 unsigned long paddr = page_to_phys(page) + off;
@@ -578,10 +871,9 @@ void ___dma_page_dev_to_cpu(struct page *page, unsigned long off,
578 if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) 871 if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
579 set_bit(PG_dcache_clean, &page->flags); 872 set_bit(PG_dcache_clean, &page->flags);
580} 873}
581EXPORT_SYMBOL(___dma_page_dev_to_cpu);
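The comment above __dma_page_cpu_to_dev() directs drivers to the dma_sync_* interface rather than these low-level cache helpers. A minimal, hypothetical example of the intended ownership dance for a streaming RX buffer:

#include <linux/dma-mapping.h>

static void example_rx_complete(struct device *dev, dma_addr_t buf_dma,
				void *buf, size_t len)
{
	/* Give the buffer back to the CPU before reading what the device wrote. */
	dma_sync_single_for_cpu(dev, buf_dma, len, DMA_FROM_DEVICE);

	/* ... inspect buf here ... */

	/* Hand ownership back to the device for the next transfer. */
	dma_sync_single_for_device(dev, buf_dma, len, DMA_FROM_DEVICE);
}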
582 874
583/** 875/**
584 * dma_map_sg - map a set of SG buffers for streaming mode DMA 876 * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA
585 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 877 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
586 * @sg: list of buffers 878 * @sg: list of buffers
587 * @nents: number of buffers to map 879 * @nents: number of buffers to map
@@ -596,32 +888,32 @@ EXPORT_SYMBOL(___dma_page_dev_to_cpu);
596 * Device ownership issues as mentioned for dma_map_single are the same 888 * Device ownership issues as mentioned for dma_map_single are the same
597 * here. 889 * here.
598 */ 890 */
599int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 891int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
600 enum dma_data_direction dir) 892 enum dma_data_direction dir, struct dma_attrs *attrs)
601{ 893{
894 struct dma_map_ops *ops = get_dma_ops(dev);
602 struct scatterlist *s; 895 struct scatterlist *s;
603 int i, j; 896 int i, j;
604 897
605 BUG_ON(!valid_dma_direction(dir));
606
607 for_each_sg(sg, s, nents, i) { 898 for_each_sg(sg, s, nents, i) {
608 s->dma_address = __dma_map_page(dev, sg_page(s), s->offset, 899#ifdef CONFIG_NEED_SG_DMA_LENGTH
609 s->length, dir); 900 s->dma_length = s->length;
901#endif
902 s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
903 s->length, dir, attrs);
610 if (dma_mapping_error(dev, s->dma_address)) 904 if (dma_mapping_error(dev, s->dma_address))
611 goto bad_mapping; 905 goto bad_mapping;
612 } 906 }
613 debug_dma_map_sg(dev, sg, nents, nents, dir);
614 return nents; 907 return nents;
615 908
616 bad_mapping: 909 bad_mapping:
617 for_each_sg(sg, s, i, j) 910 for_each_sg(sg, s, i, j)
618 __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); 911 ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs);
619 return 0; 912 return 0;
620} 913}
621EXPORT_SYMBOL(dma_map_sg);
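arm_dma_map_sg() above is where a driver ends up when it calls the generic dma_map_sg(). A short, hypothetical usage sketch (the scatterlist is assumed to be initialised by the caller):

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/errno.h>

static int example_map_for_write(struct device *dev, struct scatterlist *sgl,
				 int nents)
{
	int mapped = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);

	if (!mapped)
		return -ENOMEM;

	/*
	 * Program the hardware from sg_dma_address()/sg_dma_len() of the
	 * first 'mapped' entries, then undo with:
	 *	dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
	 */
	return mapped;
}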
622 914
623/** 915/**
624 * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg 916 * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
625 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 917 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
626 * @sg: list of buffers 918 * @sg: list of buffers
627 * @nents: number of buffers to unmap (same as was passed to dma_map_sg) 919 * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
@@ -630,70 +922,55 @@ EXPORT_SYMBOL(dma_map_sg);
630 * Unmap a set of streaming mode DMA translations. Again, CPU access 922 * Unmap a set of streaming mode DMA translations. Again, CPU access
631 * rules concerning calls here are the same as for dma_unmap_single(). 923 * rules concerning calls here are the same as for dma_unmap_single().
632 */ 924 */
633void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 925void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
634 enum dma_data_direction dir) 926 enum dma_data_direction dir, struct dma_attrs *attrs)
635{ 927{
928 struct dma_map_ops *ops = get_dma_ops(dev);
636 struct scatterlist *s; 929 struct scatterlist *s;
637 int i;
638 930
639 debug_dma_unmap_sg(dev, sg, nents, dir); 931 int i;
640 932
641 for_each_sg(sg, s, nents, i) 933 for_each_sg(sg, s, nents, i)
642 __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); 934 ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs);
643} 935}
644EXPORT_SYMBOL(dma_unmap_sg);
645 936
646/** 937/**
647 * dma_sync_sg_for_cpu 938 * arm_dma_sync_sg_for_cpu
648 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 939 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
649 * @sg: list of buffers 940 * @sg: list of buffers
650 * @nents: number of buffers to map (returned from dma_map_sg) 941 * @nents: number of buffers to map (returned from dma_map_sg)
651 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 942 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
652 */ 943 */
653void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, 944void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
654 int nents, enum dma_data_direction dir) 945 int nents, enum dma_data_direction dir)
655{ 946{
947 struct dma_map_ops *ops = get_dma_ops(dev);
656 struct scatterlist *s; 948 struct scatterlist *s;
657 int i; 949 int i;
658 950
659 for_each_sg(sg, s, nents, i) { 951 for_each_sg(sg, s, nents, i)
660 if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0, 952 ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length,
661 sg_dma_len(s), dir)) 953 dir);
662 continue;
663
664 __dma_page_dev_to_cpu(sg_page(s), s->offset,
665 s->length, dir);
666 }
667
668 debug_dma_sync_sg_for_cpu(dev, sg, nents, dir);
669} 954}
670EXPORT_SYMBOL(dma_sync_sg_for_cpu);
671 955
672/** 956/**
673 * dma_sync_sg_for_device 957 * arm_dma_sync_sg_for_device
674 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 958 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
675 * @sg: list of buffers 959 * @sg: list of buffers
676 * @nents: number of buffers to map (returned from dma_map_sg) 960 * @nents: number of buffers to map (returned from dma_map_sg)
677 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 961 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
678 */ 962 */
679void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, 963void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
680 int nents, enum dma_data_direction dir) 964 int nents, enum dma_data_direction dir)
681{ 965{
966 struct dma_map_ops *ops = get_dma_ops(dev);
682 struct scatterlist *s; 967 struct scatterlist *s;
683 int i; 968 int i;
684 969
685 for_each_sg(sg, s, nents, i) { 970 for_each_sg(sg, s, nents, i)
686 if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0, 971 ops->sync_single_for_device(dev, sg_dma_address(s), s->length,
687 sg_dma_len(s), dir)) 972 dir);
688 continue;
689
690 __dma_page_cpu_to_dev(sg_page(s), s->offset,
691 s->length, dir);
692 }
693
694 debug_dma_sync_sg_for_device(dev, sg, nents, dir);
695} 973}
696EXPORT_SYMBOL(dma_sync_sg_for_device);
697 974
698/* 975/*
699 * Return whether the given device DMA address mask can be supported 976 * Return whether the given device DMA address mask can be supported
@@ -709,18 +986,15 @@ int dma_supported(struct device *dev, u64 mask)
709} 986}
710EXPORT_SYMBOL(dma_supported); 987EXPORT_SYMBOL(dma_supported);
711 988
712int dma_set_mask(struct device *dev, u64 dma_mask) 989static int arm_dma_set_mask(struct device *dev, u64 dma_mask)
713{ 990{
714 if (!dev->dma_mask || !dma_supported(dev, dma_mask)) 991 if (!dev->dma_mask || !dma_supported(dev, dma_mask))
715 return -EIO; 992 return -EIO;
716 993
717#ifndef CONFIG_DMABOUNCE
718 *dev->dma_mask = dma_mask; 994 *dev->dma_mask = dma_mask;
719#endif
720 995
721 return 0; 996 return 0;
722} 997}
723EXPORT_SYMBOL(dma_set_mask);
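arm_dma_set_mask() above backs the generic dma_set_mask() call that drivers issue at probe time. A hypothetical sketch of the usual mask negotiation:

#include <linux/dma-mapping.h>
#include <linux/errno.h>

static int example_setup_masks(struct device *dev)
{
	/* A failure here means the device cannot do DMA on this platform. */
	if (dma_set_mask(dev, DMA_BIT_MASK(32)))
		return -EIO;

	return dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
}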
724 998
725#define PREALLOC_DMA_DEBUG_ENTRIES 4096 999#define PREALLOC_DMA_DEBUG_ENTRIES 4096
726 1000
@@ -733,3 +1007,679 @@ static int __init dma_debug_do_init(void)
733 return 0; 1007 return 0;
734} 1008}
735fs_initcall(dma_debug_do_init); 1009fs_initcall(dma_debug_do_init);
1010
1011#ifdef CONFIG_ARM_DMA_USE_IOMMU
1012
1013/* IOMMU */
1014
1015static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
1016 size_t size)
1017{
1018 unsigned int order = get_order(size);
1019 unsigned int align = 0;
1020 unsigned int count, start;
1021 unsigned long flags;
1022
1023 count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
1024 (1 << mapping->order) - 1) >> mapping->order;
1025
1026 if (order > mapping->order)
1027 align = (1 << (order - mapping->order)) - 1;
1028
1029 spin_lock_irqsave(&mapping->lock, flags);
1030 start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0,
1031 count, align);
1032 if (start > mapping->bits) {
1033 spin_unlock_irqrestore(&mapping->lock, flags);
1034 return DMA_ERROR_CODE;
1035 }
1036
1037 bitmap_set(mapping->bitmap, start, count);
1038 spin_unlock_irqrestore(&mapping->lock, flags);
1039
1040 return mapping->base + (start << (mapping->order + PAGE_SHIFT));
1041}
1042
1043static inline void __free_iova(struct dma_iommu_mapping *mapping,
1044 dma_addr_t addr, size_t size)
1045{
1046 unsigned int start = (addr - mapping->base) >>
1047 (mapping->order + PAGE_SHIFT);
1048 unsigned int count = ((size >> PAGE_SHIFT) +
1049 (1 << mapping->order) - 1) >> mapping->order;
1050 unsigned long flags;
1051
1052 spin_lock_irqsave(&mapping->lock, flags);
1053 bitmap_clear(mapping->bitmap, start, count);
1054 spin_unlock_irqrestore(&mapping->lock, flags);
1055}
1056
1057static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
1058{
1059 struct page **pages;
1060 int count = size >> PAGE_SHIFT;
1061 int array_size = count * sizeof(struct page *);
1062 int i = 0;
1063
1064 if (array_size <= PAGE_SIZE)
1065 pages = kzalloc(array_size, gfp);
1066 else
1067 pages = vzalloc(array_size);
1068 if (!pages)
1069 return NULL;
1070
1071 while (count) {
1072 int j, order = __ffs(count);
1073
1074 pages[i] = alloc_pages(gfp | __GFP_NOWARN, order);
1075 while (!pages[i] && order)
1076 pages[i] = alloc_pages(gfp | __GFP_NOWARN, --order);
1077 if (!pages[i])
1078 goto error;
1079
1080 if (order)
1081 split_page(pages[i], order);
1082 j = 1 << order;
1083 while (--j)
1084 pages[i + j] = pages[i] + j;
1085
1086 __dma_clear_buffer(pages[i], PAGE_SIZE << order);
1087 i += 1 << order;
1088 count -= 1 << order;
1089 }
1090
1091 return pages;
1092error:
1093 while (--i)
1094 if (pages[i])
1095 __free_pages(pages[i], 0);
1096 if (array_size < PAGE_SIZE)
1097 kfree(pages);
1098 else
1099 vfree(pages);
1100 return NULL;
1101}
1102
1103static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size)
1104{
1105 int count = size >> PAGE_SHIFT;
1106 int array_size = count * sizeof(struct page *);
1107 int i;
1108 for (i = 0; i < count; i++)
1109 if (pages[i])
1110 __free_pages(pages[i], 0);
1111 if (array_size < PAGE_SIZE)
1112 kfree(pages);
1113 else
1114 vfree(pages);
1115 return 0;
1116}
1117
1118/*
1119 * Create a CPU mapping for the specified pages
1120 */
1121static void *
1122__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot)
1123{
1124 struct arm_vmregion *c;
1125 size_t align;
1126 size_t count = size >> PAGE_SHIFT;
1127 int bit;
1128
1129 if (!consistent_pte[0]) {
1130 pr_err("%s: not initialised\n", __func__);
1131 dump_stack();
1132 return NULL;
1133 }
1134
1135 /*
1136 * Align the virtual region allocation - maximum alignment is
1137 * a section size, minimum is a page size. This helps reduce
1138 * fragmentation of the DMA space, and also prevents allocations
1139 * smaller than a section from crossing a section boundary.
1140 */
1141 bit = fls(size - 1);
1142 if (bit > SECTION_SHIFT)
1143 bit = SECTION_SHIFT;
1144 align = 1 << bit;
1145
1146 /*
1147 * Allocate a virtual address in the consistent mapping region.
1148 */
1149 c = arm_vmregion_alloc(&consistent_head, align, size,
1150 gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL);
1151 if (c) {
1152 pte_t *pte;
1153 int idx = CONSISTENT_PTE_INDEX(c->vm_start);
1154 int i = 0;
1155 u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
1156
1157 pte = consistent_pte[idx] + off;
1158 c->priv = pages;
1159
1160 do {
1161 BUG_ON(!pte_none(*pte));
1162
1163 set_pte_ext(pte, mk_pte(pages[i], prot), 0);
1164 pte++;
1165 off++;
1166 i++;
1167 if (off >= PTRS_PER_PTE) {
1168 off = 0;
1169 pte = consistent_pte[++idx];
1170 }
1171 } while (i < count);
1172
1173 dsb();
1174
1175 return (void *)c->vm_start;
1176 }
1177 return NULL;
1178}
1179
1180/*
1181 * Create a mapping in the device IO address space for the specified pages
1182 */
1183static dma_addr_t
1184__iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
1185{
1186 struct dma_iommu_mapping *mapping = dev->archdata.mapping;
1187 unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
1188 dma_addr_t dma_addr, iova;
1189 int i, ret = DMA_ERROR_CODE;
1190
1191 dma_addr = __alloc_iova(mapping, size);
1192 if (dma_addr == DMA_ERROR_CODE)
1193 return dma_addr;
1194
1195 iova = dma_addr;
1196 for (i = 0; i < count; ) {
1197 unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
1198 phys_addr_t phys = page_to_phys(pages[i]);
1199 unsigned int len, j;
1200
1201 for (j = i + 1; j < count; j++, next_pfn++)
1202 if (page_to_pfn(pages[j]) != next_pfn)
1203 break;
1204
1205 len = (j - i) << PAGE_SHIFT;
1206 ret = iommu_map(mapping->domain, iova, phys, len, 0);
1207 if (ret < 0)
1208 goto fail;
1209 iova += len;
1210 i = j;
1211 }
1212 return dma_addr;
1213fail:
1214 iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
1215 __free_iova(mapping, dma_addr, size);
1216 return DMA_ERROR_CODE;
1217}
1218
1219static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
1220{
1221 struct dma_iommu_mapping *mapping = dev->archdata.mapping;
1222
1223 /*
1224 * add optional in-page offset from iova to size and align
1225 * result to page size
1226 */
1227 size = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
1228 iova &= PAGE_MASK;
1229
1230 iommu_unmap(mapping->domain, iova, size);
1231 __free_iova(mapping, iova, size);
1232 return 0;
1233}
1234
1235static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
1236 dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
1237{
1238 pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
1239 struct page **pages;
1240 void *addr = NULL;
1241
1242 *handle = DMA_ERROR_CODE;
1243 size = PAGE_ALIGN(size);
1244
1245 pages = __iommu_alloc_buffer(dev, size, gfp);
1246 if (!pages)
1247 return NULL;
1248
1249 *handle = __iommu_create_mapping(dev, pages, size);
1250 if (*handle == DMA_ERROR_CODE)
1251 goto err_buffer;
1252
1253 addr = __iommu_alloc_remap(pages, size, gfp, prot);
1254 if (!addr)
1255 goto err_mapping;
1256
1257 return addr;
1258
1259err_mapping:
1260 __iommu_remove_mapping(dev, *handle, size);
1261err_buffer:
1262 __iommu_free_buffer(dev, pages, size);
1263 return NULL;
1264}
1265
1266static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
1267 void *cpu_addr, dma_addr_t dma_addr, size_t size,
1268 struct dma_attrs *attrs)
1269{
1270 struct arm_vmregion *c;
1271
1272 vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
1273 c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
1274
1275 if (c) {
1276 struct page **pages = c->priv;
1277
1278 unsigned long uaddr = vma->vm_start;
1279 unsigned long usize = vma->vm_end - vma->vm_start;
1280 int i = 0;
1281
1282 do {
1283 int ret;
1284
1285 ret = vm_insert_page(vma, uaddr, pages[i++]);
1286 if (ret) {
1287 pr_err("Remapping memory, error: %d\n", ret);
1288 return ret;
1289 }
1290
1291 uaddr += PAGE_SIZE;
1292 usize -= PAGE_SIZE;
1293 } while (usize > 0);
1294 }
1295 return 0;
1296}
1297
1298/*
1299 * Free a buffer as defined by the above mapping.
1300 * Must not be called with IRQs disabled.
1301 */
1302void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
1303 dma_addr_t handle, struct dma_attrs *attrs)
1304{
1305 struct arm_vmregion *c;
1306 size = PAGE_ALIGN(size);
1307
1308 c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
1309 if (c) {
1310 struct page **pages = c->priv;
1311 __dma_free_remap(cpu_addr, size);
1312 __iommu_remove_mapping(dev, handle, size);
1313 __iommu_free_buffer(dev, pages, size);
1314 }
1315}
1316
1317/*
1318 * Map a part of the scatter-gather list into contiguous io address space
1319 */
1320static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
1321 size_t size, dma_addr_t *handle,
1322 enum dma_data_direction dir)
1323{
1324 struct dma_iommu_mapping *mapping = dev->archdata.mapping;
1325 dma_addr_t iova, iova_base;
1326 int ret = 0;
1327 unsigned int count;
1328 struct scatterlist *s;
1329
1330 size = PAGE_ALIGN(size);
1331 *handle = DMA_ERROR_CODE;
1332
1333 iova_base = iova = __alloc_iova(mapping, size);
1334 if (iova == DMA_ERROR_CODE)
1335 return -ENOMEM;
1336
1337 for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) {
1338 phys_addr_t phys = page_to_phys(sg_page(s));
1339 unsigned int len = PAGE_ALIGN(s->offset + s->length);
1340
1341 if (!arch_is_coherent())
1342 __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
1343
1344 ret = iommu_map(mapping->domain, iova, phys, len, 0);
1345 if (ret < 0)
1346 goto fail;
1347 count += len >> PAGE_SHIFT;
1348 iova += len;
1349 }
1350 *handle = iova_base;
1351
1352 return 0;
1353fail:
1354 iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
1355 __free_iova(mapping, iova_base, size);
1356 return ret;
1357}
1358
1359/**
1360 * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
1361 * @dev: valid struct device pointer
1362 * @sg: list of buffers
1363 * @nents: number of buffers to map
1364 * @dir: DMA transfer direction
1365 *
1366 * Map a set of buffers described by scatterlist in streaming mode for DMA.
1367 * The scatter gather list elements are merged together (if possible) and
1368 * tagged with the appropriate dma address and length. They are obtained via
1369 * sg_dma_{address,length}.
1370 */
1371int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
1372 enum dma_data_direction dir, struct dma_attrs *attrs)
1373{
1374 struct scatterlist *s = sg, *dma = sg, *start = sg;
1375 int i, count = 0;
1376 unsigned int offset = s->offset;
1377 unsigned int size = s->offset + s->length;
1378 unsigned int max = dma_get_max_seg_size(dev);
1379
1380 for (i = 1; i < nents; i++) {
1381 s = sg_next(s);
1382
1383 s->dma_address = DMA_ERROR_CODE;
1384 s->dma_length = 0;
1385
1386 if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
1387 if (__map_sg_chunk(dev, start, size, &dma->dma_address,
1388 dir) < 0)
1389 goto bad_mapping;
1390
1391 dma->dma_address += offset;
1392 dma->dma_length = size - offset;
1393
1394 size = offset = s->offset;
1395 start = s;
1396 dma = sg_next(dma);
1397 count += 1;
1398 }
1399 size += s->length;
1400 }
1401 if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
1402 goto bad_mapping;
1403
1404 dma->dma_address += offset;
1405 dma->dma_length = size - offset;
1406
1407 return count+1;
1408
1409bad_mapping:
1410 for_each_sg(sg, s, count, i)
1411 __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
1412 return 0;
1413}
1414
1415/**
1416 * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
1417 * @dev: valid struct device pointer
1418 * @sg: list of buffers
1419 * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
1420 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1421 *
1422 * Unmap a set of streaming mode DMA translations. Again, CPU access
1423 * rules concerning calls here are the same as for dma_unmap_single().
1424 */
1425void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
1426 enum dma_data_direction dir, struct dma_attrs *attrs)
1427{
1428 struct scatterlist *s;
1429 int i;
1430
1431 for_each_sg(sg, s, nents, i) {
1432 if (sg_dma_len(s))
1433 __iommu_remove_mapping(dev, sg_dma_address(s),
1434 sg_dma_len(s));
1435 if (!arch_is_coherent())
1436 __dma_page_dev_to_cpu(sg_page(s), s->offset,
1437 s->length, dir);
1438 }
1439}
1440
1441/**
1442 * arm_iommu_sync_sg_for_cpu
1443 * @dev: valid struct device pointer
1444 * @sg: list of buffers
1445 * @nents: number of buffers to map (returned from dma_map_sg)
1446 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1447 */
1448void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
1449 int nents, enum dma_data_direction dir)
1450{
1451 struct scatterlist *s;
1452 int i;
1453
1454 for_each_sg(sg, s, nents, i)
1455 if (!arch_is_coherent())
1456 __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
1457
1458}
1459
1460/**
1461 * arm_iommu_sync_sg_for_device
1462 * @dev: valid struct device pointer
1463 * @sg: list of buffers
1464 * @nents: number of buffers to map (returned from dma_map_sg)
1465 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1466 */
1467void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
1468 int nents, enum dma_data_direction dir)
1469{
1470 struct scatterlist *s;
1471 int i;
1472
1473 for_each_sg(sg, s, nents, i)
1474 if (!arch_is_coherent())
1475 __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
1476}
1477
1478
1479/**
1480 * arm_iommu_map_page
1481 * @dev: valid struct device pointer
1482 * @page: page that buffer resides in
1483 * @offset: offset into page for start of buffer
1484 * @size: size of buffer to map
1485 * @dir: DMA transfer direction
1486 *
1487 * IOMMU aware version of arm_dma_map_page()
1488 */
1489static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
1490 unsigned long offset, size_t size, enum dma_data_direction dir,
1491 struct dma_attrs *attrs)
1492{
1493 struct dma_iommu_mapping *mapping = dev->archdata.mapping;
1494 dma_addr_t dma_addr;
1495 int ret, len = PAGE_ALIGN(size + offset);
1496
1497 if (!arch_is_coherent())
1498 __dma_page_cpu_to_dev(page, offset, size, dir);
1499
1500 dma_addr = __alloc_iova(mapping, len);
1501 if (dma_addr == DMA_ERROR_CODE)
1502 return dma_addr;
1503
1504 ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0);
1505 if (ret < 0)
1506 goto fail;
1507
1508 return dma_addr + offset;
1509fail:
1510 __free_iova(mapping, dma_addr, len);
1511 return DMA_ERROR_CODE;
1512}
1513
1514/**
1515 * arm_iommu_unmap_page
1516 * @dev: valid struct device pointer
1517 * @handle: DMA address of buffer
1518 * @size: size of buffer (same as passed to dma_map_page)
1519 * @dir: DMA transfer direction (same as passed to dma_map_page)
1520 *
1521 * IOMMU aware version of arm_dma_unmap_page()
1522 */
1523static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
1524 size_t size, enum dma_data_direction dir,
1525 struct dma_attrs *attrs)
1526{
1527 struct dma_iommu_mapping *mapping = dev->archdata.mapping;
1528 dma_addr_t iova = handle & PAGE_MASK;
1529 struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
1530 int offset = handle & ~PAGE_MASK;
1531 int len = PAGE_ALIGN(size + offset);
1532
1533 if (!iova)
1534 return;
1535
1536 if (!arch_is_coherent())
1537 __dma_page_dev_to_cpu(page, offset, size, dir);
1538
1539 iommu_unmap(mapping->domain, iova, len);
1540 __free_iova(mapping, iova, len);
1541}
1542
1543static void arm_iommu_sync_single_for_cpu(struct device *dev,
1544 dma_addr_t handle, size_t size, enum dma_data_direction dir)
1545{
1546 struct dma_iommu_mapping *mapping = dev->archdata.mapping;
1547 dma_addr_t iova = handle & PAGE_MASK;
1548 struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
1549 unsigned int offset = handle & ~PAGE_MASK;
1550
1551 if (!iova)
1552 return;
1553
1554 if (!arch_is_coherent())
1555 __dma_page_dev_to_cpu(page, offset, size, dir);
1556}
1557
1558static void arm_iommu_sync_single_for_device(struct device *dev,
1559 dma_addr_t handle, size_t size, enum dma_data_direction dir)
1560{
1561 struct dma_iommu_mapping *mapping = dev->archdata.mapping;
1562 dma_addr_t iova = handle & PAGE_MASK;
1563 struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
1564 unsigned int offset = handle & ~PAGE_MASK;
1565
1566 if (!iova)
1567 return;
1568
1569 __dma_page_cpu_to_dev(page, offset, size, dir);
1570}
1571
1572struct dma_map_ops iommu_ops = {
1573 .alloc = arm_iommu_alloc_attrs,
1574 .free = arm_iommu_free_attrs,
1575 .mmap = arm_iommu_mmap_attrs,
1576
1577 .map_page = arm_iommu_map_page,
1578 .unmap_page = arm_iommu_unmap_page,
1579 .sync_single_for_cpu = arm_iommu_sync_single_for_cpu,
1580 .sync_single_for_device = arm_iommu_sync_single_for_device,
1581
1582 .map_sg = arm_iommu_map_sg,
1583 .unmap_sg = arm_iommu_unmap_sg,
1584 .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu,
1585 .sync_sg_for_device = arm_iommu_sync_sg_for_device,
1586};
1587
1588/**
1589 * arm_iommu_create_mapping
1590 * @bus: pointer to the bus holding the client device (for IOMMU calls)
1591 * @base: start address of the valid IO address space
1592 * @size: size of the valid IO address space
1593 * @order: accuracy of the IO address allocations
1594 *
1595 * Creates a mapping structure which holds information about used/unused
1596 * IO address ranges, which is required to perform memory allocation and
1597 * mapping with IOMMU aware functions.
1598 *
1599 * The client device needs to be attached to the mapping with the
1600 * arm_iommu_attach_device function.
1601 */
1602struct dma_iommu_mapping *
1603arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
1604 int order)
1605{
1606 unsigned int count = size >> (PAGE_SHIFT + order);
1607 unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
1608 struct dma_iommu_mapping *mapping;
1609 int err = -ENOMEM;
1610
1611 if (!count)
1612 return ERR_PTR(-EINVAL);
1613
1614 mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
1615 if (!mapping)
1616 goto err;
1617
1618 mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1619 if (!mapping->bitmap)
1620 goto err2;
1621
1622 mapping->base = base;
1623 mapping->bits = BITS_PER_BYTE * bitmap_size;
1624 mapping->order = order;
1625 spin_lock_init(&mapping->lock);
1626
1627 mapping->domain = iommu_domain_alloc(bus);
1628 if (!mapping->domain)
1629 goto err3;
1630
1631 kref_init(&mapping->kref);
1632 return mapping;
1633err3:
1634 kfree(mapping->bitmap);
1635err2:
1636 kfree(mapping);
1637err:
1638 return ERR_PTR(err);
1639}
1640
1641static void release_iommu_mapping(struct kref *kref)
1642{
1643 struct dma_iommu_mapping *mapping =
1644 container_of(kref, struct dma_iommu_mapping, kref);
1645
1646 iommu_domain_free(mapping->domain);
1647 kfree(mapping->bitmap);
1648 kfree(mapping);
1649}
1650
1651void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
1652{
1653 if (mapping)
1654 kref_put(&mapping->kref, release_iommu_mapping);
1655}
1656
1657/**
1658 * arm_iommu_attach_device
1659 * @dev: valid struct device pointer
1660 * @mapping: io address space mapping structure (returned from
1661 * arm_iommu_create_mapping)
1662 *
1663 * Attaches the specified IO address space mapping to the provided device.
1664 * This replaces the dma operations (dma_map_ops pointer) with the
1665 * IOMMU aware version. More than one client might be attached to
1666 * the same io address space mapping.
1667 */
1668int arm_iommu_attach_device(struct device *dev,
1669 struct dma_iommu_mapping *mapping)
1670{
1671 int err;
1672
1673 err = iommu_attach_device(mapping->domain, dev);
1674 if (err)
1675 return err;
1676
1677 kref_get(&mapping->kref);
1678 dev->archdata.mapping = mapping;
1679 set_dma_ops(dev, &iommu_ops);
1680
1681 pr_info("Attached IOMMU controller to %s device.\n", dev_name(dev));
1682 return 0;
1683}
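Taken together, arm_iommu_create_mapping() and arm_iommu_attach_device() are what platform glue code is expected to call so that a device transparently receives the IOMMU-backed dma_map_ops. A hedged sketch, assuming the prototypes live in asm/dma-iommu.h and that the device sits on the platform bus; the base address and size are hypothetical:

#include <linux/platform_device.h>
#include <linux/err.h>
#include <asm/dma-iommu.h>	/* assumed location of the prototypes above */

static int example_setup_iommu(struct device *dev)
{
	struct dma_iommu_mapping *mapping;
	int err;

	/* 128 MiB of IO address space at 0x80000000, page-granular (order 0). */
	mapping = arm_iommu_create_mapping(&platform_bus_type,
					   0x80000000, 0x08000000, 0);
	if (IS_ERR(mapping))
		return PTR_ERR(mapping);

	err = arm_iommu_attach_device(dev, mapping);
	if (err)
		arm_iommu_release_mapping(mapping);

	return err;
}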
1684
1685#endif
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 8f5813bbffb5..c21d06c7dd7e 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -20,6 +20,7 @@
20#include <linux/highmem.h> 20#include <linux/highmem.h>
21#include <linux/gfp.h> 21#include <linux/gfp.h>
22#include <linux/memblock.h> 22#include <linux/memblock.h>
23#include <linux/dma-contiguous.h>
23 24
24#include <asm/mach-types.h> 25#include <asm/mach-types.h>
25#include <asm/memblock.h> 26#include <asm/memblock.h>
@@ -226,6 +227,17 @@ static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole,
226} 227}
227#endif 228#endif
228 229
230void __init setup_dma_zone(struct machine_desc *mdesc)
231{
232#ifdef CONFIG_ZONE_DMA
233 if (mdesc->dma_zone_size) {
234 arm_dma_zone_size = mdesc->dma_zone_size;
235 arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1;
236 } else
237 arm_dma_limit = 0xffffffff;
238#endif
239}
240
229static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, 241static void __init arm_bootmem_free(unsigned long min, unsigned long max_low,
230 unsigned long max_high) 242 unsigned long max_high)
231{ 243{
@@ -273,12 +285,9 @@ static void __init arm_bootmem_free(unsigned long min, unsigned long max_low,
273 * Adjust the sizes according to any special requirements for 285 * Adjust the sizes according to any special requirements for
274 * this machine type. 286 * this machine type.
275 */ 287 */
276 if (arm_dma_zone_size) { 288 if (arm_dma_zone_size)
277 arm_adjust_dma_zone(zone_size, zhole_size, 289 arm_adjust_dma_zone(zone_size, zhole_size,
278 arm_dma_zone_size >> PAGE_SHIFT); 290 arm_dma_zone_size >> PAGE_SHIFT);
279 arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1;
280 } else
281 arm_dma_limit = 0xffffffff;
282#endif 291#endif
283 292
284 free_area_init_node(0, zone_size, min, zhole_size); 293 free_area_init_node(0, zone_size, min, zhole_size);
@@ -364,6 +373,12 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
364 if (mdesc->reserve) 373 if (mdesc->reserve)
365 mdesc->reserve(); 374 mdesc->reserve();
366 375
376 /*
 376 /*
 377 * reserve memory for DMA contiguous allocations,
378 * must come from DMA area inside low memory
379 */
380 dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));
381
367 arm_memblock_steal_permitted = false; 382 arm_memblock_steal_permitted = false;
368 memblock_allow_resize(); 383 memblock_allow_resize();
369 memblock_dump_all(); 384 memblock_dump_all();
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 27f4a619b35d..93dc0c17cdcb 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -67,5 +67,8 @@ extern u32 arm_dma_limit;
67#define arm_dma_limit ((u32)~0) 67#define arm_dma_limit ((u32)~0)
68#endif 68#endif
69 69
70extern phys_addr_t arm_lowmem_limit;
71
70void __init bootmem_init(void); 72void __init bootmem_init(void);
71void arm_mm_memblock_reserve(void); 73void arm_mm_memblock_reserve(void);
74void dma_contiguous_remap(void);
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index aa78de8bfdd3..e5dad60b558b 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -288,6 +288,11 @@ static struct mem_type mem_types[] = {
288 PMD_SECT_UNCACHED | PMD_SECT_XN, 288 PMD_SECT_UNCACHED | PMD_SECT_XN,
289 .domain = DOMAIN_KERNEL, 289 .domain = DOMAIN_KERNEL,
290 }, 290 },
291 [MT_MEMORY_DMA_READY] = {
292 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
293 .prot_l1 = PMD_TYPE_TABLE,
294 .domain = DOMAIN_KERNEL,
295 },
291}; 296};
292 297
293const struct mem_type *get_mem_type(unsigned int type) 298const struct mem_type *get_mem_type(unsigned int type)
@@ -429,6 +434,7 @@ static void __init build_mem_type_table(void)
429 if (arch_is_coherent() && cpu_is_xsc3()) { 434 if (arch_is_coherent() && cpu_is_xsc3()) {
430 mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; 435 mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
431 mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; 436 mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
437 mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
432 mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; 438 mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
433 mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; 439 mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
434 } 440 }
@@ -460,6 +466,7 @@ static void __init build_mem_type_table(void)
460 mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; 466 mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
461 mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; 467 mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
462 mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; 468 mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
469 mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
463 mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; 470 mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
464 mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; 471 mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
465 } 472 }
@@ -512,6 +519,7 @@ static void __init build_mem_type_table(void)
512 mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; 519 mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
513 mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; 520 mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
514 mem_types[MT_MEMORY].prot_pte |= kern_pgprot; 521 mem_types[MT_MEMORY].prot_pte |= kern_pgprot;
522 mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
515 mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; 523 mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask;
516 mem_types[MT_ROM].prot_sect |= cp->pmd; 524 mem_types[MT_ROM].prot_sect |= cp->pmd;
517 525
@@ -596,7 +604,7 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr,
596 * L1 entries, whereas PGDs refer to a group of L1 entries making 604 * L1 entries, whereas PGDs refer to a group of L1 entries making
597 * up one logical pointer to an L2 table. 605 * up one logical pointer to an L2 table.
598 */ 606 */
599 if (((addr | end | phys) & ~SECTION_MASK) == 0) { 607 if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) {
600 pmd_t *p = pmd; 608 pmd_t *p = pmd;
601 609
602#ifndef CONFIG_ARM_LPAE 610#ifndef CONFIG_ARM_LPAE
@@ -814,7 +822,7 @@ static int __init early_vmalloc(char *arg)
814} 822}
815early_param("vmalloc", early_vmalloc); 823early_param("vmalloc", early_vmalloc);
816 824
817static phys_addr_t lowmem_limit __initdata = 0; 825phys_addr_t arm_lowmem_limit __initdata = 0;
818 826
819void __init sanity_check_meminfo(void) 827void __init sanity_check_meminfo(void)
820{ 828{
@@ -897,8 +905,8 @@ void __init sanity_check_meminfo(void)
897 bank->size = newsize; 905 bank->size = newsize;
898 } 906 }
899#endif 907#endif
900 if (!bank->highmem && bank->start + bank->size > lowmem_limit) 908 if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit)
901 lowmem_limit = bank->start + bank->size; 909 arm_lowmem_limit = bank->start + bank->size;
902 910
903 j++; 911 j++;
904 } 912 }
@@ -923,8 +931,8 @@ void __init sanity_check_meminfo(void)
923 } 931 }
924#endif 932#endif
925 meminfo.nr_banks = j; 933 meminfo.nr_banks = j;
926 high_memory = __va(lowmem_limit - 1) + 1; 934 high_memory = __va(arm_lowmem_limit - 1) + 1;
927 memblock_set_current_limit(lowmem_limit); 935 memblock_set_current_limit(arm_lowmem_limit);
928} 936}
929 937
930static inline void prepare_page_table(void) 938static inline void prepare_page_table(void)
@@ -949,8 +957,8 @@ static inline void prepare_page_table(void)
949 * Find the end of the first block of lowmem. 957 * Find the end of the first block of lowmem.
950 */ 958 */
951 end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; 959 end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
952 if (end >= lowmem_limit) 960 if (end >= arm_lowmem_limit)
953 end = lowmem_limit; 961 end = arm_lowmem_limit;
954 962
955 /* 963 /*
956 * Clear out all the kernel space mappings, except for the first 964 * Clear out all the kernel space mappings, except for the first
@@ -1093,8 +1101,8 @@ static void __init map_lowmem(void)
1093 phys_addr_t end = start + reg->size; 1101 phys_addr_t end = start + reg->size;
1094 struct map_desc map; 1102 struct map_desc map;
1095 1103
1096 if (end > lowmem_limit) 1104 if (end > arm_lowmem_limit)
1097 end = lowmem_limit; 1105 end = arm_lowmem_limit;
1098 if (start >= end) 1106 if (start >= end)
1099 break; 1107 break;
1100 1108
@@ -1115,11 +1123,12 @@ void __init paging_init(struct machine_desc *mdesc)
1115{ 1123{
1116 void *zero_page; 1124 void *zero_page;
1117 1125
1118 memblock_set_current_limit(lowmem_limit); 1126 memblock_set_current_limit(arm_lowmem_limit);
1119 1127
1120 build_mem_type_table(); 1128 build_mem_type_table();
1121 prepare_page_table(); 1129 prepare_page_table();
1122 map_lowmem(); 1130 map_lowmem();
1131 dma_contiguous_remap();
1123 devicemaps_init(mdesc); 1132 devicemaps_init(mdesc);
1124 kmap_init(); 1133 kmap_init();
1125 1134
diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h
index 162be662c088..bf312c354a21 100644
--- a/arch/arm/mm/vmregion.h
+++ b/arch/arm/mm/vmregion.h
@@ -17,7 +17,7 @@ struct arm_vmregion {
17 struct list_head vm_list; 17 struct list_head vm_list;
18 unsigned long vm_start; 18 unsigned long vm_start;
19 unsigned long vm_end; 19 unsigned long vm_end;
20 struct page *vm_pages; 20 void *priv;
21 int vm_active; 21 int vm_active;
22 const void *caller; 22 const void *caller;
23}; 23};
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 66cc380bebf0..81c3e8be789a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -32,6 +32,7 @@ config X86
32 select ARCH_WANT_OPTIONAL_GPIOLIB 32 select ARCH_WANT_OPTIONAL_GPIOLIB
33 select ARCH_WANT_FRAME_POINTERS 33 select ARCH_WANT_FRAME_POINTERS
34 select HAVE_DMA_ATTRS 34 select HAVE_DMA_ATTRS
35 select HAVE_DMA_CONTIGUOUS if !SWIOTLB
35 select HAVE_KRETPROBES 36 select HAVE_KRETPROBES
36 select HAVE_OPTPROBES 37 select HAVE_OPTPROBES
37 select HAVE_FTRACE_MCOUNT_RECORD 38 select HAVE_FTRACE_MCOUNT_RECORD
diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h
new file mode 100644
index 000000000000..c09241659971
--- /dev/null
+++ b/arch/x86/include/asm/dma-contiguous.h
@@ -0,0 +1,13 @@
1#ifndef ASMX86_DMA_CONTIGUOUS_H
2#define ASMX86_DMA_CONTIGUOUS_H
3
4#ifdef __KERNEL__
5
6#include <linux/types.h>
7#include <asm-generic/dma-contiguous.h>
8
9static inline void
10dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { }
11
12#endif
13#endif
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 61c0bd25845a..f7b4c7903e7e 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -13,6 +13,7 @@
13#include <asm/io.h> 13#include <asm/io.h>
14#include <asm/swiotlb.h> 14#include <asm/swiotlb.h>
15#include <asm-generic/dma-coherent.h> 15#include <asm-generic/dma-coherent.h>
16#include <linux/dma-contiguous.h>
16 17
17#ifdef CONFIG_ISA 18#ifdef CONFIG_ISA
18# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) 19# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24)
@@ -62,6 +63,10 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
62 dma_addr_t *dma_addr, gfp_t flag, 63 dma_addr_t *dma_addr, gfp_t flag,
63 struct dma_attrs *attrs); 64 struct dma_attrs *attrs);
64 65
66extern void dma_generic_free_coherent(struct device *dev, size_t size,
67 void *vaddr, dma_addr_t dma_addr,
68 struct dma_attrs *attrs);
69
65#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ 70#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */
66extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); 71extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
67extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); 72extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 3003250ac51d..62c9457ccd2f 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -100,14 +100,18 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
100 struct dma_attrs *attrs) 100 struct dma_attrs *attrs)
101{ 101{
102 unsigned long dma_mask; 102 unsigned long dma_mask;
103 struct page *page; 103 struct page *page = NULL;
104 unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
104 dma_addr_t addr; 105 dma_addr_t addr;
105 106
106 dma_mask = dma_alloc_coherent_mask(dev, flag); 107 dma_mask = dma_alloc_coherent_mask(dev, flag);
107 108
108 flag |= __GFP_ZERO; 109 flag |= __GFP_ZERO;
109again: 110again:
110 page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); 111 if (!(flag & GFP_ATOMIC))
112 page = dma_alloc_from_contiguous(dev, count, get_order(size));
113 if (!page)
114 page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
111 if (!page) 115 if (!page)
112 return NULL; 116 return NULL;
113 117
@@ -127,6 +131,16 @@ again:
127 return page_address(page); 131 return page_address(page);
128} 132}
129 133
134void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
135 dma_addr_t dma_addr, struct dma_attrs *attrs)
136{
137 unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
138 struct page *page = virt_to_page(vaddr);
139
140 if (!dma_release_from_contiguous(dev, page, count))
141 free_pages((unsigned long)vaddr, get_order(size));
142}
143
130/* 144/*
131 * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel 145 * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
132 * parameter documentation. 146 * parameter documentation.
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index f96050685b46..871be4a84c7d 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -74,12 +74,6 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
74 return nents; 74 return nents;
75} 75}
76 76
77static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
78 dma_addr_t dma_addr, struct dma_attrs *attrs)
79{
80 free_pages((unsigned long)vaddr, get_order(size));
81}
82
83static void nommu_sync_single_for_device(struct device *dev, 77static void nommu_sync_single_for_device(struct device *dev,
84 dma_addr_t addr, size_t size, 78 dma_addr_t addr, size_t size,
85 enum dma_data_direction dir) 79 enum dma_data_direction dir)
@@ -97,7 +91,7 @@ static void nommu_sync_sg_for_device(struct device *dev,
97 91
98struct dma_map_ops nommu_dma_ops = { 92struct dma_map_ops nommu_dma_ops = {
99 .alloc = dma_generic_alloc_coherent, 93 .alloc = dma_generic_alloc_coherent,
100 .free = nommu_free_coherent, 94 .free = dma_generic_free_coherent,
101 .map_sg = nommu_map_sg, 95 .map_sg = nommu_map_sg,
102 .map_page = nommu_map_page, 96 .map_page = nommu_map_page,
103 .sync_single_for_device = nommu_sync_single_for_device, 97 .sync_single_for_device = nommu_sync_single_for_device,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 366c688d619e..f2afee6a19c1 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -49,6 +49,7 @@
49#include <asm/pci-direct.h> 49#include <asm/pci-direct.h>
50#include <linux/init_ohci1394_dma.h> 50#include <linux/init_ohci1394_dma.h>
51#include <linux/kvm_para.h> 51#include <linux/kvm_para.h>
52#include <linux/dma-contiguous.h>
52 53
53#include <linux/errno.h> 54#include <linux/errno.h>
54#include <linux/kernel.h> 55#include <linux/kernel.h>
@@ -925,6 +926,7 @@ void __init setup_arch(char **cmdline_p)
925 } 926 }
926#endif 927#endif
927 memblock.current_limit = get_max_mapped(); 928 memblock.current_limit = get_max_mapped();
929 dma_contiguous_reserve(0);
928 930
929 /* 931 /*
930 * NOTE: On x86-32, only from this point on, fixmaps are ready for use. 932 * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
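x86 passes 0 here, i.e. no placement limit. An architecture whose devices cannot DMA above some boundary would pass an explicit limit instead; a hedged sketch (the 4 GiB value and the hook name are made up, not taken from this patch):

/* hypothetical arch hook: keep the default CMA area below 4 GiB */
static void __init example_reserve_cma(void)
{
        /* 0, as passed by x86 above, would mean "place it anywhere" */
        dma_contiguous_reserve(0x100000000ULL);
}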
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 9aa618acfe97..9b21469482ae 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -192,4 +192,93 @@ config DMA_SHARED_BUFFER
192 APIs extension; the file's descriptor can then be passed on to other 192 APIs extension; the file's descriptor can then be passed on to other
193 driver. 193 driver.
194 194
195config CMA
196 bool "Contiguous Memory Allocator (EXPERIMENTAL)"
197 depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK && EXPERIMENTAL
198 select MIGRATION
199 help
200 This enables the Contiguous Memory Allocator which allows drivers
201 to allocate big physically-contiguous blocks of memory for use with
202 hardware components that support neither I/O mapping nor scatter-gather.
203
204 For more information see <include/linux/dma-contiguous.h>.
205 If unsure, say "n".
206
207if CMA
208
209config CMA_DEBUG
210 bool "CMA debug messages (DEVELOPMENT)"
211 depends on DEBUG_KERNEL
212 help
213 Turns on debug messages in CMA. This produces KERN_DEBUG
214 messages for every CMA call as well as various messages while
215 processing calls such as dma_alloc_from_contiguous().
216 This option does not affect warning and error messages.
217
218comment "Default contiguous memory area size:"
219
220config CMA_SIZE_MBYTES
221 int "Size in Mega Bytes"
222 depends on !CMA_SIZE_SEL_PERCENTAGE
223 default 16
224 help
225 Defines the size (in MiB) of the default memory area for Contiguous
226 Memory Allocator.
227
228config CMA_SIZE_PERCENTAGE
229 int "Percentage of total memory"
230 depends on !CMA_SIZE_SEL_MBYTES
231 default 10
232 help
233 Defines the size of the default memory area for Contiguous Memory
234 Allocator as a percentage of the total memory in the system.
235
236choice
237 prompt "Selected region size"
238 default CMA_SIZE_SEL_ABSOLUTE
239
240config CMA_SIZE_SEL_MBYTES
241 bool "Use mega bytes value only"
242
243config CMA_SIZE_SEL_PERCENTAGE
244 bool "Use percentage value only"
245
246config CMA_SIZE_SEL_MIN
247 bool "Use lower value (minimum)"
248
249config CMA_SIZE_SEL_MAX
250 bool "Use higher value (maximum)"
251
252endchoice
253
254config CMA_ALIGNMENT
255 int "Maximum PAGE_SIZE order of alignment for contiguous buffers"
256 range 4 9
257 default 8
258 help
259 The DMA mapping framework by default aligns all buffers to the smallest
260 PAGE_SIZE order which is greater than or equal to the requested buffer
261 size. This works well for buffers up to a few hundred kilobytes, but
262 for larger buffers it is just a waste of memory. With this parameter you can
263 specify the maximum PAGE_SIZE order for contiguous buffers. Larger
264 buffers will be aligned only to this specified order. The order is
265 expressed as a power of two multiplied by the PAGE_SIZE.
266
267 For example, if your system defaults to 4KiB pages, the order value
268 of 8 means that the buffers will be aligned up to 1MiB only.
269
270 If unsure, leave the default value "8".
271
272config CMA_AREAS
273 int "Maximum count of the CMA device-private areas"
274 default 7
275 help
276 CMA allows creating CMA areas for particular devices. This parameter
277 sets the maximum number of such device private CMA areas in the
278 system.
279
280 If unsure, leave the default value "7".
281
282endif
283
195endmenu 284endmenu
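To make the CMA_ALIGNMENT help text concrete: the DMA layer asks for an alignment equal to the allocation order of the buffer, and dma_alloc_from_contiguous() (added later in this series) clamps it to CONFIG_CMA_ALIGNMENT. A small illustrative sketch, assuming 4 KiB pages (the helper name is made up):

static unsigned int example_cma_align(size_t size)
{
        unsigned int align = get_order(size);   /* e.g. a 5 MiB buffer -> order 11 */

        /* the default CONFIG_CMA_ALIGNMENT=8 caps this, so the buffer only
         * has to start on a 2^8 pages = 1 MiB boundary */
        if (align > CONFIG_CMA_ALIGNMENT)
                align = CONFIG_CMA_ALIGNMENT;
        return align;
}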
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index b6d1b9c4200c..5aa2d703d19f 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -6,6 +6,7 @@ obj-y := core.o bus.o dd.o syscore.o \
6 attribute_container.o transport_class.o \ 6 attribute_container.o transport_class.o \
7 topology.o 7 topology.o
8obj-$(CONFIG_DEVTMPFS) += devtmpfs.o 8obj-$(CONFIG_DEVTMPFS) += devtmpfs.o
9obj-$(CONFIG_CMA) += dma-contiguous.o
9obj-y += power/ 10obj-y += power/
10obj-$(CONFIG_HAS_DMA) += dma-mapping.o 11obj-$(CONFIG_HAS_DMA) += dma-mapping.o
11obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o 12obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
index bb0025c510b3..1b85949e3d2f 100644
--- a/drivers/base/dma-coherent.c
+++ b/drivers/base/dma-coherent.c
@@ -10,6 +10,7 @@
10struct dma_coherent_mem { 10struct dma_coherent_mem {
11 void *virt_base; 11 void *virt_base;
12 dma_addr_t device_base; 12 dma_addr_t device_base;
13 phys_addr_t pfn_base;
13 int size; 14 int size;
14 int flags; 15 int flags;
15 unsigned long *bitmap; 16 unsigned long *bitmap;
@@ -44,6 +45,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
44 45
45 dev->dma_mem->virt_base = mem_base; 46 dev->dma_mem->virt_base = mem_base;
46 dev->dma_mem->device_base = device_addr; 47 dev->dma_mem->device_base = device_addr;
48 dev->dma_mem->pfn_base = PFN_DOWN(bus_addr);
47 dev->dma_mem->size = pages; 49 dev->dma_mem->size = pages;
48 dev->dma_mem->flags = flags; 50 dev->dma_mem->flags = flags;
49 51
@@ -176,3 +178,43 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
176 return 0; 178 return 0;
177} 179}
178EXPORT_SYMBOL(dma_release_from_coherent); 180EXPORT_SYMBOL(dma_release_from_coherent);
181
182/**
183 * dma_mmap_from_coherent() - try to mmap the memory allocated from
184 * per-device coherent memory pool to userspace
185 * @dev: device from which the memory was allocated
186 * @vma: vm_area for the userspace memory
187 * @vaddr: cpu address returned by dma_alloc_from_coherent
188 * @size: size of the memory buffer allocated by dma_alloc_from_coherent
189 *
190 * This checks whether the memory was allocated from the per-device
191 * coherent memory pool and if so, maps that memory to the provided vma.
192 *
193 * Returns 1 if we correctly mapped the memory, or 0 if
194 * dma_release_coherent() should proceed with mapping memory from
195 * generic pools.
196 */
197int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
198 void *vaddr, size_t size, int *ret)
199{
200 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
201
202 if (mem && vaddr >= mem->virt_base && vaddr + size <=
203 (mem->virt_base + (mem->size << PAGE_SHIFT))) {
204 unsigned long off = vma->vm_pgoff;
205 int start = (vaddr - mem->virt_base) >> PAGE_SHIFT;
206 int user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
207 int count = size >> PAGE_SHIFT;
208
209 *ret = -ENXIO;
210 if (off < count && user_count <= count - off) {
211 unsigned pfn = mem->pfn_base + start + off;
212 *ret = remap_pfn_range(vma, vma->vm_start, pfn,
213 user_count << PAGE_SHIFT,
214 vma->vm_page_prot);
215 }
216 return 1;
217 }
218 return 0;
219}
220EXPORT_SYMBOL(dma_mmap_from_coherent);
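dma_mmap_from_coherent() is meant to be the first thing an architecture's dma_mmap implementation tries; only if it returns 0 should the caller map memory from the generic pools itself. A hedged sketch of such a caller (the helper name is made up and the fallback assumes a linear-mapped lowmem buffer; it is not this patch's ARM or x86 code):

static int example_dma_mmap(struct device *dev, struct vm_area_struct *vma,
                            void *cpu_addr, dma_addr_t handle, size_t size)
{
        int ret;

        if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
                return ret;             /* handled by the per-device pool */

        /* not from the coherent pool: remap the kernel buffer directly */
        return remap_pfn_range(vma, vma->vm_start,
                               page_to_pfn(virt_to_page(cpu_addr)),
                               vma->vm_end - vma->vm_start,
                               vma->vm_page_prot);
}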
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
new file mode 100644
index 000000000000..78efb0306a44
--- /dev/null
+++ b/drivers/base/dma-contiguous.c
@@ -0,0 +1,401 @@
1/*
2 * Contiguous Memory Allocator for DMA mapping framework
3 * Copyright (c) 2010-2011 by Samsung Electronics.
4 * Written by:
5 * Marek Szyprowski <m.szyprowski@samsung.com>
6 * Michal Nazarewicz <mina86@mina86.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License or (at your option) any later version of the license.
12 */
13
14#define pr_fmt(fmt) "cma: " fmt
15
16#ifdef CONFIG_CMA_DEBUG
17#ifndef DEBUG
18# define DEBUG
19#endif
20#endif
21
22#include <asm/page.h>
23#include <asm/dma-contiguous.h>
24
25#include <linux/memblock.h>
26#include <linux/err.h>
27#include <linux/mm.h>
28#include <linux/mutex.h>
29#include <linux/page-isolation.h>
30#include <linux/slab.h>
31#include <linux/swap.h>
32#include <linux/mm_types.h>
33#include <linux/dma-contiguous.h>
34
35#ifndef SZ_1M
36#define SZ_1M (1 << 20)
37#endif
38
39struct cma {
40 unsigned long base_pfn;
41 unsigned long count;
42 unsigned long *bitmap;
43};
44
45struct cma *dma_contiguous_default_area;
46
47#ifdef CONFIG_CMA_SIZE_MBYTES
48#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
49#else
50#define CMA_SIZE_MBYTES 0
51#endif
52
53/*
54 * Default global CMA area size can be defined in kernel's .config.
55 * This is useful mainly for distro maintainers to create a kernel
56 * that works correctly for most supported systems.
57 * The size can be set in bytes or as a percentage of the total memory
58 * in the system.
59 *
60 * Users who want to set the size of the global CMA area for their system
61 * should use the cma= kernel parameter.
62 */
63static const unsigned long size_bytes = CMA_SIZE_MBYTES * SZ_1M;
64static long size_cmdline = -1;
65
66static int __init early_cma(char *p)
67{
68 pr_debug("%s(%s)\n", __func__, p);
69 size_cmdline = memparse(p, &p);
70 return 0;
71}
72early_param("cma", early_cma);
73
74#ifdef CONFIG_CMA_SIZE_PERCENTAGE
75
76static unsigned long __init __maybe_unused cma_early_percent_memory(void)
77{
78 struct memblock_region *reg;
79 unsigned long total_pages = 0;
80
81 /*
82 * We cannot use memblock_phys_mem_size() here, because
83 * memblock_analyze() has not been called yet.
84 */
85 for_each_memblock(memory, reg)
86 total_pages += memblock_region_memory_end_pfn(reg) -
87 memblock_region_memory_base_pfn(reg);
88
89 return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT;
90}
91
92#else
93
94static inline __maybe_unused unsigned long cma_early_percent_memory(void)
95{
96 return 0;
97}
98
99#endif
100
101/**
102 * dma_contiguous_reserve() - reserve area for contiguous memory handling
103 * @limit: End address of the reserved memory (optional, 0 for any).
104 *
105 * This function reserves memory from the early allocator. It should be
106 * called by arch specific code once the early allocator (memblock or bootmem)
107 * has been activated and all other subsystems have already allocated/reserved
108 * memory.
109 */
110void __init dma_contiguous_reserve(phys_addr_t limit)
111{
112 unsigned long selected_size = 0;
113
114 pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);
115
116 if (size_cmdline != -1) {
117 selected_size = size_cmdline;
118 } else {
119#ifdef CONFIG_CMA_SIZE_SEL_MBYTES
120 selected_size = size_bytes;
121#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)
122 selected_size = cma_early_percent_memory();
123#elif defined(CONFIG_CMA_SIZE_SEL_MIN)
124 selected_size = min(size_bytes, cma_early_percent_memory());
125#elif defined(CONFIG_CMA_SIZE_SEL_MAX)
126 selected_size = max(size_bytes, cma_early_percent_memory());
127#endif
128 }
129
130 if (selected_size) {
131 pr_debug("%s: reserving %ld MiB for global area\n", __func__,
132 selected_size / SZ_1M);
133
134 dma_declare_contiguous(NULL, selected_size, 0, limit);
135 }
136};
137
138static DEFINE_MUTEX(cma_mutex);
139
140static __init int cma_activate_area(unsigned long base_pfn, unsigned long count)
141{
142 unsigned long pfn = base_pfn;
143 unsigned i = count >> pageblock_order;
144 struct zone *zone;
145
146 WARN_ON_ONCE(!pfn_valid(pfn));
147 zone = page_zone(pfn_to_page(pfn));
148
149 do {
150 unsigned j;
151 base_pfn = pfn;
152 for (j = pageblock_nr_pages; j; --j, pfn++) {
153 WARN_ON_ONCE(!pfn_valid(pfn));
154 if (page_zone(pfn_to_page(pfn)) != zone)
155 return -EINVAL;
156 }
157 init_cma_reserved_pageblock(pfn_to_page(base_pfn));
158 } while (--i);
159 return 0;
160}
161
162static __init struct cma *cma_create_area(unsigned long base_pfn,
163 unsigned long count)
164{
165 int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
166 struct cma *cma;
167 int ret = -ENOMEM;
168
169 pr_debug("%s(base %08lx, count %lx)\n", __func__, base_pfn, count);
170
171 cma = kmalloc(sizeof *cma, GFP_KERNEL);
172 if (!cma)
173 return ERR_PTR(-ENOMEM);
174
175 cma->base_pfn = base_pfn;
176 cma->count = count;
177 cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
178
179 if (!cma->bitmap)
180 goto no_mem;
181
182 ret = cma_activate_area(base_pfn, count);
183 if (ret)
184 goto error;
185
186 pr_debug("%s: returned %p\n", __func__, (void *)cma);
187 return cma;
188
189error:
190 kfree(cma->bitmap);
191no_mem:
192 kfree(cma);
193 return ERR_PTR(ret);
194}
195
196static struct cma_reserved {
197 phys_addr_t start;
198 unsigned long size;
199 struct device *dev;
200} cma_reserved[MAX_CMA_AREAS] __initdata;
201static unsigned cma_reserved_count __initdata;
202
203static int __init cma_init_reserved_areas(void)
204{
205 struct cma_reserved *r = cma_reserved;
206 unsigned i = cma_reserved_count;
207
208 pr_debug("%s()\n", __func__);
209
210 for (; i; --i, ++r) {
211 struct cma *cma;
212 cma = cma_create_area(PFN_DOWN(r->start),
213 r->size >> PAGE_SHIFT);
214 if (!IS_ERR(cma))
215 dev_set_cma_area(r->dev, cma);
216 }
217 return 0;
218}
219core_initcall(cma_init_reserved_areas);
220
221/**
222 * dma_declare_contiguous() - reserve area for contiguous memory handling
223 * for particular device
224 * @dev: Pointer to device structure.
225 * @size: Size of the reserved memory.
226 * @base: Start address of the reserved memory (optional, 0 for any).
227 * @limit: End address of the reserved memory (optional, 0 for any).
228 *
229 * This function reserves memory for the specified device. It should be
230 * called by board specific code while the early allocator (memblock or bootmem)
231 * is still active.
232 */
233int __init dma_declare_contiguous(struct device *dev, unsigned long size,
234 phys_addr_t base, phys_addr_t limit)
235{
236 struct cma_reserved *r = &cma_reserved[cma_reserved_count];
237 unsigned long alignment;
238
239 pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__,
240 (unsigned long)size, (unsigned long)base,
241 (unsigned long)limit);
242
243 /* Sanity checks */
244 if (cma_reserved_count == ARRAY_SIZE(cma_reserved)) {
245 pr_err("Not enough slots for CMA reserved regions!\n");
246 return -ENOSPC;
247 }
248
249 if (!size)
250 return -EINVAL;
251
252 /* Sanitise input arguments */
253 alignment = PAGE_SIZE << max(MAX_ORDER, pageblock_order);
254 base = ALIGN(base, alignment);
255 size = ALIGN(size, alignment);
256 limit &= ~(alignment - 1);
257
258 /* Reserve memory */
259 if (base) {
260 if (memblock_is_region_reserved(base, size) ||
261 memblock_reserve(base, size) < 0) {
262 base = -EBUSY;
263 goto err;
264 }
265 } else {
266 /*
267 * Use __memblock_alloc_base() since
268 * memblock_alloc_base() panic()s.
269 */
270 phys_addr_t addr = __memblock_alloc_base(size, alignment, limit);
271 if (!addr) {
272 base = -ENOMEM;
273 goto err;
274 } else if (addr + size > ~(unsigned long)0) {
275 memblock_free(addr, size);
276 base = -EINVAL;
277 goto err;
278 } else {
279 base = addr;
280 }
281 }
282
283 /*
284 * Each reserved area must be initialised later, when more kernel
285 * subsystems (like slab allocator) are available.
286 */
287 r->start = base;
288 r->size = size;
289 r->dev = dev;
290 cma_reserved_count++;
291 pr_info("CMA: reserved %ld MiB at %08lx\n", size / SZ_1M,
292 (unsigned long)base);
293
294 /* Architecture specific contiguous memory fixup. */
295 dma_contiguous_early_fixup(base, size);
296 return 0;
297err:
298 pr_err("CMA: failed to reserve %ld MiB\n", size / SZ_1M);
299 return base;
300}
301
302/**
303 * dma_alloc_from_contiguous() - allocate pages from contiguous area
304 * @dev: Pointer to device for which the allocation is performed.
305 * @count: Requested number of pages.
306 * @align: Requested alignment of pages (in PAGE_SIZE order).
307 *
308 * This function allocates memory buffer for specified device. It uses
309 * device specific contiguous memory area if available or the default
310 * global one. Requires architecture specific get_dev_cma_area() helper
311 * function.
312 */
313struct page *dma_alloc_from_contiguous(struct device *dev, int count,
314 unsigned int align)
315{
316 unsigned long mask, pfn, pageno, start = 0;
317 struct cma *cma = dev_get_cma_area(dev);
318 int ret;
319
320 if (!cma || !cma->count)
321 return NULL;
322
323 if (align > CONFIG_CMA_ALIGNMENT)
324 align = CONFIG_CMA_ALIGNMENT;
325
326 pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
327 count, align);
328
329 if (!count)
330 return NULL;
331
332 mask = (1 << align) - 1;
333
334 mutex_lock(&cma_mutex);
335
336 for (;;) {
337 pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
338 start, count, mask);
339 if (pageno >= cma->count) {
340 ret = -ENOMEM;
341 goto error;
342 }
343
344 pfn = cma->base_pfn + pageno;
345 ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
346 if (ret == 0) {
347 bitmap_set(cma->bitmap, pageno, count);
348 break;
349 } else if (ret != -EBUSY) {
350 goto error;
351 }
352 pr_debug("%s(): memory range at %p is busy, retrying\n",
353 __func__, pfn_to_page(pfn));
354 /* try again with a bit different memory target */
355 start = pageno + mask + 1;
356 }
357
358 mutex_unlock(&cma_mutex);
359
360 pr_debug("%s(): returned %p\n", __func__, pfn_to_page(pfn));
361 return pfn_to_page(pfn);
362error:
363 mutex_unlock(&cma_mutex);
364 return NULL;
365}
366
367/**
368 * dma_release_from_contiguous() - release allocated pages
369 * @dev: Pointer to device for which the pages were allocated.
370 * @pages: Allocated pages.
371 * @count: Number of allocated pages.
372 *
373 * This function releases memory allocated by dma_alloc_from_contiguous().
374 * It returns false when provided pages do not belong to contiguous area and
375 * true otherwise.
376 */
377bool dma_release_from_contiguous(struct device *dev, struct page *pages,
378 int count)
379{
380 struct cma *cma = dev_get_cma_area(dev);
381 unsigned long pfn;
382
383 if (!cma || !pages)
384 return false;
385
386 pr_debug("%s(page %p)\n", __func__, (void *)pages);
387
388 pfn = page_to_pfn(pages);
389
390 if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
391 return false;
392
393 VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
394
395 mutex_lock(&cma_mutex);
396 bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count);
397 free_contig_range(pfn, count);
398 mutex_unlock(&cma_mutex);
399
400 return true;
401}
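dma_declare_contiguous() is the board-level entry point: it only records the reservation in cma_reserved[]; the actual struct cma objects are built by cma_init_reserved_areas() at core_initcall time, once the slab allocator is up. A hedged sketch of a board file reserving a private area for one device (the hook name, sizes, and device are made up, not from this patch):

/* called from a board's early reserve hook, while memblock is still active;
 * 'dev' is the (hypothetical) device that needs its own contiguous pool */
void __init example_board_reserve(struct device *dev)
{
        /* 32 MiB, placed anywhere below 256 MiB, private to this device */
        if (dma_declare_contiguous(dev, 32 * SZ_1M, 0, 0x10000000))
                pr_warn("example: CMA reservation failed\n");
}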
diff --git a/include/asm-generic/dma-coherent.h b/include/asm-generic/dma-coherent.h
index 85a3ffaa0242..abfb2682de7f 100644
--- a/include/asm-generic/dma-coherent.h
+++ b/include/asm-generic/dma-coherent.h
@@ -3,13 +3,15 @@
3 3
4#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT 4#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
5/* 5/*
6 * These two functions are only for dma allocator. 6 * These three functions are only for dma allocator.
7 * Don't use them in device drivers. 7 * Don't use them in device drivers.
8 */ 8 */
9int dma_alloc_from_coherent(struct device *dev, ssize_t size, 9int dma_alloc_from_coherent(struct device *dev, ssize_t size,
10 dma_addr_t *dma_handle, void **ret); 10 dma_addr_t *dma_handle, void **ret);
11int dma_release_from_coherent(struct device *dev, int order, void *vaddr); 11int dma_release_from_coherent(struct device *dev, int order, void *vaddr);
12 12
13int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
14 void *cpu_addr, size_t size, int *ret);
13/* 15/*
14 * Standard interface 16 * Standard interface
15 */ 17 */
diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h
new file mode 100644
index 000000000000..c544356b374b
--- /dev/null
+++ b/include/asm-generic/dma-contiguous.h
@@ -0,0 +1,28 @@
1#ifndef ASM_DMA_CONTIGUOUS_H
2#define ASM_DMA_CONTIGUOUS_H
3
4#ifdef __KERNEL__
5#ifdef CONFIG_CMA
6
7#include <linux/device.h>
8#include <linux/dma-contiguous.h>
9
10static inline struct cma *dev_get_cma_area(struct device *dev)
11{
12 if (dev && dev->cma_area)
13 return dev->cma_area;
14 return dma_contiguous_default_area;
15}
16
17static inline void dev_set_cma_area(struct device *dev, struct cma *cma)
18{
19 if (dev)
20 dev->cma_area = cma;
21 if (!dev || !dma_contiguous_default_area)
22 dma_contiguous_default_area = cma;
23}
24
25#endif
26#endif
27
28#endif
diff --git a/include/linux/device.h b/include/linux/device.h
index e04f5776f6d0..161d96241b1b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -667,6 +667,10 @@ struct device {
667 667
668 struct dma_coherent_mem *dma_mem; /* internal for coherent mem 668 struct dma_coherent_mem *dma_mem; /* internal for coherent mem
669 override */ 669 override */
670#ifdef CONFIG_CMA
671 struct cma *cma_area; /* contiguous memory area for dma
672 allocations */
673#endif
670 /* arch specific additions */ 674 /* arch specific additions */
671 struct dev_archdata archdata; 675 struct dev_archdata archdata;
672 676
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
new file mode 100644
index 000000000000..2f303e4b7ed3
--- /dev/null
+++ b/include/linux/dma-contiguous.h
@@ -0,0 +1,110 @@
1#ifndef __LINUX_CMA_H
2#define __LINUX_CMA_H
3
4/*
5 * Contiguous Memory Allocator for DMA mapping framework
6 * Copyright (c) 2010-2011 by Samsung Electronics.
7 * Written by:
8 * Marek Szyprowski <m.szyprowski@samsung.com>
9 * Michal Nazarewicz <mina86@mina86.com>
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License as
13 * published by the Free Software Foundation; either version 2 of the
14 * License or (at your option) any later version of the license.
15 */
16
17/*
18 * Contiguous Memory Allocator
19 *
20 * The Contiguous Memory Allocator (CMA) makes it possible to
21 * allocate big contiguous chunks of memory after the system has
22 * booted.
23 *
24 * Why is it needed?
25 *
26 * Various devices on embedded systems have no scatter-gather and/or
27 * IO map support and require contiguous blocks of memory to
28 * operate. They include devices such as cameras, hardware video
29 * coders, etc.
30 *
31 * Such devices often require big memory buffers (a full HD frame
32 * is, for instance, more than 2 megapixels, i.e. more than 6
33 * MB of memory), which makes mechanisms such as kmalloc() or
34 * alloc_page() ineffective.
35 *
36 * At the same time, a solution where a big memory region is
37 * reserved for a device is suboptimal since often more memory is
38 * reserved than strictly required and, moreover, the memory is
39 * inaccessible to the page allocator even if device drivers don't use it.
40 *
41 * CMA tries to solve this issue by operating on memory regions
42 * where only movable pages can be allocated from. This way, the kernel
43 * can use the memory for pagecache, and when a device driver requests
44 * it, the allocated pages can be migrated.
45 *
46 * Driver usage
47 *
48 * CMA should not be used by device drivers directly. It is
49 * only a helper framework for the dma-mapping subsystem.
50 *
51 * For more information, see kernel-docs in drivers/base/dma-contiguous.c
52 */
53
54#ifdef __KERNEL__
55
56struct cma;
57struct page;
58struct device;
59
60#ifdef CONFIG_CMA
61
62/*
63 * There is always at least global CMA area and a few optional device
64 * private areas configured in kernel .config.
65 */
66#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS)
67
68extern struct cma *dma_contiguous_default_area;
69
70void dma_contiguous_reserve(phys_addr_t addr_limit);
71int dma_declare_contiguous(struct device *dev, unsigned long size,
72 phys_addr_t base, phys_addr_t limit);
73
74struct page *dma_alloc_from_contiguous(struct device *dev, int count,
75 unsigned int order);
76bool dma_release_from_contiguous(struct device *dev, struct page *pages,
77 int count);
78
79#else
80
81#define MAX_CMA_AREAS (0)
82
83static inline void dma_contiguous_reserve(phys_addr_t limit) { }
84
85static inline
86int dma_declare_contiguous(struct device *dev, unsigned long size,
87 phys_addr_t base, phys_addr_t limit)
88{
89 return -ENOSYS;
90}
91
92static inline
93struct page *dma_alloc_from_contiguous(struct device *dev, int count,
94 unsigned int order)
95{
96 return NULL;
97}
98
99static inline
100bool dma_release_from_contiguous(struct device *dev, struct page *pages,
101 int count)
102{
103 return false;
104}
105
106#endif
107
108#endif
109
110#endif
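Because the !CONFIG_CMA stubs above simply return NULL/false, callers in the dma-mapping back ends need no #ifdef: they can try the contiguous area first and fall back to the buddy allocator unconditionally. A minimal sketch of that pattern (mirroring, not copying, the x86 hunk earlier in this patch; the helper names are made up):

static struct page *example_alloc_dma_pages(struct device *dev, size_t size,
                                            gfp_t gfp)
{
        unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
        struct page *page = NULL;

        if (!(gfp & GFP_ATOMIC))        /* CMA may sleep while migrating pages */
                page = dma_alloc_from_contiguous(dev, count, get_order(size));
        if (!page)                      /* !CONFIG_CMA stub also lands here */
                page = alloc_pages(gfp, get_order(size));
        return page;
}

static void example_free_dma_pages(struct device *dev, struct page *page,
                                   size_t size)
{
        unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;

        if (!dma_release_from_contiguous(dev, page, count))
                __free_pages(page, get_order(size));
}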
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 581e74b7df95..1e49be49d324 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -391,4 +391,16 @@ static inline bool pm_suspended_storage(void)
391} 391}
392#endif /* CONFIG_PM_SLEEP */ 392#endif /* CONFIG_PM_SLEEP */
393 393
394#ifdef CONFIG_CMA
395
396/* The below functions must be run on a range from a single zone. */
397extern int alloc_contig_range(unsigned long start, unsigned long end,
398 unsigned migratetype);
399extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
400
401/* CMA stuff */
402extern void init_cma_reserved_pageblock(struct page *page);
403
404#endif
405
394#endif /* __LINUX_GFP_H */ 406#endif /* __LINUX_GFP_H */
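alloc_contig_range()/free_contig_range() are the low-level primitives CMA is built on: the caller names a PFN window inside a single zone and, on success, owns every page in it. A hedged sketch of the pairing, with error handling reduced to the minimum (helper names are made up):

/* claim and later return nr_pages starting at pfn; the range must lie in one
 * zone and, for the CMA case, cover MIGRATE_CMA pageblocks */
static struct page *example_grab_contig(unsigned long pfn, unsigned long nr_pages)
{
        if (alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA))
                return NULL;            /* busy pages or migration failed */
        return pfn_to_page(pfn);
}

static void example_put_contig(unsigned long pfn, unsigned long nr_pages)
{
        free_contig_range(pfn, nr_pages);
}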
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 41aa49b74821..4871e31ae277 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -35,13 +35,39 @@
35 */ 35 */
36#define PAGE_ALLOC_COSTLY_ORDER 3 36#define PAGE_ALLOC_COSTLY_ORDER 3
37 37
38#define MIGRATE_UNMOVABLE 0 38enum {
39#define MIGRATE_RECLAIMABLE 1 39 MIGRATE_UNMOVABLE,
40#define MIGRATE_MOVABLE 2 40 MIGRATE_RECLAIMABLE,
41#define MIGRATE_PCPTYPES 3 /* the number of types on the pcp lists */ 41 MIGRATE_MOVABLE,
42#define MIGRATE_RESERVE 3 42 MIGRATE_PCPTYPES, /* the number of types on the pcp lists */
43#define MIGRATE_ISOLATE 4 /* can't allocate from here */ 43 MIGRATE_RESERVE = MIGRATE_PCPTYPES,
44#define MIGRATE_TYPES 5 44#ifdef CONFIG_CMA
45 /*
46 * MIGRATE_CMA migration type is designed to mimic the way
47 * ZONE_MOVABLE works. Only movable pages can be allocated
48 * from MIGRATE_CMA pageblocks and the page allocator never
49 * implicitly changes the migration type of a MIGRATE_CMA pageblock.
50 *
51 * The way to use it is to change migratetype of a range of
52 * pageblocks to MIGRATE_CMA which can be done by
53 * __free_pageblock_cma() function. What is important though
54 * is that a range of pageblocks must be aligned to
55 * MAX_ORDER_NR_PAGES should the biggest page be bigger than
56 * a single pageblock.
57 */
58 MIGRATE_CMA,
59#endif
60 MIGRATE_ISOLATE, /* can't allocate from here */
61 MIGRATE_TYPES
62};
63
64#ifdef CONFIG_CMA
65# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
66# define cma_wmark_pages(zone) zone->min_cma_pages
67#else
68# define is_migrate_cma(migratetype) false
69# define cma_wmark_pages(zone) 0
70#endif
45 71
46#define for_each_migratetype_order(order, type) \ 72#define for_each_migratetype_order(order, type) \
47 for (order = 0; order < MAX_ORDER; order++) \ 73 for (order = 0; order < MAX_ORDER; order++) \
@@ -347,6 +373,13 @@ struct zone {
347 /* see spanned/present_pages for more description */ 373 /* see spanned/present_pages for more description */
348 seqlock_t span_seqlock; 374 seqlock_t span_seqlock;
349#endif 375#endif
376#ifdef CONFIG_CMA
377 /*
378 * CMA needs to increase watermark levels during the allocation
379 * process to make sure that the system is not starved.
380 */
381 unsigned long min_cma_pages;
382#endif
350 struct free_area free_area[MAX_ORDER]; 383 struct free_area free_area[MAX_ORDER];
351 384
352#ifndef CONFIG_SPARSEMEM 385#ifndef CONFIG_SPARSEMEM
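is_migrate_cma() compiles to a constant false without CONFIG_CMA, so pageblock-type checks can stay unconditional. A tiny sketch of the kind of test the allocator and compaction code perform (compare migrate_async_suitable() later in this series; the helper name is made up):

/* may this pageblock be used to satisfy or receive movable allocations? */
static bool example_block_is_movable_ok(struct page *page)
{
        int mt = get_pageblock_migratetype(page);

        return mt == MIGRATE_MOVABLE || is_migrate_cma(mt);
}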
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 051c1b1ede4e..3bdcab30ca41 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -3,7 +3,7 @@
3 3
4/* 4/*
5 * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. 5 * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE.
6 * If specified range includes migrate types other than MOVABLE, 6 * If specified range includes migrate types other than MOVABLE or CMA,
7 * this will fail with -EBUSY. 7 * this will fail with -EBUSY.
8 * 8 *
9 * For isolating all pages in the range finally, the caller have to 9 * For isolating all pages in the range finally, the caller have to
@@ -11,27 +11,27 @@
11 * test it. 11 * test it.
12 */ 12 */
13extern int 13extern int
14start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); 14start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
15 unsigned migratetype);
15 16
16/* 17/*
17 * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE. 18 * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
18 * target range is [start_pfn, end_pfn) 19 * target range is [start_pfn, end_pfn)
19 */ 20 */
20extern int 21extern int
21undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); 22undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
23 unsigned migratetype);
22 24
23/* 25/*
24 * test all pages in [start_pfn, end_pfn)are isolated or not. 26 * Test all pages in [start_pfn, end_pfn) are isolated or not.
25 */ 27 */
26extern int 28int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn);
27test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn);
28 29
29/* 30/*
30 * Internal funcs.Changes pageblock's migrate type. 31 * Internal functions. Changes pageblock's migrate type.
31 * Please use make_pagetype_isolated()/make_pagetype_movable().
32 */ 32 */
33extern int set_migratetype_isolate(struct page *page); 33extern int set_migratetype_isolate(struct page *page);
34extern void unset_migratetype_isolate(struct page *page); 34extern void unset_migratetype_isolate(struct page *page, unsigned migratetype);
35 35
36 36
37#endif 37#endif
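The new migratetype argument records what the range should become again when isolation is undone: memory hotplug keeps passing MIGRATE_MOVABLE, while the CMA path (dma_alloc_from_contiguous() calling alloc_contig_range() with MIGRATE_CMA) makes the pageblocks return to the CMA pool. A hedged sketch of the balanced call sequence, with the migration step elided (the helper name is made up):

static int example_isolate_window(unsigned long start_pfn, unsigned long end_pfn,
                                  unsigned migratetype)
{
        int ret;

        ret = start_isolate_page_range(start_pfn, end_pfn, migratetype);
        if (ret)
                return ret;     /* an unmovable, non-CMA pageblock is in the way */

        /* ... migrate pages out, then test_pages_isolated() to verify ... */

        /* blocks revert to 'migratetype', not unconditionally to MOVABLE */
        undo_isolate_page_range(start_pfn, end_pfn, migratetype);
        return 0;
}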
diff --git a/mm/Kconfig b/mm/Kconfig
index e338407f1225..39220026c797 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -198,7 +198,7 @@ config COMPACTION
198config MIGRATION 198config MIGRATION
199 bool "Page migration" 199 bool "Page migration"
200 def_bool y 200 def_bool y
201 depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION 201 depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA
202 help 202 help
203 Allows the migration of the physical location of pages of processes 203 Allows the migration of the physical location of pages of processes
204 while the virtual addresses are not changed. This is useful in 204 while the virtual addresses are not changed. This is useful in
diff --git a/mm/Makefile b/mm/Makefile
index 50ec00ef2a0e..8aada89efbbb 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -13,7 +13,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
13 readahead.o swap.o truncate.o vmscan.o shmem.o \ 13 readahead.o swap.o truncate.o vmscan.o shmem.o \
14 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ 14 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
15 page_isolation.o mm_init.o mmu_context.o percpu.o \ 15 page_isolation.o mm_init.o mmu_context.o percpu.o \
16 $(mmu-y) 16 compaction.o $(mmu-y)
17obj-y += init-mm.o 17obj-y += init-mm.o
18 18
19ifdef CONFIG_NO_BOOTMEM 19ifdef CONFIG_NO_BOOTMEM
@@ -32,7 +32,6 @@ obj-$(CONFIG_NUMA) += mempolicy.o
32obj-$(CONFIG_SPARSEMEM) += sparse.o 32obj-$(CONFIG_SPARSEMEM) += sparse.o
33obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o 33obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
34obj-$(CONFIG_SLOB) += slob.o 34obj-$(CONFIG_SLOB) += slob.o
35obj-$(CONFIG_COMPACTION) += compaction.o
36obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o 35obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
37obj-$(CONFIG_KSM) += ksm.o 36obj-$(CONFIG_KSM) += ksm.o
38obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o 37obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
diff --git a/mm/compaction.c b/mm/compaction.c
index 74a8c825ff28..da7d35ea5103 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -16,30 +16,11 @@
16#include <linux/sysfs.h> 16#include <linux/sysfs.h>
17#include "internal.h" 17#include "internal.h"
18 18
19#if defined CONFIG_COMPACTION || defined CONFIG_CMA
20
19#define CREATE_TRACE_POINTS 21#define CREATE_TRACE_POINTS
20#include <trace/events/compaction.h> 22#include <trace/events/compaction.h>
21 23
22/*
23 * compact_control is used to track pages being migrated and the free pages
24 * they are being migrated to during memory compaction. The free_pfn starts
25 * at the end of a zone and migrate_pfn begins at the start. Movable pages
26 * are moved to the end of a zone during a compaction run and the run
27 * completes when free_pfn <= migrate_pfn
28 */
29struct compact_control {
30 struct list_head freepages; /* List of free pages to migrate to */
31 struct list_head migratepages; /* List of pages being migrated */
32 unsigned long nr_freepages; /* Number of isolated free pages */
33 unsigned long nr_migratepages; /* Number of pages to migrate */
34 unsigned long free_pfn; /* isolate_freepages search base */
35 unsigned long migrate_pfn; /* isolate_migratepages search base */
36 bool sync; /* Synchronous migration */
37
38 int order; /* order a direct compactor needs */
39 int migratetype; /* MOVABLE, RECLAIMABLE etc */
40 struct zone *zone;
41};
42
43static unsigned long release_freepages(struct list_head *freelist) 24static unsigned long release_freepages(struct list_head *freelist)
44{ 25{
45 struct page *page, *next; 26 struct page *page, *next;
@@ -54,24 +35,35 @@ static unsigned long release_freepages(struct list_head *freelist)
54 return count; 35 return count;
55} 36}
56 37
57/* Isolate free pages onto a private freelist. Must hold zone->lock */ 38static void map_pages(struct list_head *list)
58static unsigned long isolate_freepages_block(struct zone *zone, 39{
59 unsigned long blockpfn, 40 struct page *page;
60 struct list_head *freelist) 41
42 list_for_each_entry(page, list, lru) {
43 arch_alloc_page(page, 0);
44 kernel_map_pages(page, 1, 1);
45 }
46}
47
48static inline bool migrate_async_suitable(int migratetype)
49{
50 return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
51}
52
53/*
54 * Isolate free pages onto a private freelist. Caller must hold zone->lock.
55 * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
56 * pages inside of the pageblock (even though it may still end up isolating
57 * some pages).
58 */
59static unsigned long isolate_freepages_block(unsigned long blockpfn,
60 unsigned long end_pfn,
61 struct list_head *freelist,
62 bool strict)
61{ 63{
62 unsigned long zone_end_pfn, end_pfn;
63 int nr_scanned = 0, total_isolated = 0; 64 int nr_scanned = 0, total_isolated = 0;
64 struct page *cursor; 65 struct page *cursor;
65 66
66 /* Get the last PFN we should scan for free pages at */
67 zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
68 end_pfn = min(blockpfn + pageblock_nr_pages, zone_end_pfn);
69
70 /* Find the first usable PFN in the block to initialse page cursor */
71 for (; blockpfn < end_pfn; blockpfn++) {
72 if (pfn_valid_within(blockpfn))
73 break;
74 }
75 cursor = pfn_to_page(blockpfn); 67 cursor = pfn_to_page(blockpfn);
76 68
77 /* Isolate free pages. This assumes the block is valid */ 69 /* Isolate free pages. This assumes the block is valid */
@@ -79,15 +71,23 @@ static unsigned long isolate_freepages_block(struct zone *zone,
79 int isolated, i; 71 int isolated, i;
80 struct page *page = cursor; 72 struct page *page = cursor;
81 73
82 if (!pfn_valid_within(blockpfn)) 74 if (!pfn_valid_within(blockpfn)) {
75 if (strict)
76 return 0;
83 continue; 77 continue;
78 }
84 nr_scanned++; 79 nr_scanned++;
85 80
86 if (!PageBuddy(page)) 81 if (!PageBuddy(page)) {
82 if (strict)
83 return 0;
87 continue; 84 continue;
85 }
88 86
89 /* Found a free page, break it into order-0 pages */ 87 /* Found a free page, break it into order-0 pages */
90 isolated = split_free_page(page); 88 isolated = split_free_page(page);
89 if (!isolated && strict)
90 return 0;
91 total_isolated += isolated; 91 total_isolated += isolated;
92 for (i = 0; i < isolated; i++) { 92 for (i = 0; i < isolated; i++) {
93 list_add(&page->lru, freelist); 93 list_add(&page->lru, freelist);
@@ -105,114 +105,71 @@ static unsigned long isolate_freepages_block(struct zone *zone,
105 return total_isolated; 105 return total_isolated;
106} 106}
107 107
108/* Returns true if the page is within a block suitable for migration to */ 108/**
109static bool suitable_migration_target(struct page *page) 109 * isolate_freepages_range() - isolate free pages.
110{ 110 * @start_pfn: The first PFN to start isolating.
111 111 * @end_pfn: The one-past-last PFN.
112 int migratetype = get_pageblock_migratetype(page); 112 *
113 113 * Non-free pages, invalid PFNs, or zone boundaries within the
114 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ 114 * [start_pfn, end_pfn) range are considered errors, cause function to
115 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) 115 * undo its actions and return zero.
116 return false; 116 *
117 117 * Otherwise, function returns one-past-the-last PFN of isolated page
118 /* If the page is a large free page, then allow migration */ 118 * (which may be greater then end_pfn if end fell in a middle of
119 if (PageBuddy(page) && page_order(page) >= pageblock_order) 119 * a free page).
120 return true;
121
122 /* If the block is MIGRATE_MOVABLE, allow migration */
123 if (migratetype == MIGRATE_MOVABLE)
124 return true;
125
126 /* Otherwise skip the block */
127 return false;
128}
129
130/*
131 * Based on information in the current compact_control, find blocks
132 * suitable for isolating free pages from and then isolate them.
133 */ 120 */
134static void isolate_freepages(struct zone *zone, 121unsigned long
135 struct compact_control *cc) 122isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn)
136{ 123{
137 struct page *page; 124 unsigned long isolated, pfn, block_end_pfn, flags;
138 unsigned long high_pfn, low_pfn, pfn; 125 struct zone *zone = NULL;
139 unsigned long flags; 126 LIST_HEAD(freelist);
140 int nr_freepages = cc->nr_freepages;
141 struct list_head *freelist = &cc->freepages;
142
143 /*
144 * Initialise the free scanner. The starting point is where we last
145 * scanned from (or the end of the zone if starting). The low point
146 * is the end of the pageblock the migration scanner is using.
147 */
148 pfn = cc->free_pfn;
149 low_pfn = cc->migrate_pfn + pageblock_nr_pages;
150 127
151 /* 128 if (pfn_valid(start_pfn))
152 * Take care that if the migration scanner is at the end of the zone 129 zone = page_zone(pfn_to_page(start_pfn));
153 * that the free scanner does not accidentally move to the next zone
154 * in the next isolation cycle.
155 */
156 high_pfn = min(low_pfn, pfn);
157
158 /*
159 * Isolate free pages until enough are available to migrate the
160 * pages on cc->migratepages. We stop searching if the migrate
161 * and free page scanners meet or enough free pages are isolated.
162 */
163 for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
164 pfn -= pageblock_nr_pages) {
165 unsigned long isolated;
166 130
167 if (!pfn_valid(pfn)) 131 for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) {
168 continue; 132 if (!pfn_valid(pfn) || zone != page_zone(pfn_to_page(pfn)))
133 break;
169 134
170 /* 135 /*
171 * Check for overlapping nodes/zones. It's possible on some 136 * On subsequent iterations ALIGN() is actually not needed,
172 * configurations to have a setup like 137 * but we keep it that we not to complicate the code.
173 * node0 node1 node0
174 * i.e. it's possible that all pages within a zones range of
175 * pages do not belong to a single zone.
176 */ 138 */
177 page = pfn_to_page(pfn); 139 block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
178 if (page_zone(page) != zone) 140 block_end_pfn = min(block_end_pfn, end_pfn);
179 continue;
180 141
181 /* Check the block is suitable for migration */ 142 spin_lock_irqsave(&zone->lock, flags);
182 if (!suitable_migration_target(page)) 143 isolated = isolate_freepages_block(pfn, block_end_pfn,
183 continue; 144 &freelist, true);
145 spin_unlock_irqrestore(&zone->lock, flags);
184 146
185 /* 147 /*
186 * Found a block suitable for isolating free pages from. Now 148 * In strict mode, isolate_freepages_block() returns 0 if
187 * we disabled interrupts, double check things are ok and 149 * there are any holes in the block (ie. invalid PFNs or
188 * isolate the pages. This is to minimise the time IRQs 150 * non-free pages).
189 * are disabled
190 */ 151 */
191 isolated = 0; 152 if (!isolated)
192 spin_lock_irqsave(&zone->lock, flags); 153 break;
193 if (suitable_migration_target(page)) {
194 isolated = isolate_freepages_block(zone, pfn, freelist);
195 nr_freepages += isolated;
196 }
197 spin_unlock_irqrestore(&zone->lock, flags);
198 154
199 /* 155 /*
200 * Record the highest PFN we isolated pages from. When next 156 * If we managed to isolate pages, it is always (1 << n) *
201 * looking for free pages, the search will restart here as 157 * pageblock_nr_pages for some non-negative n. (Max order
202 * page migration may have returned some pages to the allocator 158 * page may span two pageblocks).
203 */ 159 */
204 if (isolated)
205 high_pfn = max(high_pfn, pfn);
206 } 160 }
207 161
208 /* split_free_page does not map the pages */ 162 /* split_free_page does not map the pages */
209 list_for_each_entry(page, freelist, lru) { 163 map_pages(&freelist);
210 arch_alloc_page(page, 0); 164
211 kernel_map_pages(page, 1, 1); 165 if (pfn < end_pfn) {
166 /* Loop terminated early, cleanup. */
167 release_freepages(&freelist);
168 return 0;
212 } 169 }
213 170
214 cc->free_pfn = high_pfn; 171 /* We don't use freelists for anything. */
215 cc->nr_freepages = nr_freepages; 172 return pfn;
216} 173}
217 174
218/* Update the number of anon and file isolated pages in the zone */ 175/* Update the number of anon and file isolated pages in the zone */
@@ -243,38 +200,34 @@ static bool too_many_isolated(struct zone *zone)
243 return isolated > (inactive + active) / 2; 200 return isolated > (inactive + active) / 2;
244} 201}
245 202
246/* possible outcome of isolate_migratepages */ 203/**
247typedef enum { 204 * isolate_migratepages_range() - isolate all migrate-able pages in range.
248 ISOLATE_ABORT, /* Abort compaction now */ 205 * @zone: Zone pages are in.
249 ISOLATE_NONE, /* No pages isolated, continue scanning */ 206 * @cc: Compaction control structure.
250 ISOLATE_SUCCESS, /* Pages isolated, migrate */ 207 * @low_pfn: The first PFN of the range.
251} isolate_migrate_t; 208 * @end_pfn: The one-past-the-last PFN of the range.
252 209 *
253/* 210 * Isolate all pages that can be migrated from the range specified by
254 * Isolate all pages that can be migrated from the block pointed to by 211 * [low_pfn, end_pfn). Returns zero if there is a fatal signal
255 * the migrate scanner within compact_control. 212 * pending, otherwise the PFN of the first page that was not scanned
213 * (which may be less than, equal to, or greater than end_pfn).
214 *
215 * Assumes that cc->migratepages is empty and cc->nr_migratepages is
216 * zero.
217 *
218 * Apart from cc->migratepages and cc->nr_migratepages this function
219 * does not modify any of cc's fields, in particular it does not modify
220 * (or read for that matter) cc->migrate_pfn.
256 */ 221 */
257static isolate_migrate_t isolate_migratepages(struct zone *zone, 222unsigned long
258 struct compact_control *cc) 223isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
224 unsigned long low_pfn, unsigned long end_pfn)
259{ 225{
260 unsigned long low_pfn, end_pfn;
261 unsigned long last_pageblock_nr = 0, pageblock_nr; 226 unsigned long last_pageblock_nr = 0, pageblock_nr;
262 unsigned long nr_scanned = 0, nr_isolated = 0; 227 unsigned long nr_scanned = 0, nr_isolated = 0;
263 struct list_head *migratelist = &cc->migratepages; 228 struct list_head *migratelist = &cc->migratepages;
264 isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; 229 isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE;
265 230
266 /* Do not scan outside zone boundaries */
267 low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);
268
269 /* Only scan within a pageblock boundary */
270 end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages);
271
272 /* Do not cross the free scanner or scan within a memory hole */
273 if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) {
274 cc->migrate_pfn = end_pfn;
275 return ISOLATE_NONE;
276 }
277
278 /* 231 /*
279 * Ensure that there are not too many pages isolated from the LRU 232 * Ensure that there are not too many pages isolated from the LRU
280 * list by either parallel reclaimers or compaction. If there are, 233 * list by either parallel reclaimers or compaction. If there are,
@@ -283,12 +236,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
283 while (unlikely(too_many_isolated(zone))) { 236 while (unlikely(too_many_isolated(zone))) {
284 /* async migration should just abort */ 237 /* async migration should just abort */
285 if (!cc->sync) 238 if (!cc->sync)
286 return ISOLATE_ABORT; 239 return 0;
287 240
288 congestion_wait(BLK_RW_ASYNC, HZ/10); 241 congestion_wait(BLK_RW_ASYNC, HZ/10);
289 242
290 if (fatal_signal_pending(current)) 243 if (fatal_signal_pending(current))
291 return ISOLATE_ABORT; 244 return 0;
292 } 245 }
293 246
294 /* Time to isolate some pages for migration */ 247 /* Time to isolate some pages for migration */
@@ -351,7 +304,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
351 */ 304 */
352 pageblock_nr = low_pfn >> pageblock_order; 305 pageblock_nr = low_pfn >> pageblock_order;
353 if (!cc->sync && last_pageblock_nr != pageblock_nr && 306 if (!cc->sync && last_pageblock_nr != pageblock_nr &&
354 get_pageblock_migratetype(page) != MIGRATE_MOVABLE) { 307 !migrate_async_suitable(get_pageblock_migratetype(page))) {
355 low_pfn += pageblock_nr_pages; 308 low_pfn += pageblock_nr_pages;
356 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; 309 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
357 last_pageblock_nr = pageblock_nr; 310 last_pageblock_nr = pageblock_nr;
@@ -396,11 +349,124 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
396 acct_isolated(zone, cc); 349 acct_isolated(zone, cc);
397 350
398 spin_unlock_irq(&zone->lru_lock); 351 spin_unlock_irq(&zone->lru_lock);
399 cc->migrate_pfn = low_pfn;
400 352
401 trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); 353 trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
402 354
403 return ISOLATE_SUCCESS; 355 return low_pfn;
356}
357
358#endif /* CONFIG_COMPACTION || CONFIG_CMA */
359#ifdef CONFIG_COMPACTION
360
361/* Returns true if the page is within a block suitable for migration to */
362static bool suitable_migration_target(struct page *page)
363{
364
365 int migratetype = get_pageblock_migratetype(page);
366
367 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
368 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
369 return false;
370
371 /* If the page is a large free page, then allow migration */
372 if (PageBuddy(page) && page_order(page) >= pageblock_order)
373 return true;
374
375 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
376 if (migrate_async_suitable(migratetype))
377 return true;
378
379 /* Otherwise skip the block */
380 return false;
381}
382
383/*
384 * Based on information in the current compact_control, find blocks
385 * suitable for isolating free pages from and then isolate them.
386 */
387static void isolate_freepages(struct zone *zone,
388 struct compact_control *cc)
389{
390 struct page *page;
391 unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn;
392 unsigned long flags;
393 int nr_freepages = cc->nr_freepages;
394 struct list_head *freelist = &cc->freepages;
395
396 /*
397 * Initialise the free scanner. The starting point is where we last
398 * scanned from (or the end of the zone if starting). The low point
399 * is the end of the pageblock the migration scanner is using.
400 */
401 pfn = cc->free_pfn;
402 low_pfn = cc->migrate_pfn + pageblock_nr_pages;
403
404 /*
405 * Take care that if the migration scanner is at the end of the zone
406 * that the free scanner does not accidentally move to the next zone
407 * in the next isolation cycle.
408 */
409 high_pfn = min(low_pfn, pfn);
410
411 zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
412
413 /*
414 * Isolate free pages until enough are available to migrate the
415 * pages on cc->migratepages. We stop searching if the migrate
416 * and free page scanners meet or enough free pages are isolated.
417 */
418 for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
419 pfn -= pageblock_nr_pages) {
420 unsigned long isolated;
421
422 if (!pfn_valid(pfn))
423 continue;
424
425 /*
426 * Check for overlapping nodes/zones. It's possible on some
427 * configurations to have a setup like
428 * node0 node1 node0
429 * i.e. it's possible that all pages within a zone's range of
430 * pages do not belong to a single zone.
431 */
432 page = pfn_to_page(pfn);
433 if (page_zone(page) != zone)
434 continue;
435
436 /* Check the block is suitable for migration */
437 if (!suitable_migration_target(page))
438 continue;
439
440 /*
441 * Found a block suitable for isolating free pages from. Now
442 * we disabled interrupts, double check things are ok and
443 * isolate the pages. This is to minimise the time IRQs
444 * are disabled
445 */
446 isolated = 0;
447 spin_lock_irqsave(&zone->lock, flags);
448 if (suitable_migration_target(page)) {
449 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
450 isolated = isolate_freepages_block(pfn, end_pfn,
451 freelist, false);
452 nr_freepages += isolated;
453 }
454 spin_unlock_irqrestore(&zone->lock, flags);
455
456 /*
457 * Record the highest PFN we isolated pages from. When next
458 * looking for free pages, the search will restart here as
459 * page migration may have returned some pages to the allocator
460 */
461 if (isolated)
462 high_pfn = max(high_pfn, pfn);
463 }
464
465 /* split_free_page does not map the pages */
466 map_pages(freelist);
467
468 cc->free_pfn = high_pfn;
469 cc->nr_freepages = nr_freepages;
404} 470}
405 471
406/* 472/*
@@ -449,6 +515,44 @@ static void update_nr_listpages(struct compact_control *cc)
449 cc->nr_freepages = nr_freepages; 515 cc->nr_freepages = nr_freepages;
450} 516}
451 517
518/* possible outcome of isolate_migratepages */
519typedef enum {
520 ISOLATE_ABORT, /* Abort compaction now */
521 ISOLATE_NONE, /* No pages isolated, continue scanning */
522 ISOLATE_SUCCESS, /* Pages isolated, migrate */
523} isolate_migrate_t;
524
525/*
526 * Isolate all pages that can be migrated from the block pointed to by
527 * the migrate scanner within compact_control.
528 */
529static isolate_migrate_t isolate_migratepages(struct zone *zone,
530 struct compact_control *cc)
531{
532 unsigned long low_pfn, end_pfn;
533
534 /* Do not scan outside zone boundaries */
535 low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);
536
537 /* Only scan within a pageblock boundary */
538 end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages);
539
540 /* Do not cross the free scanner or scan within a memory hole */
541 if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) {
542 cc->migrate_pfn = end_pfn;
543 return ISOLATE_NONE;
544 }
545
546 /* Perform the isolation */
547 low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn);
548 if (!low_pfn)
549 return ISOLATE_ABORT;
550
551 cc->migrate_pfn = low_pfn;
552
553 return ISOLATE_SUCCESS;
554}
555
452static int compact_finished(struct zone *zone, 556static int compact_finished(struct zone *zone,
453 struct compact_control *cc) 557 struct compact_control *cc)
454{ 558{
@@ -795,3 +899,5 @@ void compaction_unregister_node(struct node *node)
795 return device_remove_file(&node->dev, &dev_attr_compact); 899 return device_remove_file(&node->dev, &dev_attr_compact);
796} 900}
797#endif /* CONFIG_SYSFS && CONFIG_NUMA */ 901#endif /* CONFIG_SYSFS && CONFIG_NUMA */
902
903#endif /* CONFIG_COMPACTION */
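With the two range isolators now compiled whenever either CONFIG_COMPACTION or CONFIG_CMA is set, a caller outside the compaction core can drive them directly; this is roughly what alloc_contig_range() does. A hedged sketch (the migrate step is elided, the function name is made up, and the struct compact_control fields used are the ones declared in mm/internal.h just below):

static unsigned long example_isolate_range(struct zone *zone,
                                           struct compact_control *cc,
                                           unsigned long start, unsigned long end)
{
        unsigned long pfn;

        /* pull migratable pages in [start, end) onto cc->migratepages */
        pfn = isolate_migratepages_range(zone, cc, start, end);
        if (!pfn)
                return 0;       /* fatal signal, or too many pages isolated */

        /* ... migrate_pages(&cc->migratepages, ...) would run here ... */

        /* then take the now-free pages; returns 0 on any hole or busy page */
        return isolate_freepages_range(start, end);
}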
diff --git a/mm/internal.h b/mm/internal.h
index 2189af491783..aee4761cf9a9 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -100,6 +100,39 @@ extern void prep_compound_page(struct page *page, unsigned long order);
100extern bool is_free_buddy_page(struct page *page); 100extern bool is_free_buddy_page(struct page *page);
101#endif 101#endif
102 102
103#if defined CONFIG_COMPACTION || defined CONFIG_CMA
104
105/*
106 * in mm/compaction.c
107 */
108/*
109 * compact_control is used to track pages being migrated and the free pages
110 * they are being migrated to during memory compaction. The free_pfn starts
111 * at the end of a zone and migrate_pfn begins at the start. Movable pages
112 * are moved to the end of a zone during a compaction run and the run
113 * completes when free_pfn <= migrate_pfn
114 */
115struct compact_control {
116 struct list_head freepages; /* List of free pages to migrate to */
117 struct list_head migratepages; /* List of pages being migrated */
118 unsigned long nr_freepages; /* Number of isolated free pages */
119 unsigned long nr_migratepages; /* Number of pages to migrate */
120 unsigned long free_pfn; /* isolate_freepages search base */
121 unsigned long migrate_pfn; /* isolate_migratepages search base */
122 bool sync; /* Synchronous migration */
123
124 int order; /* order a direct compactor needs */
125 int migratetype; /* MOVABLE, RECLAIMABLE etc */
126 struct zone *zone;
127};
128
129unsigned long
130isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn);
131unsigned long
132isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
133 unsigned long low_pfn, unsigned long end_pfn);
134
135#endif
103 136
104/* 137/*
105 * function for dealing with page's order in buddy system. 138 * function for dealing with page's order in buddy system.
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 97cc2733551a..c99ad4e6b88c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1404,7 +1404,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
1404 /* Not a free page */ 1404 /* Not a free page */
1405 ret = 1; 1405 ret = 1;
1406 } 1406 }
1407 unset_migratetype_isolate(p); 1407 unset_migratetype_isolate(p, MIGRATE_MOVABLE);
1408 unlock_memory_hotplug(); 1408 unlock_memory_hotplug();
1409 return ret; 1409 return ret;
1410} 1410}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6629fafd6ce4..fc898cb4fe8f 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -891,7 +891,7 @@ static int __ref offline_pages(unsigned long start_pfn,
891 nr_pages = end_pfn - start_pfn; 891 nr_pages = end_pfn - start_pfn;
892 892
893 /* set above range as isolated */ 893 /* set above range as isolated */
894 ret = start_isolate_page_range(start_pfn, end_pfn); 894 ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
895 if (ret) 895 if (ret)
896 goto out; 896 goto out;
897 897
@@ -956,7 +956,7 @@ repeat:
956 We cannot do rollback at this point. */ 956 We cannot do rollback at this point. */
957 offline_isolated_pages(start_pfn, end_pfn); 957 offline_isolated_pages(start_pfn, end_pfn);
958 /* reset pagetype flags and makes migrate type to be MOVABLE */ 958 /* reset pagetype flags and makes migrate type to be MOVABLE */
959 undo_isolate_page_range(start_pfn, end_pfn); 959 undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
960 /* removal success */ 960 /* removal success */
961 zone->present_pages -= offlined_pages; 961 zone->present_pages -= offlined_pages;
962 zone->zone_pgdat->node_present_pages -= offlined_pages; 962 zone->zone_pgdat->node_present_pages -= offlined_pages;
@@ -981,7 +981,7 @@ failed_removal:
981 start_pfn, end_pfn); 981 start_pfn, end_pfn);
982 memory_notify(MEM_CANCEL_OFFLINE, &arg); 982 memory_notify(MEM_CANCEL_OFFLINE, &arg);
983 /* pushback to free area */ 983 /* pushback to free area */
984 undo_isolate_page_range(start_pfn, end_pfn); 984 undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
985 985
986out: 986out:
987 unlock_memory_hotplug(); 987 unlock_memory_hotplug();
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1851df600438..bab8e3bc4202 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -57,6 +57,7 @@
57#include <linux/ftrace_event.h> 57#include <linux/ftrace_event.h>
58#include <linux/memcontrol.h> 58#include <linux/memcontrol.h>
59#include <linux/prefetch.h> 59#include <linux/prefetch.h>
60#include <linux/migrate.h>
60#include <linux/page-debug-flags.h> 61#include <linux/page-debug-flags.h>
61 62
62#include <asm/tlbflush.h> 63#include <asm/tlbflush.h>
@@ -513,10 +514,10 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
513 * free pages of length of (1 << order) and marked with _mapcount -2. Page's 514 * free pages of length of (1 << order) and marked with _mapcount -2. Page's
514 * order is recorded in page_private(page) field. 515 * order is recorded in page_private(page) field.
515 * So when we are allocating or freeing one, we can derive the state of the 516 * So when we are allocating or freeing one, we can derive the state of the
516 * other. That is, if we allocate a small block, and both were 517 * other. That is, if we allocate a small block, and both were
517 * free, the remainder of the region must be split into blocks. 518 * free, the remainder of the region must be split into blocks.
518 * If a block is freed, and its buddy is also free, then this 519 * If a block is freed, and its buddy is also free, then this
519 * triggers coalescing into a block of larger size. 520 * triggers coalescing into a block of larger size.
520 * 521 *
521 * -- wli 522 * -- wli
522 */ 523 */
@@ -749,6 +750,24 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
749 __free_pages(page, order); 750 __free_pages(page, order);
750} 751}
751 752
753#ifdef CONFIG_CMA
 754/* Free whole pageblock and set its migration type to MIGRATE_CMA. */
755void __init init_cma_reserved_pageblock(struct page *page)
756{
757 unsigned i = pageblock_nr_pages;
758 struct page *p = page;
759
760 do {
761 __ClearPageReserved(p);
762 set_page_count(p, 0);
763 } while (++p, --i);
764
765 set_page_refcounted(page);
766 set_pageblock_migratetype(page, MIGRATE_CMA);
767 __free_pages(page, pageblock_order);
768 totalram_pages += pageblock_nr_pages;
769}
770#endif
752 771
753/* 772/*
754 * The order of subdivision here is critical for the IO subsystem. 773 * The order of subdivision here is critical for the IO subsystem.
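init_cma_reserved_pageblock() is the hand-over point where memory reserved at early boot becomes MIGRATE_CMA buddy pages. The expected caller is the CMA core added under drivers/ in this series; a hypothetical sketch of that loop, assuming base_pfn and count are pageblock aligned:

	static void __init cma_free_reserved(unsigned long base_pfn, unsigned long count)
	{
		unsigned long pfn;

		/* One call per reserved pageblock: clear PG_reserved, tag the block
		 * MIGRATE_CMA and free it into the buddy allocator. */
		for (pfn = base_pfn; pfn < base_pfn + count; pfn += pageblock_nr_pages)
			init_cma_reserved_pageblock(pfn_to_page(pfn));
	}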
@@ -874,11 +893,17 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
874 * This array describes the order lists are fallen back to when 893 * This array describes the order lists are fallen back to when
875 * the free lists for the desirable migrate type are depleted 894 * the free lists for the desirable migrate type are depleted
876 */ 895 */
877static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { 896static int fallbacks[MIGRATE_TYPES][4] = {
878 [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, 897 [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
879 [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, 898 [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
880 [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, 899#ifdef CONFIG_CMA
881 [MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */ 900 [MIGRATE_MOVABLE] = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
901 [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */
902#else
903 [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
904#endif
905 [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */
906 [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */
882}; 907};
883 908
884/* 909/*
@@ -973,12 +998,12 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
973 /* Find the largest possible block of pages in the other list */ 998 /* Find the largest possible block of pages in the other list */
974 for (current_order = MAX_ORDER-1; current_order >= order; 999 for (current_order = MAX_ORDER-1; current_order >= order;
975 --current_order) { 1000 --current_order) {
976 for (i = 0; i < MIGRATE_TYPES - 1; i++) { 1001 for (i = 0;; i++) {
977 migratetype = fallbacks[start_migratetype][i]; 1002 migratetype = fallbacks[start_migratetype][i];
978 1003
979 /* MIGRATE_RESERVE handled later if necessary */ 1004 /* MIGRATE_RESERVE handled later if necessary */
980 if (migratetype == MIGRATE_RESERVE) 1005 if (migratetype == MIGRATE_RESERVE)
981 continue; 1006 break;
982 1007
983 area = &(zone->free_area[current_order]); 1008 area = &(zone->free_area[current_order]);
984 if (list_empty(&area->free_list[migratetype])) 1009 if (list_empty(&area->free_list[migratetype]))
@@ -993,11 +1018,18 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
993 * pages to the preferred allocation list. If falling 1018 * pages to the preferred allocation list. If falling
994 * back for a reclaimable kernel allocation, be more 1019 * back for a reclaimable kernel allocation, be more
995 * aggressive about taking ownership of free pages 1020 * aggressive about taking ownership of free pages
1021 *
1022 * On the other hand, never change migration
1023 * type of MIGRATE_CMA pageblocks nor move CMA
 1024 * pages onto other free lists. We don't
1025 * want unmovable pages to be allocated from
1026 * MIGRATE_CMA areas.
996 */ 1027 */
997 if (unlikely(current_order >= (pageblock_order >> 1)) || 1028 if (!is_migrate_cma(migratetype) &&
998 start_migratetype == MIGRATE_RECLAIMABLE || 1029 (unlikely(current_order >= pageblock_order / 2) ||
999 page_group_by_mobility_disabled) { 1030 start_migratetype == MIGRATE_RECLAIMABLE ||
1000 unsigned long pages; 1031 page_group_by_mobility_disabled)) {
1032 int pages;
1001 pages = move_freepages_block(zone, page, 1033 pages = move_freepages_block(zone, page,
1002 start_migratetype); 1034 start_migratetype);
1003 1035
@@ -1015,11 +1047,14 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
1015 rmv_page_order(page); 1047 rmv_page_order(page);
1016 1048
1017 /* Take ownership for orders >= pageblock_order */ 1049 /* Take ownership for orders >= pageblock_order */
1018 if (current_order >= pageblock_order) 1050 if (current_order >= pageblock_order &&
1051 !is_migrate_cma(migratetype))
1019 change_pageblock_range(page, current_order, 1052 change_pageblock_range(page, current_order,
1020 start_migratetype); 1053 start_migratetype);
1021 1054
1022 expand(zone, page, order, current_order, area, migratetype); 1055 expand(zone, page, order, current_order, area,
1056 is_migrate_cma(migratetype)
1057 ? migratetype : start_migratetype);
1023 1058
1024 trace_mm_page_alloc_extfrag(page, order, current_order, 1059 trace_mm_page_alloc_extfrag(page, order, current_order,
1025 start_migratetype, migratetype); 1060 start_migratetype, migratetype);
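Two changes meet in __rmqueue_fallback() above: each fallbacks[] row is now terminated by a MIGRATE_RESERVE sentinel instead of being scanned for a fixed MIGRATE_TYPES-1 entries, and CMA pageblocks are never retyped or drained onto other free lists. The steal decision reduces to a predicate like the following sketch (a restatement of the condition in the hunk, not a new helper in the tree):

	static bool can_steal_pageblock(int fallback_mt, unsigned int current_order,
					int start_migratetype)
	{
		/* CMA blocks must keep their type so unmovable/reclaimable
		 * allocations never end up inside a CMA region. */
		if (is_migrate_cma(fallback_mt))
			return false;

		return current_order >= pageblock_order / 2 ||
		       start_migratetype == MIGRATE_RECLAIMABLE ||
		       page_group_by_mobility_disabled;
	}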
@@ -1061,17 +1096,17 @@ retry_reserve:
1061 return page; 1096 return page;
1062} 1097}
1063 1098
1064/* 1099/*
1065 * Obtain a specified number of elements from the buddy allocator, all under 1100 * Obtain a specified number of elements from the buddy allocator, all under
1066 * a single hold of the lock, for efficiency. Add them to the supplied list. 1101 * a single hold of the lock, for efficiency. Add them to the supplied list.
1067 * Returns the number of new pages which were placed at *list. 1102 * Returns the number of new pages which were placed at *list.
1068 */ 1103 */
1069static int rmqueue_bulk(struct zone *zone, unsigned int order, 1104static int rmqueue_bulk(struct zone *zone, unsigned int order,
1070 unsigned long count, struct list_head *list, 1105 unsigned long count, struct list_head *list,
1071 int migratetype, int cold) 1106 int migratetype, int cold)
1072{ 1107{
1073 int i; 1108 int mt = migratetype, i;
1074 1109
1075 spin_lock(&zone->lock); 1110 spin_lock(&zone->lock);
1076 for (i = 0; i < count; ++i) { 1111 for (i = 0; i < count; ++i) {
1077 struct page *page = __rmqueue(zone, order, migratetype); 1112 struct page *page = __rmqueue(zone, order, migratetype);
@@ -1091,7 +1126,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
1091 list_add(&page->lru, list); 1126 list_add(&page->lru, list);
1092 else 1127 else
1093 list_add_tail(&page->lru, list); 1128 list_add_tail(&page->lru, list);
1094 set_page_private(page, migratetype); 1129 if (IS_ENABLED(CONFIG_CMA)) {
1130 mt = get_pageblock_migratetype(page);
1131 if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE)
1132 mt = migratetype;
1133 }
1134 set_page_private(page, mt);
1095 list = &page->lru; 1135 list = &page->lru;
1096 } 1136 }
1097 __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); 1137 __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
@@ -1371,8 +1411,12 @@ int split_free_page(struct page *page)
1371 1411
1372 if (order >= pageblock_order - 1) { 1412 if (order >= pageblock_order - 1) {
1373 struct page *endpage = page + (1 << order) - 1; 1413 struct page *endpage = page + (1 << order) - 1;
1374 for (; page < endpage; page += pageblock_nr_pages) 1414 for (; page < endpage; page += pageblock_nr_pages) {
1375 set_pageblock_migratetype(page, MIGRATE_MOVABLE); 1415 int mt = get_pageblock_migratetype(page);
1416 if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt))
1417 set_pageblock_migratetype(page,
1418 MIGRATE_MOVABLE);
1419 }
1376 } 1420 }
1377 1421
1378 return 1 << order; 1422 return 1 << order;
@@ -2086,16 +2130,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
2086} 2130}
2087#endif /* CONFIG_COMPACTION */ 2131#endif /* CONFIG_COMPACTION */
2088 2132
2089/* The really slow allocator path where we enter direct reclaim */ 2133/* Perform direct synchronous page reclaim */
2090static inline struct page * 2134static int
2091__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, 2135__perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist,
2092 struct zonelist *zonelist, enum zone_type high_zoneidx, 2136 nodemask_t *nodemask)
2093 nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
2094 int migratetype, unsigned long *did_some_progress)
2095{ 2137{
2096 struct page *page = NULL;
2097 struct reclaim_state reclaim_state; 2138 struct reclaim_state reclaim_state;
2098 bool drained = false; 2139 int progress;
2099 2140
2100 cond_resched(); 2141 cond_resched();
2101 2142
@@ -2106,7 +2147,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
2106 reclaim_state.reclaimed_slab = 0; 2147 reclaim_state.reclaimed_slab = 0;
2107 current->reclaim_state = &reclaim_state; 2148 current->reclaim_state = &reclaim_state;
2108 2149
2109 *did_some_progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); 2150 progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask);
2110 2151
2111 current->reclaim_state = NULL; 2152 current->reclaim_state = NULL;
2112 lockdep_clear_current_reclaim_state(); 2153 lockdep_clear_current_reclaim_state();
@@ -2114,6 +2155,21 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
2114 2155
2115 cond_resched(); 2156 cond_resched();
2116 2157
2158 return progress;
2159}
2160
2161/* The really slow allocator path where we enter direct reclaim */
2162static inline struct page *
2163__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
2164 struct zonelist *zonelist, enum zone_type high_zoneidx,
2165 nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
2166 int migratetype, unsigned long *did_some_progress)
2167{
2168 struct page *page = NULL;
2169 bool drained = false;
2170
2171 *did_some_progress = __perform_reclaim(gfp_mask, order, zonelist,
2172 nodemask);
2117 if (unlikely(!(*did_some_progress))) 2173 if (unlikely(!(*did_some_progress)))
2118 return NULL; 2174 return NULL;
2119 2175
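The split above is purely mechanical: __alloc_pages_direct_reclaim() keeps its behaviour, while the new __perform_reclaim() helper becomes reusable by the CMA watermark code added later in this diff. The two call sites, excerpted from this patch:

	/* Slow allocation path (this hunk): reclaim, then retry the allocation. */
	*did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, nodemask);

	/* CMA path (__reclaim_pages(), further down): reclaim with a NULL
	 * nodemask until the zone watermark is satisfied again. */
	did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, NULL);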
@@ -4301,7 +4357,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
4301 init_waitqueue_head(&pgdat->kswapd_wait); 4357 init_waitqueue_head(&pgdat->kswapd_wait);
4302 pgdat->kswapd_max_order = 0; 4358 pgdat->kswapd_max_order = 0;
4303 pgdat_page_cgroup_init(pgdat); 4359 pgdat_page_cgroup_init(pgdat);
4304 4360
4305 for (j = 0; j < MAX_NR_ZONES; j++) { 4361 for (j = 0; j < MAX_NR_ZONES; j++) {
4306 struct zone *zone = pgdat->node_zones + j; 4362 struct zone *zone = pgdat->node_zones + j;
4307 unsigned long size, realsize, memmap_pages; 4363 unsigned long size, realsize, memmap_pages;
@@ -4976,14 +5032,7 @@ static void setup_per_zone_lowmem_reserve(void)
4976 calculate_totalreserve_pages(); 5032 calculate_totalreserve_pages();
4977} 5033}
4978 5034
4979/** 5035static void __setup_per_zone_wmarks(void)
4980 * setup_per_zone_wmarks - called when min_free_kbytes changes
4981 * or when memory is hot-{added|removed}
4982 *
4983 * Ensures that the watermark[min,low,high] values for each zone are set
4984 * correctly with respect to min_free_kbytes.
4985 */
4986void setup_per_zone_wmarks(void)
4987{ 5036{
4988 unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); 5037 unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
4989 unsigned long lowmem_pages = 0; 5038 unsigned long lowmem_pages = 0;
@@ -5030,6 +5079,11 @@ void setup_per_zone_wmarks(void)
5030 5079
5031 zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); 5080 zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2);
5032 zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); 5081 zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
5082
5083 zone->watermark[WMARK_MIN] += cma_wmark_pages(zone);
5084 zone->watermark[WMARK_LOW] += cma_wmark_pages(zone);
5085 zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone);
5086
5033 setup_zone_migrate_reserve(zone); 5087 setup_zone_migrate_reserve(zone);
5034 spin_unlock_irqrestore(&zone->lock, flags); 5088 spin_unlock_irqrestore(&zone->lock, flags);
5035 } 5089 }
@@ -5038,6 +5092,20 @@ void setup_per_zone_wmarks(void)
5038 calculate_totalreserve_pages(); 5092 calculate_totalreserve_pages();
5039} 5093}
5040 5094
5095/**
5096 * setup_per_zone_wmarks - called when min_free_kbytes changes
5097 * or when memory is hot-{added|removed}
5098 *
5099 * Ensures that the watermark[min,low,high] values for each zone are set
5100 * correctly with respect to min_free_kbytes.
5101 */
5102void setup_per_zone_wmarks(void)
5103{
5104 mutex_lock(&zonelists_mutex);
5105 __setup_per_zone_wmarks();
5106 mutex_unlock(&zonelists_mutex);
5107}
5108
5041/* 5109/*
5042 * The inactive anon list should be small enough that the VM never has to 5110 * The inactive anon list should be small enough that the VM never has to
5043 * do too much work, but large enough that each inactive page has a chance 5111 * do too much work, but large enough that each inactive page has a chance
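Around the watermark code two things change: the body moves into __setup_per_zone_wmarks() so the public entry point can serialize on zonelists_mutex (the CMA code below reaches it via __update_cma_watermarks(), concurrently with memory hotplug), and every watermark is raised by cma_wmark_pages(zone). That helper is not part of this hunk; a sketch of what it is assumed to reduce to, based on the zone->min_cma_pages counter introduced further down:

	/* Assumption: defined alongside struct zone; returns the number of pages
	 * CMA currently wants kept above the normal watermarks in this zone. */
	static inline unsigned long cma_wmark_pages(struct zone *zone)
	{
	#ifdef CONFIG_CMA
		return zone->min_cma_pages;
	#else
		return 0;
	#endif
	}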
@@ -5415,14 +5483,16 @@ static int
5415__count_immobile_pages(struct zone *zone, struct page *page, int count) 5483__count_immobile_pages(struct zone *zone, struct page *page, int count)
5416{ 5484{
5417 unsigned long pfn, iter, found; 5485 unsigned long pfn, iter, found;
5486 int mt;
5487
5418 /* 5488 /*
5419 * For avoiding noise data, lru_add_drain_all() should be called 5489 * For avoiding noise data, lru_add_drain_all() should be called
5420 * If ZONE_MOVABLE, the zone never contains immobile pages 5490 * If ZONE_MOVABLE, the zone never contains immobile pages
5421 */ 5491 */
5422 if (zone_idx(zone) == ZONE_MOVABLE) 5492 if (zone_idx(zone) == ZONE_MOVABLE)
5423 return true; 5493 return true;
5424 5494 mt = get_pageblock_migratetype(page);
5425 if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE) 5495 if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt))
5426 return true; 5496 return true;
5427 5497
5428 pfn = page_to_pfn(page); 5498 pfn = page_to_pfn(page);
@@ -5539,7 +5609,7 @@ out:
5539 return ret; 5609 return ret;
5540} 5610}
5541 5611
5542void unset_migratetype_isolate(struct page *page) 5612void unset_migratetype_isolate(struct page *page, unsigned migratetype)
5543{ 5613{
5544 struct zone *zone; 5614 struct zone *zone;
5545 unsigned long flags; 5615 unsigned long flags;
@@ -5547,12 +5617,259 @@ void unset_migratetype_isolate(struct page *page)
5547 spin_lock_irqsave(&zone->lock, flags); 5617 spin_lock_irqsave(&zone->lock, flags);
5548 if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) 5618 if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
5549 goto out; 5619 goto out;
5550 set_pageblock_migratetype(page, MIGRATE_MOVABLE); 5620 set_pageblock_migratetype(page, migratetype);
5551 move_freepages_block(zone, page, MIGRATE_MOVABLE); 5621 move_freepages_block(zone, page, migratetype);
5552out: 5622out:
5553 spin_unlock_irqrestore(&zone->lock, flags); 5623 spin_unlock_irqrestore(&zone->lock, flags);
5554} 5624}
5555 5625
5626#ifdef CONFIG_CMA
5627
5628static unsigned long pfn_max_align_down(unsigned long pfn)
5629{
5630 return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES,
5631 pageblock_nr_pages) - 1);
5632}
5633
5634static unsigned long pfn_max_align_up(unsigned long pfn)
5635{
5636 return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES,
5637 pageblock_nr_pages));
5638}
5639
5640static struct page *
5641__alloc_contig_migrate_alloc(struct page *page, unsigned long private,
5642 int **resultp)
5643{
5644 return alloc_page(GFP_HIGHUSER_MOVABLE);
5645}
5646
5647/* [start, end) must belong to a single zone. */
5648static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
5649{
5650 /* This function is based on compact_zone() from compaction.c. */
5651
5652 unsigned long pfn = start;
5653 unsigned int tries = 0;
5654 int ret = 0;
5655
5656 struct compact_control cc = {
5657 .nr_migratepages = 0,
5658 .order = -1,
5659 .zone = page_zone(pfn_to_page(start)),
5660 .sync = true,
5661 };
5662 INIT_LIST_HEAD(&cc.migratepages);
5663
5664 migrate_prep_local();
5665
5666 while (pfn < end || !list_empty(&cc.migratepages)) {
5667 if (fatal_signal_pending(current)) {
5668 ret = -EINTR;
5669 break;
5670 }
5671
5672 if (list_empty(&cc.migratepages)) {
5673 cc.nr_migratepages = 0;
5674 pfn = isolate_migratepages_range(cc.zone, &cc,
5675 pfn, end);
5676 if (!pfn) {
5677 ret = -EINTR;
5678 break;
5679 }
5680 tries = 0;
5681 } else if (++tries == 5) {
5682 ret = ret < 0 ? ret : -EBUSY;
5683 break;
5684 }
5685
5686 ret = migrate_pages(&cc.migratepages,
5687 __alloc_contig_migrate_alloc,
5688 0, false, MIGRATE_SYNC);
5689 }
5690
5691 putback_lru_pages(&cc.migratepages);
5692 return ret > 0 ? 0 : ret;
5693}
5694
5695/*
5696 * Update zone's cma pages counter used for watermark level calculation.
5697 */
5698static inline void __update_cma_watermarks(struct zone *zone, int count)
5699{
5700 unsigned long flags;
5701 spin_lock_irqsave(&zone->lock, flags);
5702 zone->min_cma_pages += count;
5703 spin_unlock_irqrestore(&zone->lock, flags);
5704 setup_per_zone_wmarks();
5705}
5706
5707/*
5708 * Trigger memory pressure bump to reclaim some pages in order to be able to
 5709 * allocate 'count' pages in single page units. Does work similar to
 5710 * __alloc_pages_slowpath().
5711 */
5712static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count)
5713{
5714 enum zone_type high_zoneidx = gfp_zone(gfp_mask);
5715 struct zonelist *zonelist = node_zonelist(0, gfp_mask);
5716 int did_some_progress = 0;
5717 int order = 1;
5718
5719 /*
 5720 * Raise the watermarks to force kswapd to do its job and
 5721 * stabilise the zone at the new watermark level.
5722 */
5723 __update_cma_watermarks(zone, count);
5724
5725 /* Obey watermarks as if the page was being allocated */
5726 while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) {
5727 wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone));
5728
5729 did_some_progress = __perform_reclaim(gfp_mask, order, zonelist,
5730 NULL);
5731 if (!did_some_progress) {
5732 /* Exhausted what can be done so it's blamo time */
5733 out_of_memory(zonelist, gfp_mask, order, NULL, false);
5734 }
5735 }
5736
5737 /* Restore original watermark levels. */
5738 __update_cma_watermarks(zone, -count);
5739
5740 return count;
5741}
5742
5743/**
5744 * alloc_contig_range() -- tries to allocate given range of pages
5745 * @start: start PFN to allocate
5746 * @end: one-past-the-last PFN to allocate
 5747 * @migratetype: migratetype of the underlying pageblocks (either
5748 * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks
5749 * in range must have the same migratetype and it must
5750 * be either of the two.
5751 *
5752 * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES
 5753 * aligned; however, it is the caller's responsibility to guarantee that
5754 * we are the only thread that changes migrate type of pageblocks the
5755 * pages fall in.
5756 *
5757 * The PFN range must belong to a single zone.
5758 *
5759 * Returns zero on success or negative error code. On success all
 5760 * pages whose PFN is in [start, end) are allocated for the caller and
5761 * need to be freed with free_contig_range().
5762 */
5763int alloc_contig_range(unsigned long start, unsigned long end,
5764 unsigned migratetype)
5765{
5766 struct zone *zone = page_zone(pfn_to_page(start));
5767 unsigned long outer_start, outer_end;
5768 int ret = 0, order;
5769
5770 /*
 5771 * What we do here is mark all pageblocks in the range as
 5772 * MIGRATE_ISOLATE. Because pageblocks and max-order pages may
 5773 * have different sizes, and due to the way the page allocator
 5774 * works, we align the range to the larger of the two so
 5775 * that the page allocator won't try to merge buddies from
5776 * different pageblocks and change MIGRATE_ISOLATE to some
5777 * other migration type.
5778 *
5779 * Once the pageblocks are marked as MIGRATE_ISOLATE, we
5780 * migrate the pages from an unaligned range (ie. pages that
 5781 * we are interested in). This will put all the pages in the
 5782 * range back to the page allocator as MIGRATE_ISOLATE.
5783 *
5784 * When this is done, we take the pages in range from page
5785 * allocator removing them from the buddy system. This way
5786 * page allocator will never consider using them.
5787 *
5788 * This lets us mark the pageblocks back as
5789 * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the
5790 * aligned range but not in the unaligned, original range are
5791 * put back to page allocator so that buddy can use them.
5792 */
5793
5794 ret = start_isolate_page_range(pfn_max_align_down(start),
5795 pfn_max_align_up(end), migratetype);
5796 if (ret)
5797 goto done;
5798
5799 ret = __alloc_contig_migrate_range(start, end);
5800 if (ret)
5801 goto done;
5802
5803 /*
5804 * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
5805 * aligned blocks that are marked as MIGRATE_ISOLATE. What's
5806 * more, all pages in [start, end) are free in page allocator.
5807 * What we are going to do is to allocate all pages from
5808 * [start, end) (that is remove them from page allocator).
5809 *
5810 * The only problem is that pages at the beginning and at the
 5811 * end of the interesting range may not be aligned with pages that
 5812 * the page allocator holds, i.e. they can be part of higher-order
5813 * pages. Because of this, we reserve the bigger range and
5814 * once this is done free the pages we are not interested in.
5815 *
5816 * We don't have to hold zone->lock here because the pages are
5817 * isolated thus they won't get removed from buddy.
5818 */
5819
5820 lru_add_drain_all();
5821 drain_all_pages();
5822
5823 order = 0;
5824 outer_start = start;
5825 while (!PageBuddy(pfn_to_page(outer_start))) {
5826 if (++order >= MAX_ORDER) {
5827 ret = -EBUSY;
5828 goto done;
5829 }
5830 outer_start &= ~0UL << order;
5831 }
5832
5833 /* Make sure the range is really isolated. */
5834 if (test_pages_isolated(outer_start, end)) {
5835 pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n",
5836 outer_start, end);
5837 ret = -EBUSY;
5838 goto done;
5839 }
5840
5841 /*
5842 * Reclaim enough pages to make sure that contiguous allocation
5843 * will not starve the system.
5844 */
5845 __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
5846
5847 /* Grab isolated pages from freelists. */
5848 outer_end = isolate_freepages_range(outer_start, end);
5849 if (!outer_end) {
5850 ret = -EBUSY;
5851 goto done;
5852 }
5853
5854 /* Free head and tail (if any) */
5855 if (start != outer_start)
5856 free_contig_range(outer_start, start - outer_start);
5857 if (end != outer_end)
5858 free_contig_range(end, outer_end - end);
5859
5860done:
5861 undo_isolate_page_range(pfn_max_align_down(start),
5862 pfn_max_align_up(end), migratetype);
5863 return ret;
5864}
5865
5866void free_contig_range(unsigned long pfn, unsigned nr_pages)
5867{
5868 for (; nr_pages--; ++pfn)
5869 __free_page(pfn_to_page(pfn));
5870}
5871#endif
5872
5556#ifdef CONFIG_MEMORY_HOTREMOVE 5873#ifdef CONFIG_MEMORY_HOTREMOVE
5557/* 5874/*
5558 * All pages in the range must be isolated before calling this. 5875 * All pages in the range must be isolated before calling this.
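alloc_contig_range() and free_contig_range() are the interface the CMA core builds on (the dma-contiguous allocator in the drivers/ part of this merge). A minimal, hypothetical caller, assuming the PFN range lies within one zone and its pageblocks were set to MIGRATE_CMA when the region was reserved:

	static struct page *grab_cma_range(unsigned long pfn, unsigned long nr_pages)
	{
		/* Returns 0 on success; the pages in [pfn, pfn + nr_pages) then
		 * belong to the caller until free_contig_range() is called. */
		if (alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA))
			return NULL;
		return pfn_to_page(pfn);
	}

	static void put_cma_range(unsigned long pfn, unsigned long nr_pages)
	{
		free_contig_range(pfn, nr_pages);
	}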
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 4ae42bb40892..c9f04774f2b8 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -24,6 +24,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
24 * to be MIGRATE_ISOLATE. 24 * to be MIGRATE_ISOLATE.
25 * @start_pfn: The lower PFN of the range to be isolated. 25 * @start_pfn: The lower PFN of the range to be isolated.
26 * @end_pfn: The upper PFN of the range to be isolated. 26 * @end_pfn: The upper PFN of the range to be isolated.
27 * @migratetype: migrate type to set in error recovery.
27 * 28 *
28 * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in 29 * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
29 * the range will never be allocated. Any free pages and pages freed in the 30 * the range will never be allocated. Any free pages and pages freed in the
@@ -32,8 +33,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
32 * start_pfn/end_pfn must be aligned to pageblock_order. 33 * start_pfn/end_pfn must be aligned to pageblock_order.
33 * Returns 0 on success and -EBUSY if any part of range cannot be isolated. 34 * Returns 0 on success and -EBUSY if any part of range cannot be isolated.
34 */ 35 */
35int 36int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
36start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) 37 unsigned migratetype)
37{ 38{
38 unsigned long pfn; 39 unsigned long pfn;
39 unsigned long undo_pfn; 40 unsigned long undo_pfn;
@@ -56,7 +57,7 @@ undo:
56 for (pfn = start_pfn; 57 for (pfn = start_pfn;
57 pfn < undo_pfn; 58 pfn < undo_pfn;
58 pfn += pageblock_nr_pages) 59 pfn += pageblock_nr_pages)
59 unset_migratetype_isolate(pfn_to_page(pfn)); 60 unset_migratetype_isolate(pfn_to_page(pfn), migratetype);
60 61
61 return -EBUSY; 62 return -EBUSY;
62} 63}
@@ -64,8 +65,8 @@ undo:
64/* 65/*
65 * Make isolated pages available again. 66 * Make isolated pages available again.
66 */ 67 */
67int 68int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
68undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) 69 unsigned migratetype)
69{ 70{
70 unsigned long pfn; 71 unsigned long pfn;
71 struct page *page; 72 struct page *page;
@@ -77,7 +78,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn)
77 page = __first_valid_page(pfn, pageblock_nr_pages); 78 page = __first_valid_page(pfn, pageblock_nr_pages);
78 if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE) 79 if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
79 continue; 80 continue;
80 unset_migratetype_isolate(page); 81 unset_migratetype_isolate(page, migratetype);
81 } 82 }
82 return 0; 83 return 0;
83} 84}
@@ -86,7 +87,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn)
86 * all pages in [start_pfn...end_pfn) must be in the same zone. 87 * all pages in [start_pfn...end_pfn) must be in the same zone.
87 * zone->lock must be held before call this. 88 * zone->lock must be held before call this.
88 * 89 *
89 * Returns 1 if all pages in the range is isolated. 90 * Returns 1 if all pages in the range are isolated.
90 */ 91 */
91static int 92static int
92__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn) 93__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 7db1b9bab492..0dad31dc1618 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -613,6 +613,9 @@ static char * const migratetype_names[MIGRATE_TYPES] = {
613 "Reclaimable", 613 "Reclaimable",
614 "Movable", 614 "Movable",
615 "Reserve", 615 "Reserve",
616#ifdef CONFIG_CMA
617 "CMA",
618#endif
616 "Isolate", 619 "Isolate",
617}; 620};
618 621
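The name table above only stays correct because it mirrors the migratetype enum ordering, with the CMA entry compiled in between Reserve and Isolate only under CONFIG_CMA. A simplified sketch of the layout this hunk implies (illustrative only; the real enum lives in include/linux/mmzone.h and also defines MIGRATE_PCPTYPES):

	enum {
		EXAMPLE_MIGRATE_UNMOVABLE,
		EXAMPLE_MIGRATE_RECLAIMABLE,
		EXAMPLE_MIGRATE_MOVABLE,
		EXAMPLE_MIGRATE_RESERVE,
	#ifdef CONFIG_CMA
		EXAMPLE_MIGRATE_CMA,	/* printed as "CMA" above */
	#endif
		EXAMPLE_MIGRATE_ISOLATE,
		EXAMPLE_MIGRATE_TYPES	/* must match ARRAY_SIZE(migratetype_names) */
	};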