41 files changed, 2898 insertions, 780 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b69cfdc12112..f1959b7d13d0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -508,6 +508,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Also note the kernel might malfunction if you disable
 			some critical bits.
 
+	cma=nn[MG]	[ARM,KNL]
+			Sets the size of kernel global memory area for contiguous
+			memory allocations. For more information, see
+			include/linux/dma-contiguous.h
+
 	cmo_free_hint=	[PPC] Format: { yes | no }
 			Specify whether pages are marked as being inactive
 			when they are freed. This is used in CMO environments
@@ -515,6 +520,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			a hypervisor.
 			Default: yes
 
+	coherent_pool=nn[KMG]	[ARM,KNL]
+			Sets the size of memory pool for coherent, atomic dma
+			allocations if Contiguous Memory Allocator (CMA) is used.
+
 	code_bytes	[X86] How many bytes of object code to print
 			in an oops report.
 			Range: 0 - 8192
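Note (not part of the patch): the two parameters documented above are normally combined on the kernel command line. For example, booting with "cma=64M coherent_pool=2M" reserves a 64 MiB global CMA area and sets aside a 2 MiB pool, carved out of that area by the coherent_init() code added later in this diff, for atomic coherent allocations; the sizes here are purely illustrative, only the parameter names come from the hunks above.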
diff --git a/arch/Kconfig b/arch/Kconfig
index e9a910876cda..8c3d957fa8e2 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -159,6 +159,9 @@ config HAVE_ARCH_TRACEHOOK
 config HAVE_DMA_ATTRS
 	bool
 
+config HAVE_DMA_CONTIGUOUS
+	bool
+
 config USE_GENERIC_SMP_HELPERS
 	bool
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 5458aa9db067..3ca1ba981efb 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -5,6 +5,9 @@ config ARM
 	select HAVE_AOUT
 	select HAVE_DMA_API_DEBUG
 	select HAVE_IDE if PCI || ISA || PCMCIA
+	select HAVE_DMA_ATTRS
+	select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7)
+	select CMA if (CPU_V6 || CPU_V6K || CPU_V7)
 	select HAVE_MEMBLOCK
 	select RTC_LIB
 	select SYS_SUPPORTS_APM_EMULATION
@@ -54,6 +57,14 @@ config ARM
 config ARM_HAS_SG_CHAIN
 	bool
 
+config NEED_SG_DMA_LENGTH
+	bool
+
+config ARM_DMA_USE_IOMMU
+	select NEED_SG_DMA_LENGTH
+	select ARM_HAS_SG_CHAIN
+	bool
+
 config HAVE_PWM
 	bool
 
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
index 595ecd290ebf..9d7eb530f95f 100644
--- a/arch/arm/common/dmabounce.c
+++ b/arch/arm/common/dmabounce.c
@@ -173,7 +173,8 @@ find_safe_buffer(struct dmabounce_device_info *device_info, dma_addr_t safe_dma_
 	read_lock_irqsave(&device_info->lock, flags);
 
 	list_for_each_entry(b, &device_info->safe_buffers, node)
-		if (b->safe_dma_addr == safe_dma_addr) {
+		if (b->safe_dma_addr <= safe_dma_addr &&
+		    b->safe_dma_addr + b->size > safe_dma_addr) {
 			rb = b;
 			break;
 		}
@@ -254,7 +255,7 @@ static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size,
 	if (buf == NULL) {
 		dev_err(dev, "%s: unable to map unsafe buffer %p!\n",
 			__func__, ptr);
-		return ~0;
+		return DMA_ERROR_CODE;
 	}
 
 	dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n",
@@ -307,8 +308,9 @@ static inline void unmap_single(struct device *dev, struct safe_buffer *buf,
  * substitute the safe buffer for the unsafe one.
  * (basically move the buffer from an unsafe area to a safe one)
  */
-dma_addr_t __dma_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size, enum dma_data_direction dir)
+static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page,
+		unsigned long offset, size_t size, enum dma_data_direction dir,
+		struct dma_attrs *attrs)
 {
 	dma_addr_t dma_addr;
 	int ret;
@@ -320,21 +322,20 @@ dma_addr_t __dma_map_page(struct device *dev, struct page *page,
 
 	ret = needs_bounce(dev, dma_addr, size);
 	if (ret < 0)
-		return ~0;
+		return DMA_ERROR_CODE;
 
 	if (ret == 0) {
-		__dma_page_cpu_to_dev(page, offset, size, dir);
+		arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir);
 		return dma_addr;
 	}
 
 	if (PageHighMem(page)) {
 		dev_err(dev, "DMA buffer bouncing of HIGHMEM pages is not supported\n");
-		return ~0;
+		return DMA_ERROR_CODE;
 	}
 
 	return map_single(dev, page_address(page) + offset, size, dir);
 }
-EXPORT_SYMBOL(__dma_map_page);
 
 /*
  * see if a mapped address was really a "safe" buffer and if so, copy
@@ -342,8 +343,8 @@ EXPORT_SYMBOL(__dma_map_page);
  * the safe buffer. (basically return things back to the way they
  * should be)
  */
-void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
-		enum dma_data_direction dir)
+static void dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
+		enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 	struct safe_buffer *buf;
 
@@ -352,19 +353,18 @@ void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 
 	buf = find_safe_buffer_dev(dev, dma_addr, __func__);
 	if (!buf) {
-		__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, dma_addr)),
-			dma_addr & ~PAGE_MASK, size, dir);
+		arm_dma_ops.sync_single_for_cpu(dev, dma_addr, size, dir);
 		return;
 	}
 
 	unmap_single(dev, buf, size, dir);
 }
-EXPORT_SYMBOL(__dma_unmap_page);
 
-int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr,
-		unsigned long off, size_t sz, enum dma_data_direction dir)
+static int __dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr,
+		size_t sz, enum dma_data_direction dir)
 {
 	struct safe_buffer *buf;
+	unsigned long off;
 
 	dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n",
 		__func__, addr, off, sz, dir);
@@ -373,6 +373,8 @@ int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr,
 	if (!buf)
 		return 1;
 
+	off = addr - buf->safe_dma_addr;
+
 	BUG_ON(buf->direction != dir);
 
 	dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n",
@@ -388,12 +390,21 @@ int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr,
 	}
 	return 0;
 }
-EXPORT_SYMBOL(dmabounce_sync_for_cpu);
 
-int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr,
-		unsigned long off, size_t sz, enum dma_data_direction dir)
+static void dmabounce_sync_for_cpu(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	if (!__dmabounce_sync_for_cpu(dev, handle, size, dir))
+		return;
+
+	arm_dma_ops.sync_single_for_cpu(dev, handle, size, dir);
+}
+
+static int __dmabounce_sync_for_device(struct device *dev, dma_addr_t addr,
+		size_t sz, enum dma_data_direction dir)
 {
 	struct safe_buffer *buf;
+	unsigned long off;
 
 	dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n",
 		__func__, addr, off, sz, dir);
@@ -402,6 +413,8 @@ int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr,
 	if (!buf)
 		return 1;
 
+	off = addr - buf->safe_dma_addr;
+
 	BUG_ON(buf->direction != dir);
 
 	dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n",
@@ -417,7 +430,38 @@ int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr,
 	}
 	return 0;
 }
-EXPORT_SYMBOL(dmabounce_sync_for_device);
+
+static void dmabounce_sync_for_device(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	if (!__dmabounce_sync_for_device(dev, handle, size, dir))
+		return;
+
+	arm_dma_ops.sync_single_for_device(dev, handle, size, dir);
+}
+
+static int dmabounce_set_mask(struct device *dev, u64 dma_mask)
+{
+	if (dev->archdata.dmabounce)
+		return 0;
+
+	return arm_dma_ops.set_dma_mask(dev, dma_mask);
+}
+
+static struct dma_map_ops dmabounce_ops = {
+	.alloc			= arm_dma_alloc,
+	.free			= arm_dma_free,
+	.mmap			= arm_dma_mmap,
+	.map_page		= dmabounce_map_page,
+	.unmap_page		= dmabounce_unmap_page,
+	.sync_single_for_cpu	= dmabounce_sync_for_cpu,
+	.sync_single_for_device	= dmabounce_sync_for_device,
+	.map_sg			= arm_dma_map_sg,
+	.unmap_sg		= arm_dma_unmap_sg,
+	.sync_sg_for_cpu	= arm_dma_sync_sg_for_cpu,
+	.sync_sg_for_device	= arm_dma_sync_sg_for_device,
+	.set_dma_mask		= dmabounce_set_mask,
+};
 
 static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev,
 			       const char *name, unsigned long size)
@@ -479,6 +523,7 @@ int dmabounce_register_dev(struct device *dev, unsigned long small_buffer_size,
 #endif
 
 	dev->archdata.dmabounce = device_info;
+	set_dma_ops(dev, &dmabounce_ops);
 
 	dev_info(dev, "dmabounce: registered device\n");
 
@@ -497,6 +542,7 @@ void dmabounce_unregister_dev(struct device *dev)
 	struct dmabounce_device_info *device_info = dev->archdata.dmabounce;
 
 	dev->archdata.dmabounce = NULL;
+	set_dma_ops(dev, NULL);
 
 	if (!device_info) {
 		dev_warn(dev,
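For orientation (not part of the patch): once dmabounce_register_dev() installs dmabounce_ops via set_dma_ops(), the generic wrappers from asm-generic/dma-mapping-common.h dispatch each DMA call through the per-device ops instead of the old __dma_map_page() exports. A minimal sketch of that dispatch, using the get_dma_ops() helper added to asm/dma-mapping.h later in this diff; the function name is made up and the real wrapper also calls the dma-debug hooks:

	#include <linux/dma-mapping.h>

	/* Hypothetical illustration of the dispatch path only. */
	static dma_addr_t example_map_page(struct device *dev, struct page *page,
					   unsigned long offset, size_t size,
					   enum dma_data_direction dir)
	{
		struct dma_map_ops *ops = get_dma_ops(dev);

		/* dmabounce devices reach dmabounce_map_page(),
		 * everything else reaches arm_dma_map_page(). */
		return ops->map_page(dev, page, offset, size, dir, NULL);
	}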
diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
index 7aa368003b05..b69c0d3285f8 100644
--- a/arch/arm/include/asm/device.h
+++ b/arch/arm/include/asm/device.h
@@ -7,12 +7,16 @@
 #define ASMARM_DEVICE_H
 
 struct dev_archdata {
+	struct dma_map_ops *dma_ops;
 #ifdef CONFIG_DMABOUNCE
 	struct dmabounce_device_info *dmabounce;
 #endif
 #ifdef CONFIG_IOMMU_API
 	void *iommu; /* private IOMMU data */
 #endif
+#ifdef CONFIG_ARM_DMA_USE_IOMMU
+	struct dma_iommu_mapping *mapping;
+#endif
 };
 
 struct omap_device;
diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h
new file mode 100644
index 000000000000..3ed37b4d93da
--- /dev/null
+++ b/arch/arm/include/asm/dma-contiguous.h
@@ -0,0 +1,15 @@
+#ifndef ASMARM_DMA_CONTIGUOUS_H
+#define ASMARM_DMA_CONTIGUOUS_H
+
+#ifdef __KERNEL__
+#ifdef CONFIG_CMA
+
+#include <linux/types.h>
+#include <asm-generic/dma-contiguous.h>
+
+void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size);
+
+#endif
+#endif
+
+#endif
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
new file mode 100644
index 000000000000..799b09409fad
--- /dev/null
+++ b/arch/arm/include/asm/dma-iommu.h
@@ -0,0 +1,34 @@
+#ifndef ASMARM_DMA_IOMMU_H
+#define ASMARM_DMA_IOMMU_H
+
+#ifdef __KERNEL__
+
+#include <linux/mm_types.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-debug.h>
+#include <linux/kmemcheck.h>
+
+struct dma_iommu_mapping {
+	/* iommu specific data */
+	struct iommu_domain	*domain;
+
+	void			*bitmap;
+	size_t			bits;
+	unsigned int		order;
+	dma_addr_t		base;
+
+	spinlock_t		lock;
+	struct kref		kref;
+};
+
+struct dma_iommu_mapping *
+arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
+			 int order);
+
+void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
+
+int arm_iommu_attach_device(struct device *dev,
+			    struct dma_iommu_mapping *mapping);
+
+#endif /* __KERNEL__ */
+#endif
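The header above only declares the per-bus IOMMU mapping API. A brief usage sketch, not taken from the patch: the function and variable names example_enable_iommu_dma, the platform bus, and the 0x80000000/SZ_128M IOVA window are all assumptions for illustration, and ERR_PTR-style error reporting from arm_iommu_create_mapping() is assumed rather than shown in this hunk.

	#include <linux/err.h>
	#include <linux/platform_device.h>
	#include <linux/sizes.h>
	#include <asm/dma-iommu.h>

	static int example_enable_iommu_dma(struct device *dev)
	{
		struct dma_iommu_mapping *mapping;
		int ret;

		/* create a 128 MiB IOVA space starting at 0x80000000 */
		mapping = arm_iommu_create_mapping(&platform_bus_type,
						   0x80000000, SZ_128M, 0);
		if (IS_ERR(mapping))
			return PTR_ERR(mapping);

		/* route the device's streaming DMA through the IOMMU */
		ret = arm_iommu_attach_device(dev, mapping);
		if (ret < 0) {
			arm_iommu_release_mapping(mapping);
			return ret;
		}
		return 0;
	}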
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index cb3b7c981c4b..bbef15d04890 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -5,11 +5,35 @@
 
 #include <linux/mm_types.h>
 #include <linux/scatterlist.h>
+#include <linux/dma-attrs.h>
 #include <linux/dma-debug.h>
 
 #include <asm-generic/dma-coherent.h>
 #include <asm/memory.h>
 
+#define DMA_ERROR_CODE	(~0)
+extern struct dma_map_ops arm_dma_ops;
+
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+	if (dev && dev->archdata.dma_ops)
+		return dev->archdata.dma_ops;
+	return &arm_dma_ops;
+}
+
+static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
+{
+	BUG_ON(!dev);
+	dev->archdata.dma_ops = ops;
+}
+
+#include <asm-generic/dma-mapping-common.h>
+
+static inline int dma_set_mask(struct device *dev, u64 mask)
+{
+	return get_dma_ops(dev)->set_dma_mask(dev, mask);
+}
+
 #ifdef __arch_page_to_dma
 #error Please update to __arch_pfn_to_dma
 #endif
@@ -62,68 +86,11 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
 #endif
 
 /*
- * The DMA API is built upon the notion of "buffer ownership".  A buffer
- * is either exclusively owned by the CPU (and therefore may be accessed
- * by it) or exclusively owned by the DMA device.  These helper functions
- * represent the transitions between these two ownership states.
- *
- * Note, however, that on later ARMs, this notion does not work due to
- * speculative prefetches.  We model our approach on the assumption that
- * the CPU does do speculative prefetches, which means we clean caches
- * before transfers and delay cache invalidation until transfer completion.
- *
- * Private support functions: these are not part of the API and are
- * liable to change.  Drivers must not use these.
- */
-static inline void __dma_single_cpu_to_dev(const void *kaddr, size_t size,
-	enum dma_data_direction dir)
-{
-	extern void ___dma_single_cpu_to_dev(const void *, size_t,
-		enum dma_data_direction);
-
-	if (!arch_is_coherent())
-		___dma_single_cpu_to_dev(kaddr, size, dir);
-}
-
-static inline void __dma_single_dev_to_cpu(const void *kaddr, size_t size,
-	enum dma_data_direction dir)
-{
-	extern void ___dma_single_dev_to_cpu(const void *, size_t,
-		enum dma_data_direction);
-
-	if (!arch_is_coherent())
-		___dma_single_dev_to_cpu(kaddr, size, dir);
-}
-
-static inline void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
-	size_t size, enum dma_data_direction dir)
-{
-	extern void ___dma_page_cpu_to_dev(struct page *, unsigned long,
-		size_t, enum dma_data_direction);
-
-	if (!arch_is_coherent())
-		___dma_page_cpu_to_dev(page, off, size, dir);
-}
-
-static inline void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
-	size_t size, enum dma_data_direction dir)
-{
-	extern void ___dma_page_dev_to_cpu(struct page *, unsigned long,
-		size_t, enum dma_data_direction);
-
-	if (!arch_is_coherent())
-		___dma_page_dev_to_cpu(page, off, size, dir);
-}
-
-extern int dma_supported(struct device *, u64);
-extern int dma_set_mask(struct device *, u64);
-
-/*
  * DMA errors are defined by all-bits-set in the DMA address.
  */
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
-	return dma_addr == ~0;
+	return dma_addr == DMA_ERROR_CODE;
 }
 
 /*
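As a quick driver-level illustration (not from the patch; example_map_buffer and its callers are hypothetical), error reporting is unchanged by the switch to DMA_ERROR_CODE: mapping failures are still detected with dma_mapping_error() as defined in the hunk above.

	#include <linux/dma-mapping.h>

	static int example_map_buffer(struct device *dev, void *buf, size_t len,
				      dma_addr_t *dma)
	{
		dma_addr_t addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

		if (dma_mapping_error(dev, addr))	/* catches DMA_ERROR_CODE (~0) */
			return -ENOMEM;

		*dma = addr;
		return 0;
	}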
@@ -141,69 +108,118 @@ static inline void dma_free_noncoherent(struct device *dev, size_t size,
 {
 }
 
+extern int dma_supported(struct device *dev, u64 mask);
+
 /**
- * dma_alloc_coherent - allocate consistent memory for DMA
+ * arm_dma_alloc - allocate consistent memory for DMA
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
  * @size: required memory size
  * @handle: bus-specific DMA address
+ * @attrs: optinal attributes that specific mapping properties
  *
- * Allocate some uncached, unbuffered memory for a device for
- * performing DMA.  This function allocates pages, and will
- * return the CPU-viewed address, and sets @handle to be the
- * device-viewed address.
+ * Allocate some memory for a device for performing DMA.  This function
+ * allocates pages, and will return the CPU-viewed address, and sets @handle
+ * to be the device-viewed address.
  */
-extern void *dma_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t);
+extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+			   gfp_t gfp, struct dma_attrs *attrs);
+
+#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+				    dma_addr_t *dma_handle, gfp_t flag,
+				    struct dma_attrs *attrs)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+	void *cpu_addr;
+	BUG_ON(!ops);
+
+	cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
+	debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
+	return cpu_addr;
+}
 
 /**
- * dma_free_coherent - free memory allocated by dma_alloc_coherent
+ * arm_dma_free - free memory allocated by arm_dma_alloc
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
  * @size: size of memory originally requested in dma_alloc_coherent
  * @cpu_addr: CPU-view address returned from dma_alloc_coherent
  * @handle: device-view address returned from dma_alloc_coherent
+ * @attrs: optinal attributes that specific mapping properties
  *
  * Free (and unmap) a DMA buffer previously allocated by
- * dma_alloc_coherent().
+ * arm_dma_alloc().
  *
  * References to memory and mappings associated with cpu_addr/handle
  * during and after this call executing are illegal.
  */
-extern void dma_free_coherent(struct device *, size_t, void *, dma_addr_t);
+extern void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
+			 dma_addr_t handle, struct dma_attrs *attrs);
+
+#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL)
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+				  void *cpu_addr, dma_addr_t dma_handle,
+				  struct dma_attrs *attrs)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+	BUG_ON(!ops);
+
+	debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
+	ops->free(dev, size, cpu_addr, dma_handle, attrs);
+}
 
 /**
- * dma_mmap_coherent - map a coherent DMA allocation into user space
+ * arm_dma_mmap - map a coherent DMA allocation into user space
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
  * @vma: vm_area_struct describing requested user mapping
  * @cpu_addr: kernel CPU-view address returned from dma_alloc_coherent
  * @handle: device-view address returned from dma_alloc_coherent
  * @size: size of memory originally requested in dma_alloc_coherent
+ * @attrs: optinal attributes that specific mapping properties
  *
  * Map a coherent DMA buffer previously allocated by dma_alloc_coherent
  * into user space.  The coherent DMA buffer must not be freed by the
  * driver until the user space mapping has been released.
  */
-int dma_mmap_coherent(struct device *, struct vm_area_struct *,
-		void *, dma_addr_t, size_t);
+extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+			void *cpu_addr, dma_addr_t dma_addr, size_t size,
+			struct dma_attrs *attrs);
 
+#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL)
 
-/**
- * dma_alloc_writecombine - allocate writecombining memory for DMA
- * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
- * @size: required memory size
- * @handle: bus-specific DMA address
- *
- * Allocate some uncached, buffered memory for a device for
- * performing DMA.  This function allocates pages, and will
- * return the CPU-viewed address, and sets @handle to be the
- * device-viewed address.
- */
-extern void *dma_alloc_writecombine(struct device *, size_t, dma_addr_t *,
-		gfp_t);
+static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
+				 void *cpu_addr, dma_addr_t dma_addr,
+				 size_t size, struct dma_attrs *attrs)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+	BUG_ON(!ops);
+	return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+}
+
+static inline void *dma_alloc_writecombine(struct device *dev, size_t size,
+					   dma_addr_t *dma_handle, gfp_t flag)
+{
+	DEFINE_DMA_ATTRS(attrs);
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	return dma_alloc_attrs(dev, size, dma_handle, flag, &attrs);
+}
 
-#define dma_free_writecombine(dev,size,cpu_addr,handle) \
-	dma_free_coherent(dev,size,cpu_addr,handle)
+static inline void dma_free_writecombine(struct device *dev, size_t size,
+					 void *cpu_addr, dma_addr_t dma_handle)
+{
+	DEFINE_DMA_ATTRS(attrs);
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	return dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs);
+}
 
-int dma_mmap_writecombine(struct device *, struct vm_area_struct *,
-		void *, dma_addr_t, size_t);
+static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
+					void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+	DEFINE_DMA_ATTRS(attrs);
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs);
+}
 
 /*
  * This can be called during boot to increase the size of the consistent
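A brief, hypothetical driver-side sketch (struct example_buf, its fields and example_mmap are made up for illustration) of how the dma_mmap_coherent() wrapper defined in the hunk above is typically used to expose a coherent buffer through a character device's mmap handler:

	#include <linux/dma-mapping.h>
	#include <linux/fs.h>

	/* Hypothetical per-device state holding a buffer from dma_alloc_coherent(). */
	struct example_buf {
		struct device *dev;
		void *cpu_addr;
		dma_addr_t dma_handle;
		size_t size;
	};

	/* mmap() file operation: hands the coherent buffer to user space via
	 * dma_mmap_coherent(), which dispatches to ops->mmap (e.g. arm_dma_mmap). */
	static int example_mmap(struct file *file, struct vm_area_struct *vma)
	{
		struct example_buf *buf = file->private_data;

		return dma_mmap_coherent(buf->dev, vma, buf->cpu_addr,
					 buf->dma_handle, buf->size);
	}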
@@ -212,8 +228,6 @@ int dma_mmap_writecombine(struct device *, struct vm_area_struct *,
  */
 extern void __init init_consistent_dma_size(unsigned long size);
 
-
-#ifdef CONFIG_DMABOUNCE
 /*
  * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic"
  * and utilize bounce buffers as needed to work around limited DMA windows.
@@ -253,222 +267,19 @@ extern int dmabounce_register_dev(struct device *, unsigned long,
  */
 extern void dmabounce_unregister_dev(struct device *);
 
-/*
- * The DMA API, implemented by dmabounce.c.  See below for descriptions.
- */
-extern dma_addr_t __dma_map_page(struct device *, struct page *,
-		unsigned long, size_t, enum dma_data_direction);
-extern void __dma_unmap_page(struct device *, dma_addr_t, size_t,
-		enum dma_data_direction);
-
-/*
- * Private functions
- */
-int dmabounce_sync_for_cpu(struct device *, dma_addr_t, unsigned long,
-		size_t, enum dma_data_direction);
-int dmabounce_sync_for_device(struct device *, dma_addr_t, unsigned long,
-		size_t, enum dma_data_direction);
-#else
-static inline int dmabounce_sync_for_cpu(struct device *d, dma_addr_t addr,
-	unsigned long offset, size_t size, enum dma_data_direction dir)
-{
-	return 1;
-}
 
-static inline int dmabounce_sync_for_device(struct device *d, dma_addr_t addr,
-	unsigned long offset, size_t size, enum dma_data_direction dir)
-{
-	return 1;
-}
-
-
-static inline dma_addr_t __dma_map_page(struct device *dev, struct page *page,
-	unsigned long offset, size_t size, enum dma_data_direction dir)
-{
-	__dma_page_cpu_to_dev(page, offset, size, dir);
-	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
-}
-
-static inline void __dma_unmap_page(struct device *dev, dma_addr_t handle,
-	size_t size, enum dma_data_direction dir)
-{
-	__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
-		handle & ~PAGE_MASK, size, dir);
-}
-#endif /* CONFIG_DMABOUNCE */
-
-/**
- * dma_map_single - map a single buffer for streaming DMA
- * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
- * @cpu_addr: CPU direct mapped address of buffer
- * @size: size of buffer to map
- * @dir: DMA transfer direction
- *
- * Ensure that any data held in the cache is appropriately discarded
- * or written back.
- *
- * The device owns this memory once this call has completed.  The CPU
- * can regain ownership by calling dma_unmap_single() or
- * dma_sync_single_for_cpu().
- */
-static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
-		size_t size, enum dma_data_direction dir)
-{
-	unsigned long offset;
-	struct page *page;
-	dma_addr_t addr;
-
-	BUG_ON(!virt_addr_valid(cpu_addr));
-	BUG_ON(!virt_addr_valid(cpu_addr + size - 1));
-	BUG_ON(!valid_dma_direction(dir));
-
-	page = virt_to_page(cpu_addr);
-	offset = (unsigned long)cpu_addr & ~PAGE_MASK;
-	addr = __dma_map_page(dev, page, offset, size, dir);
-	debug_dma_map_page(dev, page, offset, size, dir, addr, true);
-
-	return addr;
-}
-
-/**
- * dma_map_page - map a portion of a page for streaming DMA
- * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
- * @page: page that buffer resides in
- * @offset: offset into page for start of buffer
- * @size: size of buffer to map
- * @dir: DMA transfer direction
- *
- * Ensure that any data held in the cache is appropriately discarded
- * or written back.
- *
- * The device owns this memory once this call has completed.  The CPU
- * can regain ownership by calling dma_unmap_page().
- */
-static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size, enum dma_data_direction dir)
-{
-	dma_addr_t addr;
-
-	BUG_ON(!valid_dma_direction(dir));
-
-	addr = __dma_map_page(dev, page, offset, size, dir);
-	debug_dma_map_page(dev, page, offset, size, dir, addr, false);
-
-	return addr;
-}
-
-/**
- * dma_unmap_single - unmap a single buffer previously mapped
- * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
- * @handle: DMA address of buffer
- * @size: size of buffer (same as passed to dma_map_single)
- * @dir: DMA transfer direction (same as passed to dma_map_single)
- *
- * Unmap a single streaming mode DMA translation.  The handle and size
- * must match what was provided in the previous dma_map_single() call.
- * All other usages are undefined.
- *
- * After this call, reads by the CPU to the buffer are guaranteed to see
- * whatever the device wrote there.
- */
-static inline void dma_unmap_single(struct device *dev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir)
-{
-	debug_dma_unmap_page(dev, handle, size, dir, true);
-	__dma_unmap_page(dev, handle, size, dir);
-}
-
-/**
- * dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
- * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
- * @handle: DMA address of buffer
- * @size: size of buffer (same as passed to dma_map_page)
- * @dir: DMA transfer direction (same as passed to dma_map_page)
- *
- * Unmap a page streaming mode DMA translation.  The handle and size
- * must match what was provided in the previous dma_map_page() call.
- * All other usages are undefined.
- *
- * After this call, reads by the CPU to the buffer are guaranteed to see
- * whatever the device wrote there.
- */
-static inline void dma_unmap_page(struct device *dev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir)
-{
-	debug_dma_unmap_page(dev, handle, size, dir, false);
-	__dma_unmap_page(dev, handle, size, dir);
-}
-
-/**
- * dma_sync_single_range_for_cpu
- * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
- * @handle: DMA address of buffer
- * @offset: offset of region to start sync
- * @size: size of region to sync
- * @dir: DMA transfer direction (same as passed to dma_map_single)
- *
- * Make physical memory consistent for a single streaming mode DMA
- * translation after a transfer.
- *
- * If you perform a dma_map_single() but wish to interrogate the
- * buffer using the cpu, yet do not wish to teardown the PCI dma
- * mapping, you must call this function before doing so.  At the
- * next point you give the PCI dma address back to the card, you
- * must first the perform a dma_sync_for_device, and then the
- * device again owns the buffer.
- */
-static inline void dma_sync_single_range_for_cpu(struct device *dev,
-		dma_addr_t handle, unsigned long offset, size_t size,
-		enum dma_data_direction dir)
-{
-	BUG_ON(!valid_dma_direction(dir));
-
-	debug_dma_sync_single_for_cpu(dev, handle + offset, size, dir);
-
-	if (!dmabounce_sync_for_cpu(dev, handle, offset, size, dir))
-		return;
-
-	__dma_single_dev_to_cpu(dma_to_virt(dev, handle) + offset, size, dir);
-}
-
-static inline void dma_sync_single_range_for_device(struct device *dev,
-		dma_addr_t handle, unsigned long offset, size_t size,
-		enum dma_data_direction dir)
-{
-	BUG_ON(!valid_dma_direction(dir));
-
-	debug_dma_sync_single_for_device(dev, handle + offset, size, dir);
-
-	if (!dmabounce_sync_for_device(dev, handle, offset, size, dir))
-		return;
-
-	__dma_single_cpu_to_dev(dma_to_virt(dev, handle) + offset, size, dir);
-}
-
-static inline void dma_sync_single_for_cpu(struct device *dev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	dma_sync_single_range_for_cpu(dev, handle, 0, size, dir);
-}
-
-static inline void dma_sync_single_for_device(struct device *dev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	dma_sync_single_range_for_device(dev, handle, 0, size, dir);
-}
 
 /*
  * The scatter list versions of the above methods.
  */
-extern int dma_map_sg(struct device *, struct scatterlist *, int,
-		enum dma_data_direction);
-extern void dma_unmap_sg(struct device *, struct scatterlist *, int,
+extern int arm_dma_map_sg(struct device *, struct scatterlist *, int,
+		enum dma_data_direction, struct dma_attrs *attrs);
+extern void arm_dma_unmap_sg(struct device *, struct scatterlist *, int,
+		enum dma_data_direction, struct dma_attrs *attrs);
+extern void arm_dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int,
 		enum dma_data_direction);
-extern void dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int,
+extern void arm_dma_sync_sg_for_device(struct device *, struct scatterlist *, int,
 		enum dma_data_direction);
-extern void dma_sync_sg_for_device(struct device *, struct scatterlist *, int,
-		enum dma_data_direction);
-
 
 #endif /* __KERNEL__ */
 #endif
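For completeness, a driver-side sketch of the scatterlist path (not from the patch; example_map_two_pages is a made-up helper): the generic dma_map_sg() wrapper from asm-generic/dma-mapping-common.h ends up calling the arm_dma_map_sg() declared above, or the dmabounce/IOMMU variant if one was installed with set_dma_ops().

	#include <linux/dma-mapping.h>
	#include <linux/scatterlist.h>

	static int example_map_two_pages(struct device *dev, struct page *p0,
					 struct page *p1, struct scatterlist sg[2])
	{
		sg_init_table(sg, 2);
		sg_set_page(&sg[0], p0, PAGE_SIZE, 0);
		sg_set_page(&sg[1], p1, PAGE_SIZE, 0);

		/* returns the number of mapped entries, 0 on failure */
		return dma_map_sg(dev, sg, 2, DMA_TO_DEVICE);
	}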
diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
index b36f3654bf54..a6efcdd6fd25 100644
--- a/arch/arm/include/asm/mach/map.h
+++ b/arch/arm/include/asm/mach/map.h
@@ -30,6 +30,7 @@ struct map_desc {
 #define MT_MEMORY_DTCM		12
 #define MT_MEMORY_ITCM		13
 #define MT_MEMORY_SO		14
+#define MT_MEMORY_DMA_READY	15
 
 #ifdef CONFIG_MMU
 extern void iotable_init(struct map_desc *, int);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index ebfac782593f..1b3096dfb964 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -81,6 +81,7 @@ __setup("fpe=", fpe_setup);
 extern void paging_init(struct machine_desc *desc);
 extern void sanity_check_meminfo(void);
 extern void reboot_setup(char *str);
+extern void setup_dma_zone(struct machine_desc *desc);
 
 unsigned int processor_id;
 EXPORT_SYMBOL(processor_id);
@@ -939,12 +940,8 @@ void __init setup_arch(char **cmdline_p)
 	machine_desc = mdesc;
 	machine_name = mdesc->name;
 
-#ifdef CONFIG_ZONE_DMA
-	if (mdesc->dma_zone_size) {
-		extern unsigned long arm_dma_zone_size;
-		arm_dma_zone_size = mdesc->dma_zone_size;
-	}
-#endif
+	setup_dma_zone(mdesc);
+
 	if (mdesc->restart_mode)
 		reboot_setup(&mdesc->restart_mode);
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index db23ae4aaaab..ea6b43154090 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -17,8 +17,12 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/dma-contiguous.h>
 #include <linux/highmem.h>
+#include <linux/memblock.h>
 #include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/vmalloc.h>
 
 #include <asm/memory.h>
 #include <asm/highmem.h>
@@ -26,9 +30,112 @@
 #include <asm/tlbflush.h>
 #include <asm/sizes.h>
 #include <asm/mach/arch.h>
+#include <asm/dma-iommu.h>
+#include <asm/mach/map.h>
+#include <asm/system_info.h>
+#include <asm/dma-contiguous.h>
 
 #include "mm.h"
 
+/*
+ * The DMA API is built upon the notion of "buffer ownership".  A buffer
+ * is either exclusively owned by the CPU (and therefore may be accessed
+ * by it) or exclusively owned by the DMA device.  These helper functions
+ * represent the transitions between these two ownership states.
+ *
+ * Note, however, that on later ARMs, this notion does not work due to
+ * speculative prefetches.  We model our approach on the assumption that
+ * the CPU does do speculative prefetches, which means we clean caches
+ * before transfers and delay cache invalidation until transfer completion.
+ *
+ */
+static void __dma_page_cpu_to_dev(struct page *, unsigned long,
+		size_t, enum dma_data_direction);
+static void __dma_page_dev_to_cpu(struct page *, unsigned long,
+		size_t, enum dma_data_direction);
+
+/**
+ * arm_dma_map_page - map a portion of a page for streaming DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @page: page that buffer resides in
+ * @offset: offset into page for start of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Ensure that any data held in the cache is appropriately discarded
+ * or written back.
+ *
+ * The device owns this memory once this call has completed.  The CPU
+ * can regain ownership by calling dma_unmap_page().
+ */
+static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
+	     unsigned long offset, size_t size, enum dma_data_direction dir,
+	     struct dma_attrs *attrs)
+{
+	if (!arch_is_coherent())
+		__dma_page_cpu_to_dev(page, offset, size, dir);
+	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
+}
+
+/**
+ * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @handle: DMA address of buffer
+ * @size: size of buffer (same as passed to dma_map_page)
+ * @dir: DMA transfer direction (same as passed to dma_map_page)
+ *
+ * Unmap a page streaming mode DMA translation.  The handle and size
+ * must match what was provided in the previous dma_map_page() call.
+ * All other usages are undefined.
+ *
+ * After this call, reads by the CPU to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir,
+		struct dma_attrs *attrs)
+{
+	if (!arch_is_coherent())
+		__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
+				      handle & ~PAGE_MASK, size, dir);
+}
+
+static void arm_dma_sync_single_for_cpu(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	unsigned int offset = handle & (PAGE_SIZE - 1);
+	struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
+	if (!arch_is_coherent())
+		__dma_page_dev_to_cpu(page, offset, size, dir);
+}
+
+static void arm_dma_sync_single_for_device(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	unsigned int offset = handle & (PAGE_SIZE - 1);
+	struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
+	if (!arch_is_coherent())
+		__dma_page_cpu_to_dev(page, offset, size, dir);
+}
+
+static int arm_dma_set_mask(struct device *dev, u64 dma_mask);
+
+struct dma_map_ops arm_dma_ops = {
+	.alloc			= arm_dma_alloc,
+	.free			= arm_dma_free,
+	.mmap			= arm_dma_mmap,
+	.map_page		= arm_dma_map_page,
+	.unmap_page		= arm_dma_unmap_page,
+	.map_sg			= arm_dma_map_sg,
+	.unmap_sg		= arm_dma_unmap_sg,
+	.sync_single_for_cpu	= arm_dma_sync_single_for_cpu,
+	.sync_single_for_device	= arm_dma_sync_single_for_device,
+	.sync_sg_for_cpu	= arm_dma_sync_sg_for_cpu,
+	.sync_sg_for_device	= arm_dma_sync_sg_for_device,
+	.set_dma_mask		= arm_dma_set_mask,
+};
+EXPORT_SYMBOL(arm_dma_ops);
+
 static u64 get_coherent_dma_mask(struct device *dev)
 {
 	u64 mask = (u64)arm_dma_limit;
@@ -56,6 +163,21 @@ static u64 get_coherent_dma_mask(struct device *dev)
 	return mask;
 }
 
+static void __dma_clear_buffer(struct page *page, size_t size)
+{
+	void *ptr;
+	/*
+	 * Ensure that the allocated pages are zeroed, and that any data
+	 * lurking in the kernel direct-mapped region is invalidated.
+	 */
+	ptr = page_address(page);
+	if (ptr) {
+		memset(ptr, 0, size);
+		dmac_flush_range(ptr, ptr + size);
+		outer_flush_range(__pa(ptr), __pa(ptr) + size);
+	}
+}
+
 /*
  * Allocate a DMA buffer for 'dev' of size 'size' using the
  * specified gfp mask. Note that 'size' must be page aligned.
@@ -64,23 +186,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
 {
 	unsigned long order = get_order(size);
 	struct page *page, *p, *e;
-	void *ptr;
-	u64 mask = get_coherent_dma_mask(dev);
-
-#ifdef CONFIG_DMA_API_DEBUG
-	u64 limit = (mask + 1) & ~mask;
-	if (limit && size >= limit) {
-		dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
-			size, mask);
-		return NULL;
-	}
-#endif
-
-	if (!mask)
-		return NULL;
-
-	if (mask < 0xffffffffULL)
-		gfp |= GFP_DMA;
 
 	page = alloc_pages(gfp, order);
 	if (!page)
@@ -93,14 +198,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
 	for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
 		__free_page(p);
 
-	/*
-	 * Ensure that the allocated pages are zeroed, and that any data
-	 * lurking in the kernel direct-mapped region is invalidated.
-	 */
-	ptr = page_address(page);
-	memset(ptr, 0, size);
-	dmac_flush_range(ptr, ptr + size);
-	outer_flush_range(__pa(ptr), __pa(ptr) + size);
+	__dma_clear_buffer(page, size);
 
 	return page;
 }
@@ -170,6 +268,11 @@ static int __init consistent_init(void)
 	unsigned long base = consistent_base;
 	unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;
 
+#ifndef CONFIG_ARM_DMA_USE_IOMMU
+	if (cpu_architecture() >= CPU_ARCH_ARMv6)
+		return 0;
+#endif
+
 	consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
 	if (!consistent_pte) {
 		pr_err("%s: no memory\n", __func__);
@@ -184,14 +287,14 @@ static int __init consistent_init(void)
 
 		pud = pud_alloc(&init_mm, pgd, base);
 		if (!pud) {
-			printk(KERN_ERR "%s: no pud tables\n", __func__);
+			pr_err("%s: no pud tables\n", __func__);
 			ret = -ENOMEM;
 			break;
 		}
 
 		pmd = pmd_alloc(&init_mm, pud, base);
 		if (!pmd) {
-			printk(KERN_ERR "%s: no pmd tables\n", __func__);
+			pr_err("%s: no pmd tables\n", __func__);
 			ret = -ENOMEM;
 			break;
 		}
@@ -199,7 +302,7 @@ static int __init consistent_init(void)
 
 		pte = pte_alloc_kernel(pmd, base);
 		if (!pte) {
-			printk(KERN_ERR "%s: no pte tables\n", __func__);
+			pr_err("%s: no pte tables\n", __func__);
 			ret = -ENOMEM;
 			break;
 		}
@@ -210,9 +313,101 @@ static int __init consistent_init(void)
 
 	return ret;
 }
-
 core_initcall(consistent_init);
 
+static void *__alloc_from_contiguous(struct device *dev, size_t size,
+				     pgprot_t prot, struct page **ret_page);
+
+static struct arm_vmregion_head coherent_head = {
+	.vm_lock	= __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock),
+	.vm_list	= LIST_HEAD_INIT(coherent_head.vm_list),
+};
+
+size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8;
+
+static int __init early_coherent_pool(char *p)
+{
+	coherent_pool_size = memparse(p, &p);
+	return 0;
+}
+early_param("coherent_pool", early_coherent_pool);
+
+/*
+ * Initialise the coherent pool for atomic allocations.
+ */
+static int __init coherent_init(void)
+{
+	pgprot_t prot = pgprot_dmacoherent(pgprot_kernel);
+	size_t size = coherent_pool_size;
+	struct page *page;
+	void *ptr;
+
+	if (cpu_architecture() < CPU_ARCH_ARMv6)
+		return 0;
+
+	ptr = __alloc_from_contiguous(NULL, size, prot, &page);
+	if (ptr) {
+		coherent_head.vm_start = (unsigned long) ptr;
+		coherent_head.vm_end = (unsigned long) ptr + size;
+		printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n",
+		       (unsigned)size / 1024);
+		return 0;
+	}
+	printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
+	       (unsigned)size / 1024);
+	return -ENOMEM;
+}
+/*
+ * CMA is activated by core_initcall, so we must be called after it.
+ */
+postcore_initcall(coherent_init);
+
+struct dma_contig_early_reserve {
+	phys_addr_t base;
+	unsigned long size;
+};
+
+static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata;
+
+static int dma_mmu_remap_num __initdata;
+
+void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
+{
+	dma_mmu_remap[dma_mmu_remap_num].base = base;
+	dma_mmu_remap[dma_mmu_remap_num].size = size;
+	dma_mmu_remap_num++;
+}
+
+void __init dma_contiguous_remap(void)
+{
+	int i;
+	for (i = 0; i < dma_mmu_remap_num; i++) {
+		phys_addr_t start = dma_mmu_remap[i].base;
+		phys_addr_t end = start + dma_mmu_remap[i].size;
+		struct map_desc map;
+		unsigned long addr;
+
+		if (end > arm_lowmem_limit)
+			end = arm_lowmem_limit;
+		if (start >= end)
+			return;
+
+		map.pfn = __phys_to_pfn(start);
+		map.virtual = __phys_to_virt(start);
+		map.length = end - start;
+		map.type = MT_MEMORY_DMA_READY;
+
+		/*
+		 * Clear previous low-memory mapping
+		 */
+		for (addr = __phys_to_virt(start); addr < __phys_to_virt(end);
+		     addr += PMD_SIZE)
+			pmd_clear(pmd_off_k(addr));
+
+		iotable_init(&map, 1);
+	}
+}
+
 static void *
 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
 		  const void *caller)
@@ -222,7 +417,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, | |||
222 | int bit; | 417 | int bit; |
223 | 418 | ||
224 | if (!consistent_pte) { | 419 | if (!consistent_pte) { |
225 | printk(KERN_ERR "%s: not initialised\n", __func__); | 420 | pr_err("%s: not initialised\n", __func__); |
226 | dump_stack(); | 421 | dump_stack(); |
227 | return NULL; | 422 | return NULL; |
228 | } | 423 | } |
@@ -249,7 +444,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, | |||
249 | u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); | 444 | u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); |
250 | 445 | ||
251 | pte = consistent_pte[idx] + off; | 446 | pte = consistent_pte[idx] + off; |
252 | c->vm_pages = page; | 447 | c->priv = page; |
253 | 448 | ||
254 | do { | 449 | do { |
255 | BUG_ON(!pte_none(*pte)); | 450 | BUG_ON(!pte_none(*pte)); |
@@ -281,14 +476,14 @@ static void __dma_free_remap(void *cpu_addr, size_t size) | |||
281 | 476 | ||
282 | c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); | 477 | c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); |
283 | if (!c) { | 478 | if (!c) { |
284 | printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", | 479 | pr_err("%s: trying to free invalid coherent area: %p\n", |
285 | __func__, cpu_addr); | 480 | __func__, cpu_addr); |
286 | dump_stack(); | 481 | dump_stack(); |
287 | return; | 482 | return; |
288 | } | 483 | } |
289 | 484 | ||
290 | if ((c->vm_end - c->vm_start) != size) { | 485 | if ((c->vm_end - c->vm_start) != size) { |
291 | printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", | 486 | pr_err("%s: freeing wrong coherent size (%ld != %d)\n", |
292 | __func__, c->vm_end - c->vm_start, size); | 487 | __func__, c->vm_end - c->vm_start, size); |
293 | dump_stack(); | 488 | dump_stack(); |
294 | size = c->vm_end - c->vm_start; | 489 | size = c->vm_end - c->vm_start; |
@@ -310,8 +505,8 @@ static void __dma_free_remap(void *cpu_addr, size_t size) | |||
310 | } | 505 | } |
311 | 506 | ||
312 | if (pte_none(pte) || !pte_present(pte)) | 507 | if (pte_none(pte) || !pte_present(pte)) |
313 | printk(KERN_CRIT "%s: bad page in kernel page table\n", | 508 | pr_crit("%s: bad page in kernel page table\n", |
314 | __func__); | 509 | __func__); |
315 | } while (size -= PAGE_SIZE); | 510 | } while (size -= PAGE_SIZE); |
316 | 511 | ||
317 | flush_tlb_kernel_range(c->vm_start, c->vm_end); | 512 | flush_tlb_kernel_range(c->vm_start, c->vm_end); |
@@ -319,20 +514,182 @@ static void __dma_free_remap(void *cpu_addr, size_t size) | |||
319 | arm_vmregion_free(&consistent_head, c); | 514 | arm_vmregion_free(&consistent_head, c); |
320 | } | 515 | } |
321 | 516 | ||
517 | static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, | ||
518 | void *data) | ||
519 | { | ||
520 | struct page *page = virt_to_page(addr); | ||
521 | pgprot_t prot = *(pgprot_t *)data; | ||
522 | |||
523 | set_pte_ext(pte, mk_pte(page, prot), 0); | ||
524 | return 0; | ||
525 | } | ||
526 | |||
527 | static void __dma_remap(struct page *page, size_t size, pgprot_t prot) | ||
528 | { | ||
529 | unsigned long start = (unsigned long) page_address(page); | ||
530 | unsigned end = start + size; | ||
531 | |||
532 | apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); | ||
533 | dsb(); | ||
534 | flush_tlb_kernel_range(start, end); | ||
535 | } | ||
536 | |||
537 | static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, | ||
538 | pgprot_t prot, struct page **ret_page, | ||
539 | const void *caller) | ||
540 | { | ||
541 | struct page *page; | ||
542 | void *ptr; | ||
543 | page = __dma_alloc_buffer(dev, size, gfp); | ||
544 | if (!page) | ||
545 | return NULL; | ||
546 | |||
547 | ptr = __dma_alloc_remap(page, size, gfp, prot, caller); | ||
548 | if (!ptr) { | ||
549 | __dma_free_buffer(page, size); | ||
550 | return NULL; | ||
551 | } | ||
552 | |||
553 | *ret_page = page; | ||
554 | return ptr; | ||
555 | } | ||
556 | |||
557 | static void *__alloc_from_pool(struct device *dev, size_t size, | ||
558 | struct page **ret_page, const void *caller) | ||
559 | { | ||
560 | struct arm_vmregion *c; | ||
561 | size_t align; | ||
562 | |||
563 | if (!coherent_head.vm_start) { | ||
564 | printk(KERN_ERR "%s: coherent pool not initialised!\n", | ||
565 | __func__); | ||
566 | dump_stack(); | ||
567 | return NULL; | ||
568 | } | ||
569 | |||
570 | /* | ||
571 | * Align the region allocation - allocations from pool are rather | ||
572 | * small, so align them to their order in pages, minimum is a page | ||
573 | * size. This helps reduce fragmentation of the DMA space. | ||
574 | */ | ||
575 | align = PAGE_SIZE << get_order(size); | ||
576 | c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller); | ||
577 | if (c) { | ||
578 | void *ptr = (void *)c->vm_start; | ||
579 | struct page *page = virt_to_page(ptr); | ||
580 | *ret_page = page; | ||
581 | return ptr; | ||
582 | } | ||
583 | return NULL; | ||
584 | } | ||
585 | |||
586 | static int __free_from_pool(void *cpu_addr, size_t size) | ||
587 | { | ||
588 | unsigned long start = (unsigned long)cpu_addr; | ||
589 | unsigned long end = start + size; | ||
590 | struct arm_vmregion *c; | ||
591 | |||
592 | if (start < coherent_head.vm_start || end > coherent_head.vm_end) | ||
593 | return 0; | ||
594 | |||
595 | c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start); | ||
596 | |||
597 | if ((c->vm_end - c->vm_start) != size) { | ||
598 | printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", | ||
599 | __func__, c->vm_end - c->vm_start, size); | ||
600 | dump_stack(); | ||
601 | size = c->vm_end - c->vm_start; | ||
602 | } | ||
603 | |||
604 | arm_vmregion_free(&coherent_head, c); | ||
605 | return 1; | ||
606 | } | ||
607 | |||
608 | static void *__alloc_from_contiguous(struct device *dev, size_t size, | ||
609 | pgprot_t prot, struct page **ret_page) | ||
610 | { | ||
611 | unsigned long order = get_order(size); | ||
612 | size_t count = size >> PAGE_SHIFT; | ||
613 | struct page *page; | ||
614 | |||
615 | page = dma_alloc_from_contiguous(dev, count, order); | ||
616 | if (!page) | ||
617 | return NULL; | ||
618 | |||
619 | __dma_clear_buffer(page, size); | ||
620 | __dma_remap(page, size, prot); | ||
621 | |||
622 | *ret_page = page; | ||
623 | return page_address(page); | ||
624 | } | ||
625 | |||
626 | static void __free_from_contiguous(struct device *dev, struct page *page, | ||
627 | size_t size) | ||
628 | { | ||
629 | __dma_remap(page, size, pgprot_kernel); | ||
630 | dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); | ||
631 | } | ||
632 | |||
633 | static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) | ||
634 | { | ||
635 | prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ? | ||
636 | pgprot_writecombine(prot) : | ||
637 | pgprot_dmacoherent(prot); | ||
638 | return prot; | ||
639 | } | ||
640 | |||
641 | #define nommu() 0 | ||
642 | |||
322 | #else /* !CONFIG_MMU */ | 643 | #else /* !CONFIG_MMU */ |
323 | 644 | ||
324 | #define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page) | 645 | #define nommu() 1 |
325 | #define __dma_free_remap(addr, size) do { } while (0) | 646 | |
647 | #define __get_dma_pgprot(attrs, prot) __pgprot(0) | ||
648 | #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL | ||
649 | #define __alloc_from_pool(dev, size, ret_page, c) NULL | ||
650 | #define __alloc_from_contiguous(dev, size, prot, ret) NULL | ||
651 | #define __free_from_pool(cpu_addr, size) 0 | ||
652 | #define __free_from_contiguous(dev, page, size) do { } while (0) | ||
653 | #define __dma_free_remap(cpu_addr, size) do { } while (0) | ||
326 | 654 | ||
327 | #endif /* CONFIG_MMU */ | 655 | #endif /* CONFIG_MMU */ |
328 | 656 | ||
329 | static void * | 657 | static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, |
330 | __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | 658 | struct page **ret_page) |
331 | pgprot_t prot, const void *caller) | 659 | { |
660 | struct page *page; | ||
661 | page = __dma_alloc_buffer(dev, size, gfp); | ||
662 | if (!page) | ||
663 | return NULL; | ||
664 | |||
665 | *ret_page = page; | ||
666 | return page_address(page); | ||
667 | } | ||
668 | |||
669 | |||
670 | |||
671 | static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, | ||
672 | gfp_t gfp, pgprot_t prot, const void *caller) | ||
332 | { | 673 | { |
674 | u64 mask = get_coherent_dma_mask(dev); | ||
333 | struct page *page; | 675 | struct page *page; |
334 | void *addr; | 676 | void *addr; |
335 | 677 | ||
678 | #ifdef CONFIG_DMA_API_DEBUG | ||
679 | u64 limit = (mask + 1) & ~mask; | ||
680 | if (limit && size >= limit) { | ||
681 | dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", | ||
682 | size, mask); | ||
683 | return NULL; | ||
684 | } | ||
685 | #endif | ||
686 | |||
687 | if (!mask) | ||
688 | return NULL; | ||
689 | |||
690 | if (mask < 0xffffffffULL) | ||
691 | gfp |= GFP_DMA; | ||
692 | |||
336 | /* | 693 | /* |
337 | * Following is a work-around (a.k.a. hack) to prevent pages | 694 | * Following is a work-around (a.k.a. hack) to prevent pages |
338 | * with __GFP_COMP being passed to split_page() which cannot | 695 | * with __GFP_COMP being passed to split_page() which cannot |
@@ -342,22 +699,20 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | |||
342 | */ | 699 | */ |
343 | gfp &= ~(__GFP_COMP); | 700 | gfp &= ~(__GFP_COMP); |
344 | 701 | ||
345 | *handle = ~0; | 702 | *handle = DMA_ERROR_CODE; |
346 | size = PAGE_ALIGN(size); | 703 | size = PAGE_ALIGN(size); |
347 | 704 | ||
348 | page = __dma_alloc_buffer(dev, size, gfp); | 705 | if (arch_is_coherent() || nommu()) |
349 | if (!page) | 706 | addr = __alloc_simple_buffer(dev, size, gfp, &page); |
350 | return NULL; | 707 | else if (cpu_architecture() < CPU_ARCH_ARMv6) |
351 | 708 | addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); | |
352 | if (!arch_is_coherent()) | 709 | else if (gfp & GFP_ATOMIC) |
353 | addr = __dma_alloc_remap(page, size, gfp, prot, caller); | 710 | addr = __alloc_from_pool(dev, size, &page, caller); |
354 | else | 711 | else |
355 | addr = page_address(page); | 712 | addr = __alloc_from_contiguous(dev, size, prot, &page); |
356 | 713 | ||
357 | if (addr) | 714 | if (addr) |
358 | *handle = pfn_to_dma(dev, page_to_pfn(page)); | 715 | *handle = pfn_to_dma(dev, page_to_pfn(page)); |
359 | else | ||
360 | __dma_free_buffer(page, size); | ||
361 | 716 | ||
362 | return addr; | 717 | return addr; |
363 | } | 718 | } |
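The rewritten __dma_alloc() above selects a backend at run time: a plain page allocation when the architecture is coherent or there is no MMU, the old remap path on pre-ARMv6 CPUs, the preallocated coherent pool for GFP_ATOMIC callers, and CMA for everything else. A minimal caller-side sketch follows; the foo_* names are hypothetical and dma_alloc_coherent()/dma_free_coherent() are the standard DMA API rather than anything added by this patch.

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/gfp.h>

/* Allocate a 64 KiB coherent buffer. On an ARMv6+ machine with this patch,
 * a GFP_KERNEL request is expected to be backed by CMA, while a GFP_ATOMIC
 * request would instead be carved out of the preallocated coherent pool. */
static int foo_setup_ring(struct device *dev, dma_addr_t *dma_handle)
{
	void *cpu_addr;

	cpu_addr = dma_alloc_coherent(dev, 64 * 1024, dma_handle, GFP_KERNEL);
	if (!cpu_addr)
		return -ENOMEM;

	/* ... hand *dma_handle to the device, touch the buffer via cpu_addr ... */

	dma_free_coherent(dev, 64 * 1024, cpu_addr, *dma_handle);
	return 0;
}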
@@ -366,138 +721,71 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | |||
366 | * Allocate DMA-coherent memory space and return both the kernel remapped | 721 | * Allocate DMA-coherent memory space and return both the kernel remapped |
367 | * virtual and bus address for that space. | 722 | * virtual and bus address for that space. |
368 | */ | 723 | */ |
369 | void * | 724 | void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, |
370 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) | 725 | gfp_t gfp, struct dma_attrs *attrs) |
371 | { | 726 | { |
727 | pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); | ||
372 | void *memory; | 728 | void *memory; |
373 | 729 | ||
374 | if (dma_alloc_from_coherent(dev, size, handle, &memory)) | 730 | if (dma_alloc_from_coherent(dev, size, handle, &memory)) |
375 | return memory; | 731 | return memory; |
376 | 732 | ||
377 | return __dma_alloc(dev, size, handle, gfp, | 733 | return __dma_alloc(dev, size, handle, gfp, prot, |
378 | pgprot_dmacoherent(pgprot_kernel), | ||
379 | __builtin_return_address(0)); | 734 | __builtin_return_address(0)); |
380 | } | 735 | } |
381 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
382 | 736 | ||
383 | /* | 737 | /* |
384 | * Allocate a writecombining region, in much the same way as | 738 | * Create userspace mapping for the DMA-coherent memory. |
385 | * dma_alloc_coherent above. | ||
386 | */ | 739 | */ |
387 | void * | 740 | int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, |
388 | dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) | 741 | void *cpu_addr, dma_addr_t dma_addr, size_t size, |
389 | { | 742 | struct dma_attrs *attrs) |
390 | return __dma_alloc(dev, size, handle, gfp, | ||
391 | pgprot_writecombine(pgprot_kernel), | ||
392 | __builtin_return_address(0)); | ||
393 | } | ||
394 | EXPORT_SYMBOL(dma_alloc_writecombine); | ||
395 | |||
396 | static int dma_mmap(struct device *dev, struct vm_area_struct *vma, | ||
397 | void *cpu_addr, dma_addr_t dma_addr, size_t size) | ||
398 | { | 743 | { |
399 | int ret = -ENXIO; | 744 | int ret = -ENXIO; |
400 | #ifdef CONFIG_MMU | 745 | #ifdef CONFIG_MMU |
401 | unsigned long user_size, kern_size; | 746 | unsigned long pfn = dma_to_pfn(dev, dma_addr); |
402 | struct arm_vmregion *c; | 747 | vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); |
403 | 748 | ||
404 | user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | 749 | if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) |
750 | return ret; | ||
405 | 751 | ||
406 | c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); | 752 | ret = remap_pfn_range(vma, vma->vm_start, |
407 | if (c) { | 753 | pfn + vma->vm_pgoff, |
408 | unsigned long off = vma->vm_pgoff; | 754 | vma->vm_end - vma->vm_start, |
409 | 755 | vma->vm_page_prot); | |
410 | kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; | ||
411 | |||
412 | if (off < kern_size && | ||
413 | user_size <= (kern_size - off)) { | ||
414 | ret = remap_pfn_range(vma, vma->vm_start, | ||
415 | page_to_pfn(c->vm_pages) + off, | ||
416 | user_size << PAGE_SHIFT, | ||
417 | vma->vm_page_prot); | ||
418 | } | ||
419 | } | ||
420 | #endif /* CONFIG_MMU */ | 756 | #endif /* CONFIG_MMU */ |
421 | 757 | ||
422 | return ret; | 758 | return ret; |
423 | } | 759 | } |
424 | 760 | ||
425 | int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, | ||
426 | void *cpu_addr, dma_addr_t dma_addr, size_t size) | ||
427 | { | ||
428 | vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot); | ||
429 | return dma_mmap(dev, vma, cpu_addr, dma_addr, size); | ||
430 | } | ||
431 | EXPORT_SYMBOL(dma_mmap_coherent); | ||
432 | |||
433 | int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, | ||
434 | void *cpu_addr, dma_addr_t dma_addr, size_t size) | ||
435 | { | ||
436 | vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); | ||
437 | return dma_mmap(dev, vma, cpu_addr, dma_addr, size); | ||
438 | } | ||
439 | EXPORT_SYMBOL(dma_mmap_writecombine); | ||
440 | |||
441 | /* | 761 | /* |
442 | * free a page as defined by the above mapping. | 762 | * Free a buffer as defined by the above mapping. |
443 | * Must not be called with IRQs disabled. | ||
444 | */ | 763 | */ |
445 | void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) | 764 | void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, |
765 | dma_addr_t handle, struct dma_attrs *attrs) | ||
446 | { | 766 | { |
447 | WARN_ON(irqs_disabled()); | 767 | struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); |
448 | 768 | ||
449 | if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) | 769 | if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) |
450 | return; | 770 | return; |
451 | 771 | ||
452 | size = PAGE_ALIGN(size); | 772 | size = PAGE_ALIGN(size); |
453 | 773 | ||
454 | if (!arch_is_coherent()) | 774 | if (arch_is_coherent() || nommu()) { |
775 | __dma_free_buffer(page, size); | ||
776 | } else if (cpu_architecture() < CPU_ARCH_ARMv6) { | ||
455 | __dma_free_remap(cpu_addr, size); | 777 | __dma_free_remap(cpu_addr, size); |
456 | 778 | __dma_free_buffer(page, size); | |
457 | __dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size); | ||
458 | } | ||
459 | EXPORT_SYMBOL(dma_free_coherent); | ||
460 | |||
461 | /* | ||
462 | * Make an area consistent for devices. | ||
463 | * Note: Drivers should NOT use this function directly, as it will break | ||
464 | * platforms with CONFIG_DMABOUNCE. | ||
465 | * Use the driver DMA support - see dma-mapping.h (dma_sync_*) | ||
466 | */ | ||
467 | void ___dma_single_cpu_to_dev(const void *kaddr, size_t size, | ||
468 | enum dma_data_direction dir) | ||
469 | { | ||
470 | unsigned long paddr; | ||
471 | |||
472 | BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); | ||
473 | |||
474 | dmac_map_area(kaddr, size, dir); | ||
475 | |||
476 | paddr = __pa(kaddr); | ||
477 | if (dir == DMA_FROM_DEVICE) { | ||
478 | outer_inv_range(paddr, paddr + size); | ||
479 | } else { | 779 | } else { |
480 | outer_clean_range(paddr, paddr + size); | 780 | if (__free_from_pool(cpu_addr, size)) |
481 | } | 781 | return; |
482 | /* FIXME: non-speculating: flush on bidirectional mappings? */ | 782 | /* |
483 | } | 783 | * Non-atomic allocations cannot be freed with IRQs disabled |
484 | EXPORT_SYMBOL(___dma_single_cpu_to_dev); | 784 | */ |
485 | 785 | WARN_ON(irqs_disabled()); | |
486 | void ___dma_single_dev_to_cpu(const void *kaddr, size_t size, | 786 | __free_from_contiguous(dev, page, size); |
487 | enum dma_data_direction dir) | ||
488 | { | ||
489 | BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); | ||
490 | |||
491 | /* FIXME: non-speculating: not required */ | ||
492 | /* don't bother invalidating if DMA to device */ | ||
493 | if (dir != DMA_TO_DEVICE) { | ||
494 | unsigned long paddr = __pa(kaddr); | ||
495 | outer_inv_range(paddr, paddr + size); | ||
496 | } | 787 | } |
497 | |||
498 | dmac_unmap_area(kaddr, size, dir); | ||
499 | } | 788 | } |
500 | EXPORT_SYMBOL(___dma_single_dev_to_cpu); | ||
501 | 789 | ||
502 | static void dma_cache_maint_page(struct page *page, unsigned long offset, | 790 | static void dma_cache_maint_page(struct page *page, unsigned long offset, |
503 | size_t size, enum dma_data_direction dir, | 791 | size_t size, enum dma_data_direction dir, |
@@ -543,7 +831,13 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, | |||
543 | } while (left); | 831 | } while (left); |
544 | } | 832 | } |
545 | 833 | ||
546 | void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, | 834 | /* |
835 | * Make an area consistent for devices. | ||
836 | * Note: Drivers should NOT use this function directly, as it will break | ||
837 | * platforms with CONFIG_DMABOUNCE. | ||
838 | * Use the driver DMA support - see dma-mapping.h (dma_sync_*) | ||
839 | */ | ||
840 | static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, | ||
547 | size_t size, enum dma_data_direction dir) | 841 | size_t size, enum dma_data_direction dir) |
548 | { | 842 | { |
549 | unsigned long paddr; | 843 | unsigned long paddr; |
@@ -558,9 +852,8 @@ void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, | |||
558 | } | 852 | } |
559 | /* FIXME: non-speculating: flush on bidirectional mappings? */ | 853 | /* FIXME: non-speculating: flush on bidirectional mappings? */ |
560 | } | 854 | } |
561 | EXPORT_SYMBOL(___dma_page_cpu_to_dev); | ||
562 | 855 | ||
563 | void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, | 856 | static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, |
564 | size_t size, enum dma_data_direction dir) | 857 | size_t size, enum dma_data_direction dir) |
565 | { | 858 | { |
566 | unsigned long paddr = page_to_phys(page) + off; | 859 | unsigned long paddr = page_to_phys(page) + off; |
@@ -578,10 +871,9 @@ void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, | |||
578 | if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) | 871 | if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) |
579 | set_bit(PG_dcache_clean, &page->flags); | 872 | set_bit(PG_dcache_clean, &page->flags); |
580 | } | 873 | } |
581 | EXPORT_SYMBOL(___dma_page_dev_to_cpu); | ||
582 | 874 | ||
583 | /** | 875 | /** |
584 | * dma_map_sg - map a set of SG buffers for streaming mode DMA | 876 | * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA |
585 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 877 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |
586 | * @sg: list of buffers | 878 | * @sg: list of buffers |
587 | * @nents: number of buffers to map | 879 | * @nents: number of buffers to map |
@@ -596,32 +888,32 @@ EXPORT_SYMBOL(___dma_page_dev_to_cpu); | |||
596 | * Device ownership issues as mentioned for dma_map_single are the same | 888 | * Device ownership issues as mentioned for dma_map_single are the same |
597 | * here. | 889 | * here. |
598 | */ | 890 | */ |
599 | int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, | 891 | int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, |
600 | enum dma_data_direction dir) | 892 | enum dma_data_direction dir, struct dma_attrs *attrs) |
601 | { | 893 | { |
894 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
602 | struct scatterlist *s; | 895 | struct scatterlist *s; |
603 | int i, j; | 896 | int i, j; |
604 | 897 | ||
605 | BUG_ON(!valid_dma_direction(dir)); | ||
606 | |||
607 | for_each_sg(sg, s, nents, i) { | 898 | for_each_sg(sg, s, nents, i) { |
608 | s->dma_address = __dma_map_page(dev, sg_page(s), s->offset, | 899 | #ifdef CONFIG_NEED_SG_DMA_LENGTH |
609 | s->length, dir); | 900 | s->dma_length = s->length; |
901 | #endif | ||
902 | s->dma_address = ops->map_page(dev, sg_page(s), s->offset, | ||
903 | s->length, dir, attrs); | ||
610 | if (dma_mapping_error(dev, s->dma_address)) | 904 | if (dma_mapping_error(dev, s->dma_address)) |
611 | goto bad_mapping; | 905 | goto bad_mapping; |
612 | } | 906 | } |
613 | debug_dma_map_sg(dev, sg, nents, nents, dir); | ||
614 | return nents; | 907 | return nents; |
615 | 908 | ||
616 | bad_mapping: | 909 | bad_mapping: |
617 | for_each_sg(sg, s, i, j) | 910 | for_each_sg(sg, s, i, j) |
618 | __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); | 911 | ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); |
619 | return 0; | 912 | return 0; |
620 | } | 913 | } |
621 | EXPORT_SYMBOL(dma_map_sg); | ||
622 | 914 | ||
623 | /** | 915 | /** |
624 | * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg | 916 | * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg |
625 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 917 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |
626 | * @sg: list of buffers | 918 | * @sg: list of buffers |
627 | * @nents: number of buffers to unmap (same as was passed to dma_map_sg) | 919 | * @nents: number of buffers to unmap (same as was passed to dma_map_sg) |
@@ -630,70 +922,55 @@ EXPORT_SYMBOL(dma_map_sg); | |||
630 | * Unmap a set of streaming mode DMA translations. Again, CPU access | 922 | * Unmap a set of streaming mode DMA translations. Again, CPU access |
631 | * rules concerning calls here are the same as for dma_unmap_single(). | 923 | * rules concerning calls here are the same as for dma_unmap_single(). |
632 | */ | 924 | */ |
633 | void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, | 925 | void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, |
634 | enum dma_data_direction dir) | 926 | enum dma_data_direction dir, struct dma_attrs *attrs) |
635 | { | 927 | { |
928 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
636 | struct scatterlist *s; | 929 | struct scatterlist *s; |
637 | int i; | ||
638 | 930 | ||
639 | debug_dma_unmap_sg(dev, sg, nents, dir); | 931 | int i; |
640 | 932 | ||
641 | for_each_sg(sg, s, nents, i) | 933 | for_each_sg(sg, s, nents, i) |
642 | __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); | 934 | ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); |
643 | } | 935 | } |
644 | EXPORT_SYMBOL(dma_unmap_sg); | ||
645 | 936 | ||
646 | /** | 937 | /** |
647 | * dma_sync_sg_for_cpu | 938 | * arm_dma_sync_sg_for_cpu |
648 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 939 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |
649 | * @sg: list of buffers | 940 | * @sg: list of buffers |
650 | * @nents: number of buffers to map (returned from dma_map_sg) | 941 | * @nents: number of buffers to map (returned from dma_map_sg) |
651 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) | 942 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) |
652 | */ | 943 | */ |
653 | void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, | 944 | void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, |
654 | int nents, enum dma_data_direction dir) | 945 | int nents, enum dma_data_direction dir) |
655 | { | 946 | { |
947 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
656 | struct scatterlist *s; | 948 | struct scatterlist *s; |
657 | int i; | 949 | int i; |
658 | 950 | ||
659 | for_each_sg(sg, s, nents, i) { | 951 | for_each_sg(sg, s, nents, i) |
660 | if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0, | 952 | ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length, |
661 | sg_dma_len(s), dir)) | 953 | dir); |
662 | continue; | ||
663 | |||
664 | __dma_page_dev_to_cpu(sg_page(s), s->offset, | ||
665 | s->length, dir); | ||
666 | } | ||
667 | |||
668 | debug_dma_sync_sg_for_cpu(dev, sg, nents, dir); | ||
669 | } | 954 | } |
670 | EXPORT_SYMBOL(dma_sync_sg_for_cpu); | ||
671 | 955 | ||
672 | /** | 956 | /** |
673 | * dma_sync_sg_for_device | 957 | * arm_dma_sync_sg_for_device |
674 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 958 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |
675 | * @sg: list of buffers | 959 | * @sg: list of buffers |
676 | * @nents: number of buffers to map (returned from dma_map_sg) | 960 | * @nents: number of buffers to map (returned from dma_map_sg) |
677 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) | 961 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) |
678 | */ | 962 | */ |
679 | void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, | 963 | void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, |
680 | int nents, enum dma_data_direction dir) | 964 | int nents, enum dma_data_direction dir) |
681 | { | 965 | { |
966 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
682 | struct scatterlist *s; | 967 | struct scatterlist *s; |
683 | int i; | 968 | int i; |
684 | 969 | ||
685 | for_each_sg(sg, s, nents, i) { | 970 | for_each_sg(sg, s, nents, i) |
686 | if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0, | 971 | ops->sync_single_for_device(dev, sg_dma_address(s), s->length, |
687 | sg_dma_len(s), dir)) | 972 | dir); |
688 | continue; | ||
689 | |||
690 | __dma_page_cpu_to_dev(sg_page(s), s->offset, | ||
691 | s->length, dir); | ||
692 | } | ||
693 | |||
694 | debug_dma_sync_sg_for_device(dev, sg, nents, dir); | ||
695 | } | 973 | } |
696 | EXPORT_SYMBOL(dma_sync_sg_for_device); | ||
697 | 974 | ||
698 | /* | 975 | /* |
699 | * Return whether the given device DMA address mask can be supported | 976 | * Return whether the given device DMA address mask can be supported |
@@ -709,18 +986,15 @@ int dma_supported(struct device *dev, u64 mask) | |||
709 | } | 986 | } |
710 | EXPORT_SYMBOL(dma_supported); | 987 | EXPORT_SYMBOL(dma_supported); |
711 | 988 | ||
712 | int dma_set_mask(struct device *dev, u64 dma_mask) | 989 | static int arm_dma_set_mask(struct device *dev, u64 dma_mask) |
713 | { | 990 | { |
714 | if (!dev->dma_mask || !dma_supported(dev, dma_mask)) | 991 | if (!dev->dma_mask || !dma_supported(dev, dma_mask)) |
715 | return -EIO; | 992 | return -EIO; |
716 | 993 | ||
717 | #ifndef CONFIG_DMABOUNCE | ||
718 | *dev->dma_mask = dma_mask; | 994 | *dev->dma_mask = dma_mask; |
719 | #endif | ||
720 | 995 | ||
721 | return 0; | 996 | return 0; |
722 | } | 997 | } |
723 | EXPORT_SYMBOL(dma_set_mask); | ||
724 | 998 | ||
725 | #define PREALLOC_DMA_DEBUG_ENTRIES 4096 | 999 | #define PREALLOC_DMA_DEBUG_ENTRIES 4096 |
726 | 1000 | ||
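arm_dma_set_mask() is now static and no longer exported; drivers keep calling the generic dma_set_mask(), which presumably reaches this function through the set_dma_mask hook of the dma_map_ops installed earlier in the series. A hedged probe-time sketch (foo_probe is a made-up name):

#include <linux/device.h>
#include <linux/dma-mapping.h>

/* Declare that the hypothetical device can only address 32 bits of DMA.
 * dma_set_mask() is the generic API; with this patch it is expected to be
 * dispatched through the device's dma_map_ops instead of a global symbol. */
static int foo_probe(struct device *dev)
{
	if (dma_set_mask(dev, DMA_BIT_MASK(32)))
		return -EIO;

	return 0;
}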
@@ -733,3 +1007,679 @@ static int __init dma_debug_do_init(void) | |||
733 | return 0; | 1007 | return 0; |
734 | } | 1008 | } |
735 | fs_initcall(dma_debug_do_init); | 1009 | fs_initcall(dma_debug_do_init); |
1010 | |||
1011 | #ifdef CONFIG_ARM_DMA_USE_IOMMU | ||
1012 | |||
1013 | /* IOMMU */ | ||
1014 | |||
1015 | static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, | ||
1016 | size_t size) | ||
1017 | { | ||
1018 | unsigned int order = get_order(size); | ||
1019 | unsigned int align = 0; | ||
1020 | unsigned int count, start; | ||
1021 | unsigned long flags; | ||
1022 | |||
1023 | count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) + | ||
1024 | (1 << mapping->order) - 1) >> mapping->order; | ||
1025 | |||
1026 | if (order > mapping->order) | ||
1027 | align = (1 << (order - mapping->order)) - 1; | ||
1028 | |||
1029 | spin_lock_irqsave(&mapping->lock, flags); | ||
1030 | start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0, | ||
1031 | count, align); | ||
1032 | if (start > mapping->bits) { | ||
1033 | spin_unlock_irqrestore(&mapping->lock, flags); | ||
1034 | return DMA_ERROR_CODE; | ||
1035 | } | ||
1036 | |||
1037 | bitmap_set(mapping->bitmap, start, count); | ||
1038 | spin_unlock_irqrestore(&mapping->lock, flags); | ||
1039 | |||
1040 | return mapping->base + (start << (mapping->order + PAGE_SHIFT)); | ||
1041 | } | ||
1042 | |||
1043 | static inline void __free_iova(struct dma_iommu_mapping *mapping, | ||
1044 | dma_addr_t addr, size_t size) | ||
1045 | { | ||
1046 | unsigned int start = (addr - mapping->base) >> | ||
1047 | (mapping->order + PAGE_SHIFT); | ||
1048 | unsigned int count = ((size >> PAGE_SHIFT) + | ||
1049 | (1 << mapping->order) - 1) >> mapping->order; | ||
1050 | unsigned long flags; | ||
1051 | |||
1052 | spin_lock_irqsave(&mapping->lock, flags); | ||
1053 | bitmap_clear(mapping->bitmap, start, count); | ||
1054 | spin_unlock_irqrestore(&mapping->lock, flags); | ||
1055 | } | ||
1056 | |||
1057 | static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp) | ||
1058 | { | ||
1059 | struct page **pages; | ||
1060 | int count = size >> PAGE_SHIFT; | ||
1061 | int array_size = count * sizeof(struct page *); | ||
1062 | int i = 0; | ||
1063 | |||
1064 | if (array_size <= PAGE_SIZE) | ||
1065 | pages = kzalloc(array_size, gfp); | ||
1066 | else | ||
1067 | pages = vzalloc(array_size); | ||
1068 | if (!pages) | ||
1069 | return NULL; | ||
1070 | |||
1071 | while (count) { | ||
1072 | int j, order = __ffs(count); | ||
1073 | |||
1074 | pages[i] = alloc_pages(gfp | __GFP_NOWARN, order); | ||
1075 | while (!pages[i] && order) | ||
1076 | pages[i] = alloc_pages(gfp | __GFP_NOWARN, --order); | ||
1077 | if (!pages[i]) | ||
1078 | goto error; | ||
1079 | |||
1080 | if (order) | ||
1081 | split_page(pages[i], order); | ||
1082 | j = 1 << order; | ||
1083 | while (--j) | ||
1084 | pages[i + j] = pages[i] + j; | ||
1085 | |||
1086 | __dma_clear_buffer(pages[i], PAGE_SIZE << order); | ||
1087 | i += 1 << order; | ||
1088 | count -= 1 << order; | ||
1089 | } | ||
1090 | |||
1091 | return pages; | ||
1092 | error: | ||
1093 | while (--i) | ||
1094 | if (pages[i]) | ||
1095 | __free_pages(pages[i], 0); | ||
1096 | if (array_size < PAGE_SIZE) | ||
1097 | kfree(pages); | ||
1098 | else | ||
1099 | vfree(pages); | ||
1100 | return NULL; | ||
1101 | } | ||
1102 | |||
1103 | static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size) | ||
1104 | { | ||
1105 | int count = size >> PAGE_SHIFT; | ||
1106 | int array_size = count * sizeof(struct page *); | ||
1107 | int i; | ||
1108 | for (i = 0; i < count; i++) | ||
1109 | if (pages[i]) | ||
1110 | __free_pages(pages[i], 0); | ||
1111 | if (array_size < PAGE_SIZE) | ||
1112 | kfree(pages); | ||
1113 | else | ||
1114 | vfree(pages); | ||
1115 | return 0; | ||
1116 | } | ||
1117 | |||
1118 | /* | ||
1119 | * Create a CPU mapping for the specified pages | ||
1120 | */ | ||
1121 | static void * | ||
1122 | __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot) | ||
1123 | { | ||
1124 | struct arm_vmregion *c; | ||
1125 | size_t align; | ||
1126 | size_t count = size >> PAGE_SHIFT; | ||
1127 | int bit; | ||
1128 | |||
1129 | if (!consistent_pte[0]) { | ||
1130 | pr_err("%s: not initialised\n", __func__); | ||
1131 | dump_stack(); | ||
1132 | return NULL; | ||
1133 | } | ||
1134 | |||
1135 | /* | ||
1136 | * Align the virtual region allocation - maximum alignment is | ||
1137 | * a section size, minimum is a page size. This helps reduce | ||
1138 | * fragmentation of the DMA space, and also prevents allocations | ||
1139 | * smaller than a section from crossing a section boundary. | ||
1140 | */ | ||
1141 | bit = fls(size - 1); | ||
1142 | if (bit > SECTION_SHIFT) | ||
1143 | bit = SECTION_SHIFT; | ||
1144 | align = 1 << bit; | ||
1145 | |||
1146 | /* | ||
1147 | * Allocate a virtual address in the consistent mapping region. | ||
1148 | */ | ||
1149 | c = arm_vmregion_alloc(&consistent_head, align, size, | ||
1150 | gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL); | ||
1151 | if (c) { | ||
1152 | pte_t *pte; | ||
1153 | int idx = CONSISTENT_PTE_INDEX(c->vm_start); | ||
1154 | int i = 0; | ||
1155 | u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); | ||
1156 | |||
1157 | pte = consistent_pte[idx] + off; | ||
1158 | c->priv = pages; | ||
1159 | |||
1160 | do { | ||
1161 | BUG_ON(!pte_none(*pte)); | ||
1162 | |||
1163 | set_pte_ext(pte, mk_pte(pages[i], prot), 0); | ||
1164 | pte++; | ||
1165 | off++; | ||
1166 | i++; | ||
1167 | if (off >= PTRS_PER_PTE) { | ||
1168 | off = 0; | ||
1169 | pte = consistent_pte[++idx]; | ||
1170 | } | ||
1171 | } while (i < count); | ||
1172 | |||
1173 | dsb(); | ||
1174 | |||
1175 | return (void *)c->vm_start; | ||
1176 | } | ||
1177 | return NULL; | ||
1178 | } | ||
1179 | |||
1180 | /* | ||
1181 | * Create a mapping in the device IO address space for the specified pages | ||
1182 | */ | ||
1183 | static dma_addr_t | ||
1184 | __iommu_create_mapping(struct device *dev, struct page **pages, size_t size) | ||
1185 | { | ||
1186 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
1187 | unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
1188 | dma_addr_t dma_addr, iova; | ||
1189 | int i, ret = DMA_ERROR_CODE; | ||
1190 | |||
1191 | dma_addr = __alloc_iova(mapping, size); | ||
1192 | if (dma_addr == DMA_ERROR_CODE) | ||
1193 | return dma_addr; | ||
1194 | |||
1195 | iova = dma_addr; | ||
1196 | for (i = 0; i < count; ) { | ||
1197 | unsigned int next_pfn = page_to_pfn(pages[i]) + 1; | ||
1198 | phys_addr_t phys = page_to_phys(pages[i]); | ||
1199 | unsigned int len, j; | ||
1200 | |||
1201 | for (j = i + 1; j < count; j++, next_pfn++) | ||
1202 | if (page_to_pfn(pages[j]) != next_pfn) | ||
1203 | break; | ||
1204 | |||
1205 | len = (j - i) << PAGE_SHIFT; | ||
1206 | ret = iommu_map(mapping->domain, iova, phys, len, 0); | ||
1207 | if (ret < 0) | ||
1208 | goto fail; | ||
1209 | iova += len; | ||
1210 | i = j; | ||
1211 | } | ||
1212 | return dma_addr; | ||
1213 | fail: | ||
1214 | iommu_unmap(mapping->domain, dma_addr, iova-dma_addr); | ||
1215 | __free_iova(mapping, dma_addr, size); | ||
1216 | return DMA_ERROR_CODE; | ||
1217 | } | ||
1218 | |||
1219 | static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) | ||
1220 | { | ||
1221 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
1222 | |||
1223 | /* | ||
1224 | * add optional in-page offset from iova to size and align | ||
1225 | * result to page size | ||
1226 | */ | ||
1227 | size = PAGE_ALIGN((iova & ~PAGE_MASK) + size); | ||
1228 | iova &= PAGE_MASK; | ||
1229 | |||
1230 | iommu_unmap(mapping->domain, iova, size); | ||
1231 | __free_iova(mapping, iova, size); | ||
1232 | return 0; | ||
1233 | } | ||
1234 | |||
1235 | static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, | ||
1236 | dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) | ||
1237 | { | ||
1238 | pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); | ||
1239 | struct page **pages; | ||
1240 | void *addr = NULL; | ||
1241 | |||
1242 | *handle = DMA_ERROR_CODE; | ||
1243 | size = PAGE_ALIGN(size); | ||
1244 | |||
1245 | pages = __iommu_alloc_buffer(dev, size, gfp); | ||
1246 | if (!pages) | ||
1247 | return NULL; | ||
1248 | |||
1249 | *handle = __iommu_create_mapping(dev, pages, size); | ||
1250 | if (*handle == DMA_ERROR_CODE) | ||
1251 | goto err_buffer; | ||
1252 | |||
1253 | addr = __iommu_alloc_remap(pages, size, gfp, prot); | ||
1254 | if (!addr) | ||
1255 | goto err_mapping; | ||
1256 | |||
1257 | return addr; | ||
1258 | |||
1259 | err_mapping: | ||
1260 | __iommu_remove_mapping(dev, *handle, size); | ||
1261 | err_buffer: | ||
1262 | __iommu_free_buffer(dev, pages, size); | ||
1263 | return NULL; | ||
1264 | } | ||
1265 | |||
1266 | static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, | ||
1267 | void *cpu_addr, dma_addr_t dma_addr, size_t size, | ||
1268 | struct dma_attrs *attrs) | ||
1269 | { | ||
1270 | struct arm_vmregion *c; | ||
1271 | |||
1272 | vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); | ||
1273 | c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); | ||
1274 | |||
1275 | if (c) { | ||
1276 | struct page **pages = c->priv; | ||
1277 | |||
1278 | unsigned long uaddr = vma->vm_start; | ||
1279 | unsigned long usize = vma->vm_end - vma->vm_start; | ||
1280 | int i = 0; | ||
1281 | |||
1282 | do { | ||
1283 | int ret; | ||
1284 | |||
1285 | ret = vm_insert_page(vma, uaddr, pages[i++]); | ||
1286 | if (ret) { | ||
1287 | pr_err("Remapping memory, error: %d\n", ret); | ||
1288 | return ret; | ||
1289 | } | ||
1290 | |||
1291 | uaddr += PAGE_SIZE; | ||
1292 | usize -= PAGE_SIZE; | ||
1293 | } while (usize > 0); | ||
1294 | } | ||
1295 | return 0; | ||
1296 | } | ||
1297 | |||
1298 | /* | ||
1299 | * Free a buffer as defined by the above mapping. | ||
1300 | * Must not be called with IRQs disabled. | ||
1301 | */ | ||
1302 | void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, | ||
1303 | dma_addr_t handle, struct dma_attrs *attrs) | ||
1304 | { | ||
1305 | struct arm_vmregion *c; | ||
1306 | size = PAGE_ALIGN(size); | ||
1307 | |||
1308 | c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); | ||
1309 | if (c) { | ||
1310 | struct page **pages = c->priv; | ||
1311 | __dma_free_remap(cpu_addr, size); | ||
1312 | __iommu_remove_mapping(dev, handle, size); | ||
1313 | __iommu_free_buffer(dev, pages, size); | ||
1314 | } | ||
1315 | } | ||
1316 | |||
1317 | /* | ||
1318 | * Map a part of the scatter-gather list into contiguous io address space | ||
1319 | */ | ||
1320 | static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, | ||
1321 | size_t size, dma_addr_t *handle, | ||
1322 | enum dma_data_direction dir) | ||
1323 | { | ||
1324 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
1325 | dma_addr_t iova, iova_base; | ||
1326 | int ret = 0; | ||
1327 | unsigned int count; | ||
1328 | struct scatterlist *s; | ||
1329 | |||
1330 | size = PAGE_ALIGN(size); | ||
1331 | *handle = DMA_ERROR_CODE; | ||
1332 | |||
1333 | iova_base = iova = __alloc_iova(mapping, size); | ||
1334 | if (iova == DMA_ERROR_CODE) | ||
1335 | return -ENOMEM; | ||
1336 | |||
1337 | for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) { | ||
1338 | phys_addr_t phys = page_to_phys(sg_page(s)); | ||
1339 | unsigned int len = PAGE_ALIGN(s->offset + s->length); | ||
1340 | |||
1341 | if (!arch_is_coherent()) | ||
1342 | __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); | ||
1343 | |||
1344 | ret = iommu_map(mapping->domain, iova, phys, len, 0); | ||
1345 | if (ret < 0) | ||
1346 | goto fail; | ||
1347 | count += len >> PAGE_SHIFT; | ||
1348 | iova += len; | ||
1349 | } | ||
1350 | *handle = iova_base; | ||
1351 | |||
1352 | return 0; | ||
1353 | fail: | ||
1354 | iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE); | ||
1355 | __free_iova(mapping, iova_base, size); | ||
1356 | return ret; | ||
1357 | } | ||
1358 | |||
1359 | /** | ||
1360 | * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA | ||
1361 | * @dev: valid struct device pointer | ||
1362 | * @sg: list of buffers | ||
1363 | * @nents: number of buffers to map | ||
1364 | * @dir: DMA transfer direction | ||
1365 | * | ||
1366 | * Map a set of buffers described by scatterlist in streaming mode for DMA. | ||
1367 | * The scatter gather list elements are merged together (if possible) and | ||
1368 | * tagged with the appropriate dma address and length. They are obtained via | ||
1369 | * sg_dma_{address,length}. | ||
1370 | */ | ||
1371 | int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, | ||
1372 | enum dma_data_direction dir, struct dma_attrs *attrs) | ||
1373 | { | ||
1374 | struct scatterlist *s = sg, *dma = sg, *start = sg; | ||
1375 | int i, count = 0; | ||
1376 | unsigned int offset = s->offset; | ||
1377 | unsigned int size = s->offset + s->length; | ||
1378 | unsigned int max = dma_get_max_seg_size(dev); | ||
1379 | |||
1380 | for (i = 1; i < nents; i++) { | ||
1381 | s = sg_next(s); | ||
1382 | |||
1383 | s->dma_address = DMA_ERROR_CODE; | ||
1384 | s->dma_length = 0; | ||
1385 | |||
1386 | if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { | ||
1387 | if (__map_sg_chunk(dev, start, size, &dma->dma_address, | ||
1388 | dir) < 0) | ||
1389 | goto bad_mapping; | ||
1390 | |||
1391 | dma->dma_address += offset; | ||
1392 | dma->dma_length = size - offset; | ||
1393 | |||
1394 | size = offset = s->offset; | ||
1395 | start = s; | ||
1396 | dma = sg_next(dma); | ||
1397 | count += 1; | ||
1398 | } | ||
1399 | size += s->length; | ||
1400 | } | ||
1401 | if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0) | ||
1402 | goto bad_mapping; | ||
1403 | |||
1404 | dma->dma_address += offset; | ||
1405 | dma->dma_length = size - offset; | ||
1406 | |||
1407 | return count+1; | ||
1408 | |||
1409 | bad_mapping: | ||
1410 | for_each_sg(sg, s, count, i) | ||
1411 | __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); | ||
1412 | return 0; | ||
1413 | } | ||
1414 | |||
1415 | /** | ||
1416 | * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg | ||
1417 | * @dev: valid struct device pointer | ||
1418 | * @sg: list of buffers | ||
1419 | * @nents: number of buffers to unmap (same as was passed to dma_map_sg) | ||
1420 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) | ||
1421 | * | ||
1422 | * Unmap a set of streaming mode DMA translations. Again, CPU access | ||
1423 | * rules concerning calls here are the same as for dma_unmap_single(). | ||
1424 | */ | ||
1425 | void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, | ||
1426 | enum dma_data_direction dir, struct dma_attrs *attrs) | ||
1427 | { | ||
1428 | struct scatterlist *s; | ||
1429 | int i; | ||
1430 | |||
1431 | for_each_sg(sg, s, nents, i) { | ||
1432 | if (sg_dma_len(s)) | ||
1433 | __iommu_remove_mapping(dev, sg_dma_address(s), | ||
1434 | sg_dma_len(s)); | ||
1435 | if (!arch_is_coherent()) | ||
1436 | __dma_page_dev_to_cpu(sg_page(s), s->offset, | ||
1437 | s->length, dir); | ||
1438 | } | ||
1439 | } | ||
1440 | |||
1441 | /** | ||
1442 | * arm_iommu_sync_sg_for_cpu | ||
1443 | * @dev: valid struct device pointer | ||
1444 | * @sg: list of buffers | ||
1445 | * @nents: number of buffers to map (returned from dma_map_sg) | ||
1446 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) | ||
1447 | */ | ||
1448 | void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, | ||
1449 | int nents, enum dma_data_direction dir) | ||
1450 | { | ||
1451 | struct scatterlist *s; | ||
1452 | int i; | ||
1453 | |||
1454 | for_each_sg(sg, s, nents, i) | ||
1455 | if (!arch_is_coherent()) | ||
1456 | __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); | ||
1457 | |||
1458 | } | ||
1459 | |||
1460 | /** | ||
1461 | * arm_iommu_sync_sg_for_device | ||
1462 | * @dev: valid struct device pointer | ||
1463 | * @sg: list of buffers | ||
1464 | * @nents: number of buffers to map (returned from dma_map_sg) | ||
1465 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) | ||
1466 | */ | ||
1467 | void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, | ||
1468 | int nents, enum dma_data_direction dir) | ||
1469 | { | ||
1470 | struct scatterlist *s; | ||
1471 | int i; | ||
1472 | |||
1473 | for_each_sg(sg, s, nents, i) | ||
1474 | if (!arch_is_coherent()) | ||
1475 | __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); | ||
1476 | } | ||
1477 | |||
1478 | |||
1479 | /** | ||
1480 | * arm_iommu_map_page | ||
1481 | * @dev: valid struct device pointer | ||
1482 | * @page: page that buffer resides in | ||
1483 | * @offset: offset into page for start of buffer | ||
1484 | * @size: size of buffer to map | ||
1485 | * @dir: DMA transfer direction | ||
1486 | * | ||
1487 | * IOMMU aware version of arm_dma_map_page() | ||
1488 | */ | ||
1489 | static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, | ||
1490 | unsigned long offset, size_t size, enum dma_data_direction dir, | ||
1491 | struct dma_attrs *attrs) | ||
1492 | { | ||
1493 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
1494 | dma_addr_t dma_addr; | ||
1495 | int ret, len = PAGE_ALIGN(size + offset); | ||
1496 | |||
1497 | if (!arch_is_coherent()) | ||
1498 | __dma_page_cpu_to_dev(page, offset, size, dir); | ||
1499 | |||
1500 | dma_addr = __alloc_iova(mapping, len); | ||
1501 | if (dma_addr == DMA_ERROR_CODE) | ||
1502 | return dma_addr; | ||
1503 | |||
1504 | ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0); | ||
1505 | if (ret < 0) | ||
1506 | goto fail; | ||
1507 | |||
1508 | return dma_addr + offset; | ||
1509 | fail: | ||
1510 | __free_iova(mapping, dma_addr, len); | ||
1511 | return DMA_ERROR_CODE; | ||
1512 | } | ||
1513 | |||
1514 | /** | ||
1515 | * arm_iommu_unmap_page | ||
1516 | * @dev: valid struct device pointer | ||
1517 | * @handle: DMA address of buffer | ||
1518 | * @size: size of buffer (same as passed to dma_map_page) | ||
1519 | * @dir: DMA transfer direction (same as passed to dma_map_page) | ||
1520 | * | ||
1521 | * IOMMU aware version of arm_dma_unmap_page() | ||
1522 | */ | ||
1523 | static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, | ||
1524 | size_t size, enum dma_data_direction dir, | ||
1525 | struct dma_attrs *attrs) | ||
1526 | { | ||
1527 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
1528 | dma_addr_t iova = handle & PAGE_MASK; | ||
1529 | struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); | ||
1530 | int offset = handle & ~PAGE_MASK; | ||
1531 | int len = PAGE_ALIGN(size + offset); | ||
1532 | |||
1533 | if (!iova) | ||
1534 | return; | ||
1535 | |||
1536 | if (!arch_is_coherent()) | ||
1537 | __dma_page_dev_to_cpu(page, offset, size, dir); | ||
1538 | |||
1539 | iommu_unmap(mapping->domain, iova, len); | ||
1540 | __free_iova(mapping, iova, len); | ||
1541 | } | ||
1542 | |||
1543 | static void arm_iommu_sync_single_for_cpu(struct device *dev, | ||
1544 | dma_addr_t handle, size_t size, enum dma_data_direction dir) | ||
1545 | { | ||
1546 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
1547 | dma_addr_t iova = handle & PAGE_MASK; | ||
1548 | struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); | ||
1549 | unsigned int offset = handle & ~PAGE_MASK; | ||
1550 | |||
1551 | if (!iova) | ||
1552 | return; | ||
1553 | |||
1554 | if (!arch_is_coherent()) | ||
1555 | __dma_page_dev_to_cpu(page, offset, size, dir); | ||
1556 | } | ||
1557 | |||
1558 | static void arm_iommu_sync_single_for_device(struct device *dev, | ||
1559 | dma_addr_t handle, size_t size, enum dma_data_direction dir) | ||
1560 | { | ||
1561 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
1562 | dma_addr_t iova = handle & PAGE_MASK; | ||
1563 | struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); | ||
1564 | unsigned int offset = handle & ~PAGE_MASK; | ||
1565 | |||
1566 | if (!iova) | ||
1567 | return; | ||
1568 | |||
1569 | __dma_page_cpu_to_dev(page, offset, size, dir); | ||
1570 | } | ||
1571 | |||
1572 | struct dma_map_ops iommu_ops = { | ||
1573 | .alloc = arm_iommu_alloc_attrs, | ||
1574 | .free = arm_iommu_free_attrs, | ||
1575 | .mmap = arm_iommu_mmap_attrs, | ||
1576 | |||
1577 | .map_page = arm_iommu_map_page, | ||
1578 | .unmap_page = arm_iommu_unmap_page, | ||
1579 | .sync_single_for_cpu = arm_iommu_sync_single_for_cpu, | ||
1580 | .sync_single_for_device = arm_iommu_sync_single_for_device, | ||
1581 | |||
1582 | .map_sg = arm_iommu_map_sg, | ||
1583 | .unmap_sg = arm_iommu_unmap_sg, | ||
1584 | .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu, | ||
1585 | .sync_sg_for_device = arm_iommu_sync_sg_for_device, | ||
1586 | }; | ||
1587 | |||
1588 | /** | ||
1589 | * arm_iommu_create_mapping | ||
1590 | * @bus: pointer to the bus holding the client device (for IOMMU calls) | ||
1591 | * @base: start address of the valid IO address space | ||
1592 | * @size: size of the valid IO address space | ||
1593 | * @order: accuracy of the IO address allocations (in 2^order page units) | ||
1594 | * | ||
1595 | * Creates a mapping structure which holds information about used/unused | ||
1596 | * IO address ranges, which is required to perform memory allocation and | ||
1597 | * mapping with IOMMU aware functions. | ||
1598 | * | ||
1599 | * The client device needs to be attached to the mapping with the | ||
1600 | * arm_iommu_attach_device() function. | ||
1601 | */ | ||
1602 | struct dma_iommu_mapping * | ||
1603 | arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size, | ||
1604 | int order) | ||
1605 | { | ||
1606 | unsigned int count = size >> (PAGE_SHIFT + order); | ||
1607 | unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); | ||
1608 | struct dma_iommu_mapping *mapping; | ||
1609 | int err = -ENOMEM; | ||
1610 | |||
1611 | if (!count) | ||
1612 | return ERR_PTR(-EINVAL); | ||
1613 | |||
1614 | mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL); | ||
1615 | if (!mapping) | ||
1616 | goto err; | ||
1617 | |||
1618 | mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | ||
1619 | if (!mapping->bitmap) | ||
1620 | goto err2; | ||
1621 | |||
1622 | mapping->base = base; | ||
1623 | mapping->bits = BITS_PER_BYTE * bitmap_size; | ||
1624 | mapping->order = order; | ||
1625 | spin_lock_init(&mapping->lock); | ||
1626 | |||
1627 | mapping->domain = iommu_domain_alloc(bus); | ||
1628 | if (!mapping->domain) | ||
1629 | goto err3; | ||
1630 | |||
1631 | kref_init(&mapping->kref); | ||
1632 | return mapping; | ||
1633 | err3: | ||
1634 | kfree(mapping->bitmap); | ||
1635 | err2: | ||
1636 | kfree(mapping); | ||
1637 | err: | ||
1638 | return ERR_PTR(err); | ||
1639 | } | ||
1640 | |||
1641 | static void release_iommu_mapping(struct kref *kref) | ||
1642 | { | ||
1643 | struct dma_iommu_mapping *mapping = | ||
1644 | container_of(kref, struct dma_iommu_mapping, kref); | ||
1645 | |||
1646 | iommu_domain_free(mapping->domain); | ||
1647 | kfree(mapping->bitmap); | ||
1648 | kfree(mapping); | ||
1649 | } | ||
1650 | |||
1651 | void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping) | ||
1652 | { | ||
1653 | if (mapping) | ||
1654 | kref_put(&mapping->kref, release_iommu_mapping); | ||
1655 | } | ||
1656 | |||
1657 | /** | ||
1658 | * arm_iommu_attach_device | ||
1659 | * @dev: valid struct device pointer | ||
1660 | * @mapping: io address space mapping structure (returned from | ||
1661 | * arm_iommu_create_mapping) | ||
1662 | * | ||
1663 | * Attaches the specified io address space mapping to the provided device; | ||
1664 | * this replaces the dma operations (dma_map_ops pointer) with the | ||
1665 | * IOMMU aware version. More than one client might be attached to | ||
1666 | * the same io address space mapping. | ||
1667 | */ | ||
1668 | int arm_iommu_attach_device(struct device *dev, | ||
1669 | struct dma_iommu_mapping *mapping) | ||
1670 | { | ||
1671 | int err; | ||
1672 | |||
1673 | err = iommu_attach_device(mapping->domain, dev); | ||
1674 | if (err) | ||
1675 | return err; | ||
1676 | |||
1677 | kref_get(&mapping->kref); | ||
1678 | dev->archdata.mapping = mapping; | ||
1679 | set_dma_ops(dev, &iommu_ops); | ||
1680 | |||
1681 | pr_info("Attached IOMMU controller to %s device.\n", dev_name(dev)); | ||
1682 | return 0; | ||
1683 | } | ||
1684 | |||
1685 | #endif | ||
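To show how the new ops are meant to be used, here is a hedged sketch of attaching a device to an IOMMU mapping: create an IOVA window with arm_iommu_create_mapping() and switch the device's dma_map_ops with arm_iommu_attach_device(). The header name (asm/dma-iommu.h), the platform_bus_type argument and the window parameters are assumptions made for illustration.

#include <linux/err.h>
#include <linux/platform_device.h>
#include <asm/dma-iommu.h>

/* Give the device a 128 MiB IO virtual address window at 0x80000000,
 * managed with 2^4-page allocation granularity, then route its DMA API
 * calls through the IOMMU-aware iommu_ops defined above. */
static int foo_enable_iommu(struct device *dev)
{
	struct dma_iommu_mapping *mapping;
	int err;

	mapping = arm_iommu_create_mapping(&platform_bus_type, 0x80000000,
					   128 * 1024 * 1024, 4);
	if (IS_ERR(mapping))
		return PTR_ERR(mapping);

	err = arm_iommu_attach_device(dev, mapping);
	if (err)
		arm_iommu_release_mapping(mapping);

	return err;
}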
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 8f5813bbffb5..c21d06c7dd7e 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/highmem.h> | 20 | #include <linux/highmem.h> |
21 | #include <linux/gfp.h> | 21 | #include <linux/gfp.h> |
22 | #include <linux/memblock.h> | 22 | #include <linux/memblock.h> |
23 | #include <linux/dma-contiguous.h> | ||
23 | 24 | ||
24 | #include <asm/mach-types.h> | 25 | #include <asm/mach-types.h> |
25 | #include <asm/memblock.h> | 26 | #include <asm/memblock.h> |
@@ -226,6 +227,17 @@ static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole, | |||
226 | } | 227 | } |
227 | #endif | 228 | #endif |
228 | 229 | ||
230 | void __init setup_dma_zone(struct machine_desc *mdesc) | ||
231 | { | ||
232 | #ifdef CONFIG_ZONE_DMA | ||
233 | if (mdesc->dma_zone_size) { | ||
234 | arm_dma_zone_size = mdesc->dma_zone_size; | ||
235 | arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; | ||
236 | } else | ||
237 | arm_dma_limit = 0xffffffff; | ||
238 | #endif | ||
239 | } | ||
240 | |||
229 | static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, | 241 | static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, |
230 | unsigned long max_high) | 242 | unsigned long max_high) |
231 | { | 243 | { |
@@ -273,12 +285,9 @@ static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, | |||
273 | * Adjust the sizes according to any special requirements for | 285 | * Adjust the sizes according to any special requirements for |
274 | * this machine type. | 286 | * this machine type. |
275 | */ | 287 | */ |
276 | if (arm_dma_zone_size) { | 288 | if (arm_dma_zone_size) |
277 | arm_adjust_dma_zone(zone_size, zhole_size, | 289 | arm_adjust_dma_zone(zone_size, zhole_size, |
278 | arm_dma_zone_size >> PAGE_SHIFT); | 290 | arm_dma_zone_size >> PAGE_SHIFT); |
279 | arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; | ||
280 | } else | ||
281 | arm_dma_limit = 0xffffffff; | ||
282 | #endif | 291 | #endif |
283 | 292 | ||
284 | free_area_init_node(0, zone_size, min, zhole_size); | 293 | free_area_init_node(0, zone_size, min, zhole_size); |
@@ -364,6 +373,12 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) | |||
364 | if (mdesc->reserve) | 373 | if (mdesc->reserve) |
365 | mdesc->reserve(); | 374 | mdesc->reserve(); |
366 | 375 | ||
376 | /* | ||
377 | * Reserve memory for DMA contiguous allocations; it | ||
378 | * must come from the DMA area inside low memory. | ||
379 | */ | ||
380 | dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit)); | ||
381 | |||
367 | arm_memblock_steal_permitted = false; | 382 | arm_memblock_steal_permitted = false; |
368 | memblock_allow_resize(); | 383 | memblock_allow_resize(); |
369 | memblock_dump_all(); | 384 | memblock_dump_all(); |
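dma_contiguous_reserve() above carves the global CMA area out of low memory. A board that wants a private area for a single device would typically do that from its machine descriptor's ->reserve() callback, which arm_memblock_init() invokes just before this call. A hedged sketch follows; the board and device names are invented, and dma_declare_contiguous() is assumed to be the per-device helper declared in include/linux/dma-contiguous.h elsewhere in this series.

#include <linux/dma-contiguous.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>

extern struct platform_device foo_camera_device;	/* hypothetical device */

/* Reserve a dedicated 16 MiB contiguous area for the camera device,
 * placed anywhere below the 256 MiB physical address boundary. */
static void __init foo_board_reserve(void)
{
	if (dma_declare_contiguous(&foo_camera_device.dev,
				   16 * 1024 * 1024, 0, 0x10000000))
		pr_warn("foo: camera CMA area reservation failed\n");
}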
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 27f4a619b35d..93dc0c17cdcb 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h | |||
@@ -67,5 +67,8 @@ extern u32 arm_dma_limit; | |||
67 | #define arm_dma_limit ((u32)~0) | 67 | #define arm_dma_limit ((u32)~0) |
68 | #endif | 68 | #endif |
69 | 69 | ||
70 | extern phys_addr_t arm_lowmem_limit; | ||
71 | |||
70 | void __init bootmem_init(void); | 72 | void __init bootmem_init(void); |
71 | void arm_mm_memblock_reserve(void); | 73 | void arm_mm_memblock_reserve(void); |
74 | void dma_contiguous_remap(void); | ||
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index aa78de8bfdd3..e5dad60b558b 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c | |||
@@ -288,6 +288,11 @@ static struct mem_type mem_types[] = { | |||
288 | PMD_SECT_UNCACHED | PMD_SECT_XN, | 288 | PMD_SECT_UNCACHED | PMD_SECT_XN, |
289 | .domain = DOMAIN_KERNEL, | 289 | .domain = DOMAIN_KERNEL, |
290 | }, | 290 | }, |
291 | [MT_MEMORY_DMA_READY] = { | ||
292 | .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, | ||
293 | .prot_l1 = PMD_TYPE_TABLE, | ||
294 | .domain = DOMAIN_KERNEL, | ||
295 | }, | ||
291 | }; | 296 | }; |
292 | 297 | ||
293 | const struct mem_type *get_mem_type(unsigned int type) | 298 | const struct mem_type *get_mem_type(unsigned int type) |
@@ -429,6 +434,7 @@ static void __init build_mem_type_table(void) | |||
429 | if (arch_is_coherent() && cpu_is_xsc3()) { | 434 | if (arch_is_coherent() && cpu_is_xsc3()) { |
430 | mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; | 435 | mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; |
431 | mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; | 436 | mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; |
437 | mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; | ||
432 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; | 438 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; |
433 | mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; | 439 | mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; |
434 | } | 440 | } |
@@ -460,6 +466,7 @@ static void __init build_mem_type_table(void) | |||
460 | mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; | 466 | mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; |
461 | mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; | 467 | mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; |
462 | mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; | 468 | mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; |
469 | mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; | ||
463 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; | 470 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; |
464 | mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; | 471 | mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; |
465 | } | 472 | } |
@@ -512,6 +519,7 @@ static void __init build_mem_type_table(void) | |||
512 | mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; | 519 | mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; |
513 | mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; | 520 | mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; |
514 | mem_types[MT_MEMORY].prot_pte |= kern_pgprot; | 521 | mem_types[MT_MEMORY].prot_pte |= kern_pgprot; |
522 | mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; | ||
515 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; | 523 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; |
516 | mem_types[MT_ROM].prot_sect |= cp->pmd; | 524 | mem_types[MT_ROM].prot_sect |= cp->pmd; |
517 | 525 | ||
@@ -596,7 +604,7 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr, | |||
596 | * L1 entries, whereas PGDs refer to a group of L1 entries making | 604 | * L1 entries, whereas PGDs refer to a group of L1 entries making |
597 | * up one logical pointer to an L2 table. | 605 | * up one logical pointer to an L2 table. |
598 | */ | 606 | */ |
599 | if (((addr | end | phys) & ~SECTION_MASK) == 0) { | 607 | if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) { |
600 | pmd_t *p = pmd; | 608 | pmd_t *p = pmd; |
601 | 609 | ||
602 | #ifndef CONFIG_ARM_LPAE | 610 | #ifndef CONFIG_ARM_LPAE |
@@ -814,7 +822,7 @@ static int __init early_vmalloc(char *arg) | |||
814 | } | 822 | } |
815 | early_param("vmalloc", early_vmalloc); | 823 | early_param("vmalloc", early_vmalloc); |
816 | 824 | ||
817 | static phys_addr_t lowmem_limit __initdata = 0; | 825 | phys_addr_t arm_lowmem_limit __initdata = 0; |
818 | 826 | ||
819 | void __init sanity_check_meminfo(void) | 827 | void __init sanity_check_meminfo(void) |
820 | { | 828 | { |
@@ -897,8 +905,8 @@ void __init sanity_check_meminfo(void) | |||
897 | bank->size = newsize; | 905 | bank->size = newsize; |
898 | } | 906 | } |
899 | #endif | 907 | #endif |
900 | if (!bank->highmem && bank->start + bank->size > lowmem_limit) | 908 | if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit) |
901 | lowmem_limit = bank->start + bank->size; | 909 | arm_lowmem_limit = bank->start + bank->size; |
902 | 910 | ||
903 | j++; | 911 | j++; |
904 | } | 912 | } |
@@ -923,8 +931,8 @@ void __init sanity_check_meminfo(void) | |||
923 | } | 931 | } |
924 | #endif | 932 | #endif |
925 | meminfo.nr_banks = j; | 933 | meminfo.nr_banks = j; |
926 | high_memory = __va(lowmem_limit - 1) + 1; | 934 | high_memory = __va(arm_lowmem_limit - 1) + 1; |
927 | memblock_set_current_limit(lowmem_limit); | 935 | memblock_set_current_limit(arm_lowmem_limit); |
928 | } | 936 | } |
929 | 937 | ||
930 | static inline void prepare_page_table(void) | 938 | static inline void prepare_page_table(void) |
@@ -949,8 +957,8 @@ static inline void prepare_page_table(void) | |||
949 | * Find the end of the first block of lowmem. | 957 | * Find the end of the first block of lowmem. |
950 | */ | 958 | */ |
951 | end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; | 959 | end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; |
952 | if (end >= lowmem_limit) | 960 | if (end >= arm_lowmem_limit) |
953 | end = lowmem_limit; | 961 | end = arm_lowmem_limit; |
954 | 962 | ||
955 | /* | 963 | /* |
956 | * Clear out all the kernel space mappings, except for the first | 964 | * Clear out all the kernel space mappings, except for the first |
@@ -1093,8 +1101,8 @@ static void __init map_lowmem(void) | |||
1093 | phys_addr_t end = start + reg->size; | 1101 | phys_addr_t end = start + reg->size; |
1094 | struct map_desc map; | 1102 | struct map_desc map; |
1095 | 1103 | ||
1096 | if (end > lowmem_limit) | 1104 | if (end > arm_lowmem_limit) |
1097 | end = lowmem_limit; | 1105 | end = arm_lowmem_limit; |
1098 | if (start >= end) | 1106 | if (start >= end) |
1099 | break; | 1107 | break; |
1100 | 1108 | ||
@@ -1115,11 +1123,12 @@ void __init paging_init(struct machine_desc *mdesc) | |||
1115 | { | 1123 | { |
1116 | void *zero_page; | 1124 | void *zero_page; |
1117 | 1125 | ||
1118 | memblock_set_current_limit(lowmem_limit); | 1126 | memblock_set_current_limit(arm_lowmem_limit); |
1119 | 1127 | ||
1120 | build_mem_type_table(); | 1128 | build_mem_type_table(); |
1121 | prepare_page_table(); | 1129 | prepare_page_table(); |
1122 | map_lowmem(); | 1130 | map_lowmem(); |
1131 | dma_contiguous_remap(); | ||
1123 | devicemaps_init(mdesc); | 1132 | devicemaps_init(mdesc); |
1124 | kmap_init(); | 1133 | kmap_init(); |
1125 | 1134 | ||
diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h index 162be662c088..bf312c354a21 100644 --- a/arch/arm/mm/vmregion.h +++ b/arch/arm/mm/vmregion.h | |||
@@ -17,7 +17,7 @@ struct arm_vmregion { | |||
17 | struct list_head vm_list; | 17 | struct list_head vm_list; |
18 | unsigned long vm_start; | 18 | unsigned long vm_start; |
19 | unsigned long vm_end; | 19 | unsigned long vm_end; |
20 | struct page *vm_pages; | 20 | void *priv; |
21 | int vm_active; | 21 | int vm_active; |
22 | const void *caller; | 22 | const void *caller; |
23 | }; | 23 | }; |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 66cc380bebf0..81c3e8be789a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -32,6 +32,7 @@ config X86 | |||
32 | select ARCH_WANT_OPTIONAL_GPIOLIB | 32 | select ARCH_WANT_OPTIONAL_GPIOLIB |
33 | select ARCH_WANT_FRAME_POINTERS | 33 | select ARCH_WANT_FRAME_POINTERS |
34 | select HAVE_DMA_ATTRS | 34 | select HAVE_DMA_ATTRS |
35 | select HAVE_DMA_CONTIGUOUS if !SWIOTLB | ||
35 | select HAVE_KRETPROBES | 36 | select HAVE_KRETPROBES |
36 | select HAVE_OPTPROBES | 37 | select HAVE_OPTPROBES |
37 | select HAVE_FTRACE_MCOUNT_RECORD | 38 | select HAVE_FTRACE_MCOUNT_RECORD |
diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h new file mode 100644 index 000000000000..c09241659971 --- /dev/null +++ b/arch/x86/include/asm/dma-contiguous.h | |||
@@ -0,0 +1,13 @@ | |||
1 | #ifndef ASMX86_DMA_CONTIGUOUS_H | ||
2 | #define ASMX86_DMA_CONTIGUOUS_H | ||
3 | |||
4 | #ifdef __KERNEL__ | ||
5 | |||
6 | #include <linux/types.h> | ||
7 | #include <asm-generic/dma-contiguous.h> | ||
8 | |||
9 | static inline void | ||
10 | dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } | ||
11 | |||
12 | #endif | ||
13 | #endif | ||
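
The fixup hook above is deliberately empty on x86. For context, a hedged sketch (not part of this patch, all names illustrative) of what an architecture that does need to adjust its mappings might do with this callback: record each reserved region and process the list later, once its page tables are being set up.

/*
 * Hedged sketch, not from this patch: record CMA regions for a later
 * architecture-specific fixup pass. x86 needs none, hence its empty stub.
 */
#include <linux/init.h>
#include <linux/types.h>
#include <linux/dma-contiguous.h>	/* MAX_CMA_AREAS */

struct example_cma_region {
	phys_addr_t base;
	unsigned long size;
};

static struct example_cma_region example_cma_regions[MAX_CMA_AREAS] __initdata;
static int example_cma_region_count __initdata;

void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
{
	if (example_cma_region_count >= MAX_CMA_AREAS)
		return;
	example_cma_regions[example_cma_region_count].base = base;
	example_cma_regions[example_cma_region_count].size = size;
	example_cma_region_count++;
}
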
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 61c0bd25845a..f7b4c7903e7e 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <asm/io.h> | 13 | #include <asm/io.h> |
14 | #include <asm/swiotlb.h> | 14 | #include <asm/swiotlb.h> |
15 | #include <asm-generic/dma-coherent.h> | 15 | #include <asm-generic/dma-coherent.h> |
16 | #include <linux/dma-contiguous.h> | ||
16 | 17 | ||
17 | #ifdef CONFIG_ISA | 18 | #ifdef CONFIG_ISA |
18 | # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) | 19 | # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) |
@@ -62,6 +63,10 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, | |||
62 | dma_addr_t *dma_addr, gfp_t flag, | 63 | dma_addr_t *dma_addr, gfp_t flag, |
63 | struct dma_attrs *attrs); | 64 | struct dma_attrs *attrs); |
64 | 65 | ||
66 | extern void dma_generic_free_coherent(struct device *dev, size_t size, | ||
67 | void *vaddr, dma_addr_t dma_addr, | ||
68 | struct dma_attrs *attrs); | ||
69 | |||
65 | #ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ | 70 | #ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ |
66 | extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); | 71 | extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); |
67 | extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); | 72 | extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 3003250ac51d..62c9457ccd2f 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -100,14 +100,18 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, | |||
100 | struct dma_attrs *attrs) | 100 | struct dma_attrs *attrs) |
101 | { | 101 | { |
102 | unsigned long dma_mask; | 102 | unsigned long dma_mask; |
103 | struct page *page; | 103 | struct page *page = NULL; |
104 | unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
104 | dma_addr_t addr; | 105 | dma_addr_t addr; |
105 | 106 | ||
106 | dma_mask = dma_alloc_coherent_mask(dev, flag); | 107 | dma_mask = dma_alloc_coherent_mask(dev, flag); |
107 | 108 | ||
108 | flag |= __GFP_ZERO; | 109 | flag |= __GFP_ZERO; |
109 | again: | 110 | again: |
110 | page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); | 111 | if (!(flag & GFP_ATOMIC)) |
112 | page = dma_alloc_from_contiguous(dev, count, get_order(size)); | ||
113 | if (!page) | ||
114 | page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); | ||
111 | if (!page) | 115 | if (!page) |
112 | return NULL; | 116 | return NULL; |
113 | 117 | ||
@@ -127,6 +131,16 @@ again: | |||
127 | return page_address(page); | 131 | return page_address(page); |
128 | } | 132 | } |
129 | 133 | ||
134 | void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
135 | dma_addr_t dma_addr, struct dma_attrs *attrs) | ||
136 | { | ||
137 | unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
138 | struct page *page = virt_to_page(vaddr); | ||
139 | |||
140 | if (!dma_release_from_contiguous(dev, page, count)) | ||
141 | free_pages((unsigned long)vaddr, get_order(size)); | ||
142 | } | ||
143 | |||
130 | /* | 144 | /* |
131 | * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel | 145 | * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel |
132 | * parameter documentation. | 146 | * parameter documentation. |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index f96050685b46..871be4a84c7d 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -74,12 +74,6 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | |||
74 | return nents; | 74 | return nents; |
75 | } | 75 | } |
76 | 76 | ||
77 | static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
78 | dma_addr_t dma_addr, struct dma_attrs *attrs) | ||
79 | { | ||
80 | free_pages((unsigned long)vaddr, get_order(size)); | ||
81 | } | ||
82 | |||
83 | static void nommu_sync_single_for_device(struct device *dev, | 77 | static void nommu_sync_single_for_device(struct device *dev, |
84 | dma_addr_t addr, size_t size, | 78 | dma_addr_t addr, size_t size, |
85 | enum dma_data_direction dir) | 79 | enum dma_data_direction dir) |
@@ -97,7 +91,7 @@ static void nommu_sync_sg_for_device(struct device *dev, | |||
97 | 91 | ||
98 | struct dma_map_ops nommu_dma_ops = { | 92 | struct dma_map_ops nommu_dma_ops = { |
99 | .alloc = dma_generic_alloc_coherent, | 93 | .alloc = dma_generic_alloc_coherent, |
100 | .free = nommu_free_coherent, | 94 | .free = dma_generic_free_coherent, |
101 | .map_sg = nommu_map_sg, | 95 | .map_sg = nommu_map_sg, |
102 | .map_page = nommu_map_page, | 96 | .map_page = nommu_map_page, |
103 | .sync_single_for_device = nommu_sync_single_for_device, | 97 | .sync_single_for_device = nommu_sync_single_for_device, |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 366c688d619e..f2afee6a19c1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <asm/pci-direct.h> | 49 | #include <asm/pci-direct.h> |
50 | #include <linux/init_ohci1394_dma.h> | 50 | #include <linux/init_ohci1394_dma.h> |
51 | #include <linux/kvm_para.h> | 51 | #include <linux/kvm_para.h> |
52 | #include <linux/dma-contiguous.h> | ||
52 | 53 | ||
53 | #include <linux/errno.h> | 54 | #include <linux/errno.h> |
54 | #include <linux/kernel.h> | 55 | #include <linux/kernel.h> |
@@ -925,6 +926,7 @@ void __init setup_arch(char **cmdline_p) | |||
925 | } | 926 | } |
926 | #endif | 927 | #endif |
927 | memblock.current_limit = get_max_mapped(); | 928 | memblock.current_limit = get_max_mapped(); |
929 | dma_contiguous_reserve(0); | ||
928 | 930 | ||
929 | /* | 931 | /* |
930 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. | 932 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. |
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 9aa618acfe97..9b21469482ae 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig | |||
@@ -192,4 +192,93 @@ config DMA_SHARED_BUFFER | |||
192 | APIs extension; the file's descriptor can then be passed on to other | 192 | APIs extension; the file's descriptor can then be passed on to other |
193 | driver. | 193 | driver. |
194 | 194 | ||
195 | config CMA | ||
196 | bool "Contiguous Memory Allocator (EXPERIMENTAL)" | ||
197 | depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK && EXPERIMENTAL | ||
198 | select MIGRATION | ||
199 | help | ||
200 | This enables the Contiguous Memory Allocator, which allows drivers | ||
201 | to allocate big physically-contiguous blocks of memory for use with | ||
202 | hardware components that support neither I/O mapping nor scatter-gather. | ||
203 | |||
204 | For more information see <include/linux/dma-contiguous.h>. | ||
205 | If unsure, say "n". | ||
206 | |||
207 | if CMA | ||
208 | |||
209 | config CMA_DEBUG | ||
210 | bool "CMA debug messages (DEVELOPMENT)" | ||
211 | depends on DEBUG_KERNEL | ||
212 | help | ||
213 | Turns on debug messages in CMA. This produces KERN_DEBUG | ||
214 | messages for every CMA call as well as various messages while | ||
215 | processing calls such as dma_alloc_from_contiguous(). | ||
216 | This option does not affect warning and error messages. | ||
217 | |||
218 | comment "Default contiguous memory area size:" | ||
219 | |||
220 | config CMA_SIZE_MBYTES | ||
221 | int "Size in Mega Bytes" | ||
222 | depends on !CMA_SIZE_SEL_PERCENTAGE | ||
223 | default 16 | ||
224 | help | ||
225 | Defines the size (in MiB) of the default memory area for Contiguous | ||
226 | Memory Allocator. | ||
227 | |||
228 | config CMA_SIZE_PERCENTAGE | ||
229 | int "Percentage of total memory" | ||
230 | depends on !CMA_SIZE_SEL_MBYTES | ||
231 | default 10 | ||
232 | help | ||
233 | Defines the size of the default memory area for Contiguous Memory | ||
234 | Allocator as a percentage of the total memory in the system. | ||
235 | |||
236 | choice | ||
237 | prompt "Selected region size" | ||
238 | default CMA_SIZE_SEL_ABSOLUTE | ||
239 | |||
240 | config CMA_SIZE_SEL_MBYTES | ||
241 | bool "Use mega bytes value only" | ||
242 | |||
243 | config CMA_SIZE_SEL_PERCENTAGE | ||
244 | bool "Use percentage value only" | ||
245 | |||
246 | config CMA_SIZE_SEL_MIN | ||
247 | bool "Use lower value (minimum)" | ||
248 | |||
249 | config CMA_SIZE_SEL_MAX | ||
250 | bool "Use higher value (maximum)" | ||
251 | |||
252 | endchoice | ||
253 | |||
254 | config CMA_ALIGNMENT | ||
255 | int "Maximum PAGE_SIZE order of alignment for contiguous buffers" | ||
256 | range 4 9 | ||
257 | default 8 | ||
258 | help | ||
259 | The DMA mapping framework by default aligns all buffers to the smallest | ||
260 | PAGE_SIZE order which is greater than or equal to the requested buffer | ||
261 | size. This works well for buffers up to a few hundred kilobytes, but | ||
262 | for larger buffers it is just a waste of memory. With this parameter you can | ||
263 | specify the maximum PAGE_SIZE order for contiguous buffers. Larger | ||
264 | buffers will be aligned only to this specified order. The order is | ||
265 | expressed as a power of two multiplied by the PAGE_SIZE. | ||
266 | |||
267 | For example, if your system defaults to 4KiB pages, the order value | ||
268 | of 8 means that the buffers will be aligned up to 1MiB only. | ||
269 | |||
270 | If unsure, leave the default value "8". | ||
271 | |||
272 | config CMA_AREAS | ||
273 | int "Maximum count of the CMA device-private areas" | ||
274 | default 7 | ||
275 | help | ||
276 | CMA allows creating CMA areas for particular devices. This parameter | ||
277 | sets the maximum number of such device-private CMA areas in the | ||
278 | system. | ||
279 | |||
280 | If unsure, leave the default value "7". | ||
281 | |||
282 | endif | ||
283 | |||
195 | endmenu | 284 | endmenu |
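
To make the CMA_ALIGNMENT help text above concrete, here is a hedged sketch of the clamp that dma_alloc_from_contiguous() applies later in this series (the helper name example_cma_align() is illustrative): with 4 KiB pages, a 5 MiB request would naturally be aligned to order 11 (8 MiB), but the default cap of 8 limits the alignment to 1 MiB.

/* Hedged illustration of the alignment cap described in CMA_ALIGNMENT. */
#include <asm/page.h>	/* get_order() */

static unsigned int example_cma_align(size_t size)
{
	unsigned int align = get_order(size);	/* natural alignment order */

	if (align > CONFIG_CMA_ALIGNMENT)	/* cap from the option above */
		align = CONFIG_CMA_ALIGNMENT;
	return align;
}
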
diff --git a/drivers/base/Makefile b/drivers/base/Makefile index b6d1b9c4200c..5aa2d703d19f 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile | |||
@@ -6,6 +6,7 @@ obj-y := core.o bus.o dd.o syscore.o \ | |||
6 | attribute_container.o transport_class.o \ | 6 | attribute_container.o transport_class.o \ |
7 | topology.o | 7 | topology.o |
8 | obj-$(CONFIG_DEVTMPFS) += devtmpfs.o | 8 | obj-$(CONFIG_DEVTMPFS) += devtmpfs.o |
9 | obj-$(CONFIG_CMA) += dma-contiguous.o | ||
9 | obj-y += power/ | 10 | obj-y += power/ |
10 | obj-$(CONFIG_HAS_DMA) += dma-mapping.o | 11 | obj-$(CONFIG_HAS_DMA) += dma-mapping.o |
11 | obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o | 12 | obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o |
diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c index bb0025c510b3..1b85949e3d2f 100644 --- a/drivers/base/dma-coherent.c +++ b/drivers/base/dma-coherent.c | |||
@@ -10,6 +10,7 @@ | |||
10 | struct dma_coherent_mem { | 10 | struct dma_coherent_mem { |
11 | void *virt_base; | 11 | void *virt_base; |
12 | dma_addr_t device_base; | 12 | dma_addr_t device_base; |
13 | phys_addr_t pfn_base; | ||
13 | int size; | 14 | int size; |
14 | int flags; | 15 | int flags; |
15 | unsigned long *bitmap; | 16 | unsigned long *bitmap; |
@@ -44,6 +45,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, | |||
44 | 45 | ||
45 | dev->dma_mem->virt_base = mem_base; | 46 | dev->dma_mem->virt_base = mem_base; |
46 | dev->dma_mem->device_base = device_addr; | 47 | dev->dma_mem->device_base = device_addr; |
48 | dev->dma_mem->pfn_base = PFN_DOWN(bus_addr); | ||
47 | dev->dma_mem->size = pages; | 49 | dev->dma_mem->size = pages; |
48 | dev->dma_mem->flags = flags; | 50 | dev->dma_mem->flags = flags; |
49 | 51 | ||
@@ -176,3 +178,43 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr) | |||
176 | return 0; | 178 | return 0; |
177 | } | 179 | } |
178 | EXPORT_SYMBOL(dma_release_from_coherent); | 180 | EXPORT_SYMBOL(dma_release_from_coherent); |
181 | |||
182 | /** | ||
183 | * dma_mmap_from_coherent() - try to mmap the memory allocated from | ||
184 | * per-device coherent memory pool to userspace | ||
185 | * @dev: device from which the memory was allocated | ||
186 | * @vma: vm_area for the userspace memory | ||
187 | * @vaddr: cpu address returned by dma_alloc_from_coherent | ||
188 | * @size: size of the memory buffer allocated by dma_alloc_from_coherent | ||
189 | * | ||
190 | * This checks whether the memory was allocated from the per-device | ||
191 | * coherent memory pool and if so, maps that memory to the provided vma. | ||
192 | * | ||
193 | * Returns 1 if we correctly mapped the memory, or 0 if | ||
194 | * dma_release_coherent() should proceed with mapping memory from | ||
195 | * generic pools. | ||
196 | */ | ||
197 | int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, | ||
198 | void *vaddr, size_t size, int *ret) | ||
199 | { | ||
200 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | ||
201 | |||
202 | if (mem && vaddr >= mem->virt_base && vaddr + size <= | ||
203 | (mem->virt_base + (mem->size << PAGE_SHIFT))) { | ||
204 | unsigned long off = vma->vm_pgoff; | ||
205 | int start = (vaddr - mem->virt_base) >> PAGE_SHIFT; | ||
206 | int user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | ||
207 | int count = size >> PAGE_SHIFT; | ||
208 | |||
209 | *ret = -ENXIO; | ||
210 | if (off < count && user_count <= count - off) { | ||
211 | unsigned pfn = mem->pfn_base + start + off; | ||
212 | *ret = remap_pfn_range(vma, vma->vm_start, pfn, | ||
213 | user_count << PAGE_SHIFT, | ||
214 | vma->vm_page_prot); | ||
215 | } | ||
216 | return 1; | ||
217 | } | ||
218 | return 0; | ||
219 | } | ||
220 | EXPORT_SYMBOL(dma_mmap_from_coherent); | ||
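
As the kernel-doc above notes, dma_mmap_from_coherent() is meant for the dma allocator, not for device drivers. A hedged sketch of the expected caller, an architecture's dma_mmap implementation; the function name and the fallback path are illustrative and not taken from this patch.

/* Hedged sketch: try the per-device coherent pool, then remap generically. */
#include <linux/dma-mapping.h>
#include <linux/mm.h>

static int example_arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
				 void *cpu_addr, dma_addr_t handle, size_t size)
{
	unsigned long user_size = vma->vm_end - vma->vm_start;
	int ret;

	/* Per-device coherent pool first; *ret carries its result. */
	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	/* Illustrative fallback: remap the generic coherent allocation. */
	return remap_pfn_range(vma, vma->vm_start,
			       page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff,
			       user_size, vma->vm_page_prot);
}
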
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c new file mode 100644 index 000000000000..78efb0306a44 --- /dev/null +++ b/drivers/base/dma-contiguous.c | |||
@@ -0,0 +1,401 @@ | |||
1 | /* | ||
2 | * Contiguous Memory Allocator for DMA mapping framework | ||
3 | * Copyright (c) 2010-2011 by Samsung Electronics. | ||
4 | * Written by: | ||
5 | * Marek Szyprowski <m.szyprowski@samsung.com> | ||
6 | * Michal Nazarewicz <mina86@mina86.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as | ||
10 | * published by the Free Software Foundation; either version 2 of the | ||
11 | * License, or (at your option) any later version of the license. | ||
12 | */ | ||
13 | |||
14 | #define pr_fmt(fmt) "cma: " fmt | ||
15 | |||
16 | #ifdef CONFIG_CMA_DEBUG | ||
17 | #ifndef DEBUG | ||
18 | # define DEBUG | ||
19 | #endif | ||
20 | #endif | ||
21 | |||
22 | #include <asm/page.h> | ||
23 | #include <asm/dma-contiguous.h> | ||
24 | |||
25 | #include <linux/memblock.h> | ||
26 | #include <linux/err.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/mutex.h> | ||
29 | #include <linux/page-isolation.h> | ||
30 | #include <linux/slab.h> | ||
31 | #include <linux/swap.h> | ||
32 | #include <linux/mm_types.h> | ||
33 | #include <linux/dma-contiguous.h> | ||
34 | |||
35 | #ifndef SZ_1M | ||
36 | #define SZ_1M (1 << 20) | ||
37 | #endif | ||
38 | |||
39 | struct cma { | ||
40 | unsigned long base_pfn; | ||
41 | unsigned long count; | ||
42 | unsigned long *bitmap; | ||
43 | }; | ||
44 | |||
45 | struct cma *dma_contiguous_default_area; | ||
46 | |||
47 | #ifdef CONFIG_CMA_SIZE_MBYTES | ||
48 | #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES | ||
49 | #else | ||
50 | #define CMA_SIZE_MBYTES 0 | ||
51 | #endif | ||
52 | |||
53 | /* | ||
54 | * Default global CMA area size can be defined in kernel's .config. | ||
55 | * This is useful mainly for distro maintainers to create a kernel | ||
56 | * that works correctly for most supported systems. | ||
57 | * The size can be set in bytes or as a percentage of the total memory | ||
58 | * in the system. | ||
59 | * | ||
60 | * Users who want to set the size of the global CMA area for their system | ||
61 | * should use the cma= kernel parameter. | ||
62 | */ | ||
63 | static const unsigned long size_bytes = CMA_SIZE_MBYTES * SZ_1M; | ||
64 | static long size_cmdline = -1; | ||
65 | |||
66 | static int __init early_cma(char *p) | ||
67 | { | ||
68 | pr_debug("%s(%s)\n", __func__, p); | ||
69 | size_cmdline = memparse(p, &p); | ||
70 | return 0; | ||
71 | } | ||
72 | early_param("cma", early_cma); | ||
73 | |||
74 | #ifdef CONFIG_CMA_SIZE_PERCENTAGE | ||
75 | |||
76 | static unsigned long __init __maybe_unused cma_early_percent_memory(void) | ||
77 | { | ||
78 | struct memblock_region *reg; | ||
79 | unsigned long total_pages = 0; | ||
80 | |||
81 | /* | ||
82 | * We cannot use memblock_phys_mem_size() here, because | ||
83 | * memblock_analyze() has not been called yet. | ||
84 | */ | ||
85 | for_each_memblock(memory, reg) | ||
86 | total_pages += memblock_region_memory_end_pfn(reg) - | ||
87 | memblock_region_memory_base_pfn(reg); | ||
88 | |||
89 | return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT; | ||
90 | } | ||
91 | |||
92 | #else | ||
93 | |||
94 | static inline __maybe_unused unsigned long cma_early_percent_memory(void) | ||
95 | { | ||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | #endif | ||
100 | |||
101 | /** | ||
102 | * dma_contiguous_reserve() - reserve area for contiguous memory handling | ||
103 | * @limit: End address of the reserved memory (optional, 0 for any). | ||
104 | * | ||
105 | * This function reserves memory from the early allocator. It should be | ||
106 | * called by arch-specific code once the early allocator (memblock or bootmem) | ||
107 | * has been activated and all other subsystems have already allocated/reserved | ||
108 | * memory. | ||
109 | */ | ||
110 | void __init dma_contiguous_reserve(phys_addr_t limit) | ||
111 | { | ||
112 | unsigned long selected_size = 0; | ||
113 | |||
114 | pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit); | ||
115 | |||
116 | if (size_cmdline != -1) { | ||
117 | selected_size = size_cmdline; | ||
118 | } else { | ||
119 | #ifdef CONFIG_CMA_SIZE_SEL_MBYTES | ||
120 | selected_size = size_bytes; | ||
121 | #elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE) | ||
122 | selected_size = cma_early_percent_memory(); | ||
123 | #elif defined(CONFIG_CMA_SIZE_SEL_MIN) | ||
124 | selected_size = min(size_bytes, cma_early_percent_memory()); | ||
125 | #elif defined(CONFIG_CMA_SIZE_SEL_MAX) | ||
126 | selected_size = max(size_bytes, cma_early_percent_memory()); | ||
127 | #endif | ||
128 | } | ||
129 | |||
130 | if (selected_size) { | ||
131 | pr_debug("%s: reserving %ld MiB for global area\n", __func__, | ||
132 | selected_size / SZ_1M); | ||
133 | |||
134 | dma_declare_contiguous(NULL, selected_size, 0, limit); | ||
135 | } | ||
136 | }; | ||
137 | |||
138 | static DEFINE_MUTEX(cma_mutex); | ||
139 | |||
140 | static __init int cma_activate_area(unsigned long base_pfn, unsigned long count) | ||
141 | { | ||
142 | unsigned long pfn = base_pfn; | ||
143 | unsigned i = count >> pageblock_order; | ||
144 | struct zone *zone; | ||
145 | |||
146 | WARN_ON_ONCE(!pfn_valid(pfn)); | ||
147 | zone = page_zone(pfn_to_page(pfn)); | ||
148 | |||
149 | do { | ||
150 | unsigned j; | ||
151 | base_pfn = pfn; | ||
152 | for (j = pageblock_nr_pages; j; --j, pfn++) { | ||
153 | WARN_ON_ONCE(!pfn_valid(pfn)); | ||
154 | if (page_zone(pfn_to_page(pfn)) != zone) | ||
155 | return -EINVAL; | ||
156 | } | ||
157 | init_cma_reserved_pageblock(pfn_to_page(base_pfn)); | ||
158 | } while (--i); | ||
159 | return 0; | ||
160 | } | ||
161 | |||
162 | static __init struct cma *cma_create_area(unsigned long base_pfn, | ||
163 | unsigned long count) | ||
164 | { | ||
165 | int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); | ||
166 | struct cma *cma; | ||
167 | int ret = -ENOMEM; | ||
168 | |||
169 | pr_debug("%s(base %08lx, count %lx)\n", __func__, base_pfn, count); | ||
170 | |||
171 | cma = kmalloc(sizeof *cma, GFP_KERNEL); | ||
172 | if (!cma) | ||
173 | return ERR_PTR(-ENOMEM); | ||
174 | |||
175 | cma->base_pfn = base_pfn; | ||
176 | cma->count = count; | ||
177 | cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | ||
178 | |||
179 | if (!cma->bitmap) | ||
180 | goto no_mem; | ||
181 | |||
182 | ret = cma_activate_area(base_pfn, count); | ||
183 | if (ret) | ||
184 | goto error; | ||
185 | |||
186 | pr_debug("%s: returned %p\n", __func__, (void *)cma); | ||
187 | return cma; | ||
188 | |||
189 | error: | ||
190 | kfree(cma->bitmap); | ||
191 | no_mem: | ||
192 | kfree(cma); | ||
193 | return ERR_PTR(ret); | ||
194 | } | ||
195 | |||
196 | static struct cma_reserved { | ||
197 | phys_addr_t start; | ||
198 | unsigned long size; | ||
199 | struct device *dev; | ||
200 | } cma_reserved[MAX_CMA_AREAS] __initdata; | ||
201 | static unsigned cma_reserved_count __initdata; | ||
202 | |||
203 | static int __init cma_init_reserved_areas(void) | ||
204 | { | ||
205 | struct cma_reserved *r = cma_reserved; | ||
206 | unsigned i = cma_reserved_count; | ||
207 | |||
208 | pr_debug("%s()\n", __func__); | ||
209 | |||
210 | for (; i; --i, ++r) { | ||
211 | struct cma *cma; | ||
212 | cma = cma_create_area(PFN_DOWN(r->start), | ||
213 | r->size >> PAGE_SHIFT); | ||
214 | if (!IS_ERR(cma)) | ||
215 | dev_set_cma_area(r->dev, cma); | ||
216 | } | ||
217 | return 0; | ||
218 | } | ||
219 | core_initcall(cma_init_reserved_areas); | ||
220 | |||
221 | /** | ||
222 | * dma_declare_contiguous() - reserve area for contiguous memory handling | ||
223 | * for particular device | ||
224 | * @dev: Pointer to device structure. | ||
225 | * @size: Size of the reserved memory. | ||
226 | * @base: Start address of the reserved memory (optional, 0 for any). | ||
227 | * @limit: End address of the reserved memory (optional, 0 for any). | ||
228 | * | ||
229 | * This function reserves memory for the specified device. It should be | ||
230 | * called by board-specific code while the early allocator (memblock or bootmem) | ||
231 | * is still active. | ||
232 | */ | ||
233 | int __init dma_declare_contiguous(struct device *dev, unsigned long size, | ||
234 | phys_addr_t base, phys_addr_t limit) | ||
235 | { | ||
236 | struct cma_reserved *r = &cma_reserved[cma_reserved_count]; | ||
237 | unsigned long alignment; | ||
238 | |||
239 | pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__, | ||
240 | (unsigned long)size, (unsigned long)base, | ||
241 | (unsigned long)limit); | ||
242 | |||
243 | /* Sanity checks */ | ||
244 | if (cma_reserved_count == ARRAY_SIZE(cma_reserved)) { | ||
245 | pr_err("Not enough slots for CMA reserved regions!\n"); | ||
246 | return -ENOSPC; | ||
247 | } | ||
248 | |||
249 | if (!size) | ||
250 | return -EINVAL; | ||
251 | |||
252 | /* Sanitise input arguments */ | ||
253 | alignment = PAGE_SIZE << max(MAX_ORDER, pageblock_order); | ||
254 | base = ALIGN(base, alignment); | ||
255 | size = ALIGN(size, alignment); | ||
256 | limit &= ~(alignment - 1); | ||
257 | |||
258 | /* Reserve memory */ | ||
259 | if (base) { | ||
260 | if (memblock_is_region_reserved(base, size) || | ||
261 | memblock_reserve(base, size) < 0) { | ||
262 | base = -EBUSY; | ||
263 | goto err; | ||
264 | } | ||
265 | } else { | ||
266 | /* | ||
267 | * Use __memblock_alloc_base() since | ||
268 | * memblock_alloc_base() panic()s. | ||
269 | */ | ||
270 | phys_addr_t addr = __memblock_alloc_base(size, alignment, limit); | ||
271 | if (!addr) { | ||
272 | base = -ENOMEM; | ||
273 | goto err; | ||
274 | } else if (addr + size > ~(unsigned long)0) { | ||
275 | memblock_free(addr, size); | ||
276 | base = -EINVAL; | ||
277 | goto err; | ||
278 | } else { | ||
279 | base = addr; | ||
280 | } | ||
281 | } | ||
282 | |||
283 | /* | ||
284 | * Each reserved area must be initialised later, when more kernel | ||
285 | * subsystems (like slab allocator) are available. | ||
286 | */ | ||
287 | r->start = base; | ||
288 | r->size = size; | ||
289 | r->dev = dev; | ||
290 | cma_reserved_count++; | ||
291 | pr_info("CMA: reserved %ld MiB at %08lx\n", size / SZ_1M, | ||
292 | (unsigned long)base); | ||
293 | |||
294 | /* Architecture specific contiguous memory fixup. */ | ||
295 | dma_contiguous_early_fixup(base, size); | ||
296 | return 0; | ||
297 | err: | ||
298 | pr_err("CMA: failed to reserve %ld MiB\n", size / SZ_1M); | ||
299 | return base; | ||
300 | } | ||
301 | |||
302 | /** | ||
303 | * dma_alloc_from_contiguous() - allocate pages from contiguous area | ||
304 | * @dev: Pointer to device for which the allocation is performed. | ||
305 | * @count: Requested number of pages. | ||
306 | * @align: Requested alignment of pages (in PAGE_SIZE order). | ||
307 | * | ||
308 | * This function allocates a memory buffer for the specified device. It uses | ||
309 | * the device-specific contiguous memory area if available, or the default | ||
310 | * global one. Requires the architecture-specific dev_get_cma_area() helper | ||
311 | * function. | ||
312 | */ | ||
313 | struct page *dma_alloc_from_contiguous(struct device *dev, int count, | ||
314 | unsigned int align) | ||
315 | { | ||
316 | unsigned long mask, pfn, pageno, start = 0; | ||
317 | struct cma *cma = dev_get_cma_area(dev); | ||
318 | int ret; | ||
319 | |||
320 | if (!cma || !cma->count) | ||
321 | return NULL; | ||
322 | |||
323 | if (align > CONFIG_CMA_ALIGNMENT) | ||
324 | align = CONFIG_CMA_ALIGNMENT; | ||
325 | |||
326 | pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, | ||
327 | count, align); | ||
328 | |||
329 | if (!count) | ||
330 | return NULL; | ||
331 | |||
332 | mask = (1 << align) - 1; | ||
333 | |||
334 | mutex_lock(&cma_mutex); | ||
335 | |||
336 | for (;;) { | ||
337 | pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count, | ||
338 | start, count, mask); | ||
339 | if (pageno >= cma->count) { | ||
340 | ret = -ENOMEM; | ||
341 | goto error; | ||
342 | } | ||
343 | |||
344 | pfn = cma->base_pfn + pageno; | ||
345 | ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); | ||
346 | if (ret == 0) { | ||
347 | bitmap_set(cma->bitmap, pageno, count); | ||
348 | break; | ||
349 | } else if (ret != -EBUSY) { | ||
350 | goto error; | ||
351 | } | ||
352 | pr_debug("%s(): memory range at %p is busy, retrying\n", | ||
353 | __func__, pfn_to_page(pfn)); | ||
354 | /* try again with a bit different memory target */ | ||
355 | start = pageno + mask + 1; | ||
356 | } | ||
357 | |||
358 | mutex_unlock(&cma_mutex); | ||
359 | |||
360 | pr_debug("%s(): returned %p\n", __func__, pfn_to_page(pfn)); | ||
361 | return pfn_to_page(pfn); | ||
362 | error: | ||
363 | mutex_unlock(&cma_mutex); | ||
364 | return NULL; | ||
365 | } | ||
366 | |||
367 | /** | ||
368 | * dma_release_from_contiguous() - release allocated pages | ||
369 | * @dev: Pointer to device for which the pages were allocated. | ||
370 | * @pages: Allocated pages. | ||
371 | * @count: Number of allocated pages. | ||
372 | * | ||
373 | * This function releases memory allocated by dma_alloc_from_contiguous(). | ||
374 | * It returns false when the provided pages do not belong to the contiguous area and | ||
375 | * true otherwise. | ||
376 | */ | ||
377 | bool dma_release_from_contiguous(struct device *dev, struct page *pages, | ||
378 | int count) | ||
379 | { | ||
380 | struct cma *cma = dev_get_cma_area(dev); | ||
381 | unsigned long pfn; | ||
382 | |||
383 | if (!cma || !pages) | ||
384 | return false; | ||
385 | |||
386 | pr_debug("%s(page %p)\n", __func__, (void *)pages); | ||
387 | |||
388 | pfn = page_to_pfn(pages); | ||
389 | |||
390 | if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) | ||
391 | return false; | ||
392 | |||
393 | VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); | ||
394 | |||
395 | mutex_lock(&cma_mutex); | ||
396 | bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count); | ||
397 | free_contig_range(pfn, count); | ||
398 | mutex_unlock(&cma_mutex); | ||
399 | |||
400 | return true; | ||
401 | } | ||
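
A hedged sketch of how board code is expected to use dma_declare_contiguous() per its kernel-doc above; the device, size and hook name are hypothetical, not part of this patch.

/* Hedged sketch: board-level reservation from an early reserve hook. */
#include <linux/dma-contiguous.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/printk.h>

extern struct platform_device example_video_device;	/* assumed elsewhere */

static void __init example_board_reserve(void)
{
	/* 32 MiB private area, no base or limit constraint */
	if (dma_declare_contiguous(&example_video_device.dev, 32 << 20, 0, 0))
		pr_warn("example: could not reserve CMA area for video\n");
}
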
diff --git a/include/asm-generic/dma-coherent.h b/include/asm-generic/dma-coherent.h index 85a3ffaa0242..abfb2682de7f 100644 --- a/include/asm-generic/dma-coherent.h +++ b/include/asm-generic/dma-coherent.h | |||
@@ -3,13 +3,15 @@ | |||
3 | 3 | ||
4 | #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT | 4 | #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT |
5 | /* | 5 | /* |
6 | * These two functions are only for dma allocator. | 6 | * These three functions are only for dma allocator. |
7 | * Don't use them in device drivers. | 7 | * Don't use them in device drivers. |
8 | */ | 8 | */ |
9 | int dma_alloc_from_coherent(struct device *dev, ssize_t size, | 9 | int dma_alloc_from_coherent(struct device *dev, ssize_t size, |
10 | dma_addr_t *dma_handle, void **ret); | 10 | dma_addr_t *dma_handle, void **ret); |
11 | int dma_release_from_coherent(struct device *dev, int order, void *vaddr); | 11 | int dma_release_from_coherent(struct device *dev, int order, void *vaddr); |
12 | 12 | ||
13 | int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, | ||
14 | void *cpu_addr, size_t size, int *ret); | ||
13 | /* | 15 | /* |
14 | * Standard interface | 16 | * Standard interface |
15 | */ | 17 | */ |
diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h new file mode 100644 index 000000000000..c544356b374b --- /dev/null +++ b/include/asm-generic/dma-contiguous.h | |||
@@ -0,0 +1,28 @@ | |||
1 | #ifndef ASM_DMA_CONTIGUOUS_H | ||
2 | #define ASM_DMA_CONTIGUOUS_H | ||
3 | |||
4 | #ifdef __KERNEL__ | ||
5 | #ifdef CONFIG_CMA | ||
6 | |||
7 | #include <linux/device.h> | ||
8 | #include <linux/dma-contiguous.h> | ||
9 | |||
10 | static inline struct cma *dev_get_cma_area(struct device *dev) | ||
11 | { | ||
12 | if (dev && dev->cma_area) | ||
13 | return dev->cma_area; | ||
14 | return dma_contiguous_default_area; | ||
15 | } | ||
16 | |||
17 | static inline void dev_set_cma_area(struct device *dev, struct cma *cma) | ||
18 | { | ||
19 | if (dev) | ||
20 | dev->cma_area = cma; | ||
21 | if (!dev || !dma_contiguous_default_area) | ||
22 | dma_contiguous_default_area = cma; | ||
23 | } | ||
24 | |||
25 | #endif | ||
26 | #endif | ||
27 | |||
28 | #endif | ||
diff --git a/include/linux/device.h b/include/linux/device.h index e04f5776f6d0..161d96241b1b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h | |||
@@ -667,6 +667,10 @@ struct device { | |||
667 | 667 | ||
668 | struct dma_coherent_mem *dma_mem; /* internal for coherent mem | 668 | struct dma_coherent_mem *dma_mem; /* internal for coherent mem |
669 | override */ | 669 | override */ |
670 | #ifdef CONFIG_CMA | ||
671 | struct cma *cma_area; /* contiguous memory area for dma | ||
672 | allocations */ | ||
673 | #endif | ||
670 | /* arch specific additions */ | 674 | /* arch specific additions */ |
671 | struct dev_archdata archdata; | 675 | struct dev_archdata archdata; |
672 | 676 | ||
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h new file mode 100644 index 000000000000..2f303e4b7ed3 --- /dev/null +++ b/include/linux/dma-contiguous.h | |||
@@ -0,0 +1,110 @@ | |||
1 | #ifndef __LINUX_CMA_H | ||
2 | #define __LINUX_CMA_H | ||
3 | |||
4 | /* | ||
5 | * Contiguous Memory Allocator for DMA mapping framework | ||
6 | * Copyright (c) 2010-2011 by Samsung Electronics. | ||
7 | * Written by: | ||
8 | * Marek Szyprowski <m.szyprowski@samsung.com> | ||
9 | * Michal Nazarewicz <mina86@mina86.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public License as | ||
13 | * published by the Free Software Foundation; either version 2 of the | ||
14 | * License, or (at your option) any later version of the license. | ||
15 | */ | ||
16 | |||
17 | /* | ||
18 | * Contiguous Memory Allocator | ||
19 | * | ||
20 | * The Contiguous Memory Allocator (CMA) makes it possible to | ||
21 | * allocate big contiguous chunks of memory after the system has | ||
22 | * booted. | ||
23 | * | ||
24 | * Why is it needed? | ||
25 | * | ||
26 | * Various devices on embedded systems have no scatter-gather and/or | ||
27 | * I/O mapping support and require contiguous blocks of memory to | ||
28 | * operate. They include devices such as cameras, hardware video | ||
29 | * coders, etc. | ||
30 | * | ||
31 | * Such devices often require big memory buffers (a full HD frame | ||
32 | * is, for instance, more than 2 megapixels large, i.e. more than 6 | ||
33 | * MB of memory), which makes mechanisms such as kmalloc() or | ||
34 | * alloc_page() ineffective. | ||
35 | * | ||
36 | * At the same time, a solution where a big memory region is | ||
37 | * reserved for a device is suboptimal since often more memory is | ||
38 | * reserved than strictly required and, moreover, the memory is | ||
39 | * inaccessible to the page allocator even if device drivers don't use it. | ||
40 | * | ||
41 | * CMA tries to solve this issue by operating on memory regions | ||
42 | * where only movable pages can be allocated from. This way, the kernel | ||
43 | * can use the memory for the page cache and, when a device driver requests | ||
44 | * it, the allocated pages can be migrated. | ||
45 | * | ||
46 | * Driver usage | ||
47 | * | ||
48 | * CMA should not be used by device drivers directly. It is | ||
49 | * only a helper framework for the dma-mapping subsystem. | ||
50 | * | ||
51 | * For more information, see kernel-docs in drivers/base/dma-contiguous.c | ||
52 | */ | ||
53 | |||
54 | #ifdef __KERNEL__ | ||
55 | |||
56 | struct cma; | ||
57 | struct page; | ||
58 | struct device; | ||
59 | |||
60 | #ifdef CONFIG_CMA | ||
61 | |||
62 | /* | ||
63 | * There is always at least the global CMA area and a few optional | ||
64 | * device-private areas configured in the kernel .config. | ||
65 | */ | ||
66 | #define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) | ||
67 | |||
68 | extern struct cma *dma_contiguous_default_area; | ||
69 | |||
70 | void dma_contiguous_reserve(phys_addr_t addr_limit); | ||
71 | int dma_declare_contiguous(struct device *dev, unsigned long size, | ||
72 | phys_addr_t base, phys_addr_t limit); | ||
73 | |||
74 | struct page *dma_alloc_from_contiguous(struct device *dev, int count, | ||
75 | unsigned int order); | ||
76 | bool dma_release_from_contiguous(struct device *dev, struct page *pages, | ||
77 | int count); | ||
78 | |||
79 | #else | ||
80 | |||
81 | #define MAX_CMA_AREAS (0) | ||
82 | |||
83 | static inline void dma_contiguous_reserve(phys_addr_t limit) { } | ||
84 | |||
85 | static inline | ||
86 | int dma_declare_contiguous(struct device *dev, unsigned long size, | ||
87 | phys_addr_t base, phys_addr_t limit) | ||
88 | { | ||
89 | return -ENOSYS; | ||
90 | } | ||
91 | |||
92 | static inline | ||
93 | struct page *dma_alloc_from_contiguous(struct device *dev, int count, | ||
94 | unsigned int order) | ||
95 | { | ||
96 | return NULL; | ||
97 | } | ||
98 | |||
99 | static inline | ||
100 | bool dma_release_from_contiguous(struct device *dev, struct page *pages, | ||
101 | int count) | ||
102 | { | ||
103 | return false; | ||
104 | } | ||
105 | |||
106 | #endif | ||
107 | |||
108 | #endif | ||
109 | |||
110 | #endif | ||
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 581e74b7df95..1e49be49d324 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
@@ -391,4 +391,16 @@ static inline bool pm_suspended_storage(void) | |||
391 | } | 391 | } |
392 | #endif /* CONFIG_PM_SLEEP */ | 392 | #endif /* CONFIG_PM_SLEEP */ |
393 | 393 | ||
394 | #ifdef CONFIG_CMA | ||
395 | |||
396 | /* The below functions must be run on a range from a single zone. */ | ||
397 | extern int alloc_contig_range(unsigned long start, unsigned long end, | ||
398 | unsigned migratetype); | ||
399 | extern void free_contig_range(unsigned long pfn, unsigned nr_pages); | ||
400 | |||
401 | /* CMA stuff */ | ||
402 | extern void init_cma_reserved_pageblock(struct page *page); | ||
403 | |||
404 | #endif | ||
405 | |||
394 | #endif /* __LINUX_GFP_H */ | 406 | #endif /* __LINUX_GFP_H */ |
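
The prototypes added above are the page-allocator primitives CMA is built on. A hedged sketch of their contract follows (illustrative helpers only; real users should go through dma_alloc_from_contiguous() and dma_release_from_contiguous()).

/* Hedged sketch: claim and release a contiguous MIGRATE_CMA page range. */
#include <linux/gfp.h>
#include <linux/mm.h>

static struct page *example_grab_range(unsigned long start_pfn,
				       unsigned long nr_pages)
{
	/* Fails (non-zero) if the range is busy or pages cannot be migrated. */
	if (alloc_contig_range(start_pfn, start_pfn + nr_pages, MIGRATE_CMA))
		return NULL;
	return pfn_to_page(start_pfn);
}

static void example_put_range(unsigned long start_pfn, unsigned long nr_pages)
{
	free_contig_range(start_pfn, nr_pages);
}
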
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 41aa49b74821..4871e31ae277 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -35,13 +35,39 @@ | |||
35 | */ | 35 | */ |
36 | #define PAGE_ALLOC_COSTLY_ORDER 3 | 36 | #define PAGE_ALLOC_COSTLY_ORDER 3 |
37 | 37 | ||
38 | #define MIGRATE_UNMOVABLE 0 | 38 | enum { |
39 | #define MIGRATE_RECLAIMABLE 1 | 39 | MIGRATE_UNMOVABLE, |
40 | #define MIGRATE_MOVABLE 2 | 40 | MIGRATE_RECLAIMABLE, |
41 | #define MIGRATE_PCPTYPES 3 /* the number of types on the pcp lists */ | 41 | MIGRATE_MOVABLE, |
42 | #define MIGRATE_RESERVE 3 | 42 | MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ |
43 | #define MIGRATE_ISOLATE 4 /* can't allocate from here */ | 43 | MIGRATE_RESERVE = MIGRATE_PCPTYPES, |
44 | #define MIGRATE_TYPES 5 | 44 | #ifdef CONFIG_CMA |
45 | /* | ||
46 | * MIGRATE_CMA migration type is designed to mimic the way | ||
47 | * ZONE_MOVABLE works. Only movable pages can be allocated | ||
48 | * from MIGRATE_CMA pageblocks and the page allocator never | ||
49 | * implicitly changes the migration type of a MIGRATE_CMA pageblock. | ||
50 | * | ||
51 | * The way to use it is to change migratetype of a range of | ||
52 | * pageblocks to MIGRATE_CMA, which can be done by the | ||
53 | * __free_pageblock_cma() function. What is important, though, | ||
54 | * is that a range of pageblocks must be aligned to | ||
55 | * MAX_ORDER_NR_PAGES should the biggest page be bigger than | ||
56 | * a single pageblock. | ||
57 | */ | ||
58 | MIGRATE_CMA, | ||
59 | #endif | ||
60 | MIGRATE_ISOLATE, /* can't allocate from here */ | ||
61 | MIGRATE_TYPES | ||
62 | }; | ||
63 | |||
64 | #ifdef CONFIG_CMA | ||
65 | # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) | ||
66 | # define cma_wmark_pages(zone) zone->min_cma_pages | ||
67 | #else | ||
68 | # define is_migrate_cma(migratetype) false | ||
69 | # define cma_wmark_pages(zone) 0 | ||
70 | #endif | ||
45 | 71 | ||
46 | #define for_each_migratetype_order(order, type) \ | 72 | #define for_each_migratetype_order(order, type) \ |
47 | for (order = 0; order < MAX_ORDER; order++) \ | 73 | for (order = 0; order < MAX_ORDER; order++) \ |
@@ -347,6 +373,13 @@ struct zone { | |||
347 | /* see spanned/present_pages for more description */ | 373 | /* see spanned/present_pages for more description */ |
348 | seqlock_t span_seqlock; | 374 | seqlock_t span_seqlock; |
349 | #endif | 375 | #endif |
376 | #ifdef CONFIG_CMA | ||
377 | /* | ||
378 | * CMA needs to increase watermark levels during the allocation | ||
379 | * process to make sure that the system is not starved. | ||
380 | */ | ||
381 | unsigned long min_cma_pages; | ||
382 | #endif | ||
350 | struct free_area free_area[MAX_ORDER]; | 383 | struct free_area free_area[MAX_ORDER]; |
351 | 384 | ||
352 | #ifndef CONFIG_SPARSEMEM | 385 | #ifndef CONFIG_SPARSEMEM |
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 051c1b1ede4e..3bdcab30ca41 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | /* | 4 | /* |
5 | * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. | 5 | * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. |
6 | * If specified range includes migrate types other than MOVABLE, | 6 | * If specified range includes migrate types other than MOVABLE or CMA, |
7 | * this will fail with -EBUSY. | 7 | * this will fail with -EBUSY. |
8 | * | 8 | * |
9 | * For isolating all pages in the range finally, the caller have to | 9 | * For isolating all pages in the range finally, the caller have to |
@@ -11,27 +11,27 @@ | |||
11 | * test it. | 11 | * test it. |
12 | */ | 12 | */ |
13 | extern int | 13 | extern int |
14 | start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); | 14 | start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
15 | unsigned migratetype); | ||
15 | 16 | ||
16 | /* | 17 | /* |
17 | * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE. | 18 | * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE. |
18 | * target range is [start_pfn, end_pfn) | 19 | * target range is [start_pfn, end_pfn) |
19 | */ | 20 | */ |
20 | extern int | 21 | extern int |
21 | undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); | 22 | undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
23 | unsigned migratetype); | ||
22 | 24 | ||
23 | /* | 25 | /* |
24 | * test all pages in [start_pfn, end_pfn)are isolated or not. | 26 | * Test all pages in [start_pfn, end_pfn) are isolated or not. |
25 | */ | 27 | */ |
26 | extern int | 28 | int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn); |
27 | test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn); | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * Internal funcs.Changes pageblock's migrate type. | 31 | * Internal functions. Changes pageblock's migrate type. |
31 | * Please use make_pagetype_isolated()/make_pagetype_movable(). | ||
32 | */ | 32 | */ |
33 | extern int set_migratetype_isolate(struct page *page); | 33 | extern int set_migratetype_isolate(struct page *page); |
34 | extern void unset_migratetype_isolate(struct page *page); | 34 | extern void unset_migratetype_isolate(struct page *page, unsigned migratetype); |
35 | 35 | ||
36 | 36 | ||
37 | #endif | 37 | #endif |
diff --git a/mm/Kconfig b/mm/Kconfig index e338407f1225..39220026c797 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -198,7 +198,7 @@ config COMPACTION | |||
198 | config MIGRATION | 198 | config MIGRATION |
199 | bool "Page migration" | 199 | bool "Page migration" |
200 | def_bool y | 200 | def_bool y |
201 | depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION | 201 | depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA |
202 | help | 202 | help |
203 | Allows the migration of the physical location of pages of processes | 203 | Allows the migration of the physical location of pages of processes |
204 | while the virtual addresses are not changed. This is useful in | 204 | while the virtual addresses are not changed. This is useful in |
diff --git a/mm/Makefile b/mm/Makefile index 50ec00ef2a0e..8aada89efbbb 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -13,7 +13,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ | |||
13 | readahead.o swap.o truncate.o vmscan.o shmem.o \ | 13 | readahead.o swap.o truncate.o vmscan.o shmem.o \ |
14 | prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ | 14 | prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ |
15 | page_isolation.o mm_init.o mmu_context.o percpu.o \ | 15 | page_isolation.o mm_init.o mmu_context.o percpu.o \ |
16 | $(mmu-y) | 16 | compaction.o $(mmu-y) |
17 | obj-y += init-mm.o | 17 | obj-y += init-mm.o |
18 | 18 | ||
19 | ifdef CONFIG_NO_BOOTMEM | 19 | ifdef CONFIG_NO_BOOTMEM |
@@ -32,7 +32,6 @@ obj-$(CONFIG_NUMA) += mempolicy.o | |||
32 | obj-$(CONFIG_SPARSEMEM) += sparse.o | 32 | obj-$(CONFIG_SPARSEMEM) += sparse.o |
33 | obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o | 33 | obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o |
34 | obj-$(CONFIG_SLOB) += slob.o | 34 | obj-$(CONFIG_SLOB) += slob.o |
35 | obj-$(CONFIG_COMPACTION) += compaction.o | ||
36 | obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o | 35 | obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o |
37 | obj-$(CONFIG_KSM) += ksm.o | 36 | obj-$(CONFIG_KSM) += ksm.o |
38 | obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o | 37 | obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o |
diff --git a/mm/compaction.c b/mm/compaction.c index 74a8c825ff28..da7d35ea5103 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
@@ -16,30 +16,11 @@ | |||
16 | #include <linux/sysfs.h> | 16 | #include <linux/sysfs.h> |
17 | #include "internal.h" | 17 | #include "internal.h" |
18 | 18 | ||
19 | #if defined CONFIG_COMPACTION || defined CONFIG_CMA | ||
20 | |||
19 | #define CREATE_TRACE_POINTS | 21 | #define CREATE_TRACE_POINTS |
20 | #include <trace/events/compaction.h> | 22 | #include <trace/events/compaction.h> |
21 | 23 | ||
22 | /* | ||
23 | * compact_control is used to track pages being migrated and the free pages | ||
24 | * they are being migrated to during memory compaction. The free_pfn starts | ||
25 | * at the end of a zone and migrate_pfn begins at the start. Movable pages | ||
26 | * are moved to the end of a zone during a compaction run and the run | ||
27 | * completes when free_pfn <= migrate_pfn | ||
28 | */ | ||
29 | struct compact_control { | ||
30 | struct list_head freepages; /* List of free pages to migrate to */ | ||
31 | struct list_head migratepages; /* List of pages being migrated */ | ||
32 | unsigned long nr_freepages; /* Number of isolated free pages */ | ||
33 | unsigned long nr_migratepages; /* Number of pages to migrate */ | ||
34 | unsigned long free_pfn; /* isolate_freepages search base */ | ||
35 | unsigned long migrate_pfn; /* isolate_migratepages search base */ | ||
36 | bool sync; /* Synchronous migration */ | ||
37 | |||
38 | int order; /* order a direct compactor needs */ | ||
39 | int migratetype; /* MOVABLE, RECLAIMABLE etc */ | ||
40 | struct zone *zone; | ||
41 | }; | ||
42 | |||
43 | static unsigned long release_freepages(struct list_head *freelist) | 24 | static unsigned long release_freepages(struct list_head *freelist) |
44 | { | 25 | { |
45 | struct page *page, *next; | 26 | struct page *page, *next; |
@@ -54,24 +35,35 @@ static unsigned long release_freepages(struct list_head *freelist) | |||
54 | return count; | 35 | return count; |
55 | } | 36 | } |
56 | 37 | ||
57 | /* Isolate free pages onto a private freelist. Must hold zone->lock */ | 38 | static void map_pages(struct list_head *list) |
58 | static unsigned long isolate_freepages_block(struct zone *zone, | 39 | { |
59 | unsigned long blockpfn, | 40 | struct page *page; |
60 | struct list_head *freelist) | 41 | |
42 | list_for_each_entry(page, list, lru) { | ||
43 | arch_alloc_page(page, 0); | ||
44 | kernel_map_pages(page, 1, 1); | ||
45 | } | ||
46 | } | ||
47 | |||
48 | static inline bool migrate_async_suitable(int migratetype) | ||
49 | { | ||
50 | return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; | ||
51 | } | ||
52 | |||
53 | /* | ||
54 | * Isolate free pages onto a private freelist. Caller must hold zone->lock. | ||
55 | * If @strict is true, will abort returning 0 on any invalid PFNs or non-free | ||
56 | * pages inside of the pageblock (even though it may still end up isolating | ||
57 | * some pages). | ||
58 | */ | ||
59 | static unsigned long isolate_freepages_block(unsigned long blockpfn, | ||
60 | unsigned long end_pfn, | ||
61 | struct list_head *freelist, | ||
62 | bool strict) | ||
61 | { | 63 | { |
62 | unsigned long zone_end_pfn, end_pfn; | ||
63 | int nr_scanned = 0, total_isolated = 0; | 64 | int nr_scanned = 0, total_isolated = 0; |
64 | struct page *cursor; | 65 | struct page *cursor; |
65 | 66 | ||
66 | /* Get the last PFN we should scan for free pages at */ | ||
67 | zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; | ||
68 | end_pfn = min(blockpfn + pageblock_nr_pages, zone_end_pfn); | ||
69 | |||
70 | /* Find the first usable PFN in the block to initialse page cursor */ | ||
71 | for (; blockpfn < end_pfn; blockpfn++) { | ||
72 | if (pfn_valid_within(blockpfn)) | ||
73 | break; | ||
74 | } | ||
75 | cursor = pfn_to_page(blockpfn); | 67 | cursor = pfn_to_page(blockpfn); |
76 | 68 | ||
77 | /* Isolate free pages. This assumes the block is valid */ | 69 | /* Isolate free pages. This assumes the block is valid */ |
@@ -79,15 +71,23 @@ static unsigned long isolate_freepages_block(struct zone *zone, | |||
79 | int isolated, i; | 71 | int isolated, i; |
80 | struct page *page = cursor; | 72 | struct page *page = cursor; |
81 | 73 | ||
82 | if (!pfn_valid_within(blockpfn)) | 74 | if (!pfn_valid_within(blockpfn)) { |
75 | if (strict) | ||
76 | return 0; | ||
83 | continue; | 77 | continue; |
78 | } | ||
84 | nr_scanned++; | 79 | nr_scanned++; |
85 | 80 | ||
86 | if (!PageBuddy(page)) | 81 | if (!PageBuddy(page)) { |
82 | if (strict) | ||
83 | return 0; | ||
87 | continue; | 84 | continue; |
85 | } | ||
88 | 86 | ||
89 | /* Found a free page, break it into order-0 pages */ | 87 | /* Found a free page, break it into order-0 pages */ |
90 | isolated = split_free_page(page); | 88 | isolated = split_free_page(page); |
89 | if (!isolated && strict) | ||
90 | return 0; | ||
91 | total_isolated += isolated; | 91 | total_isolated += isolated; |
92 | for (i = 0; i < isolated; i++) { | 92 | for (i = 0; i < isolated; i++) { |
93 | list_add(&page->lru, freelist); | 93 | list_add(&page->lru, freelist); |
@@ -105,114 +105,71 @@ static unsigned long isolate_freepages_block(struct zone *zone, | |||
105 | return total_isolated; | 105 | return total_isolated; |
106 | } | 106 | } |
107 | 107 | ||
108 | /* Returns true if the page is within a block suitable for migration to */ | 108 | /** |
109 | static bool suitable_migration_target(struct page *page) | 109 | * isolate_freepages_range() - isolate free pages. |
110 | { | 110 | * @start_pfn: The first PFN to start isolating. |
111 | 111 | * @end_pfn: The one-past-last PFN. | |
112 | int migratetype = get_pageblock_migratetype(page); | 112 | * |
113 | 113 | * Non-free pages, invalid PFNs, or zone boundaries within the | |
114 | /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ | 114 | * [start_pfn, end_pfn) range are considered errors, cause function to |
115 | if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) | 115 | * [start_pfn, end_pfn) range are considered errors and cause the function to |
116 | return false; | 116 | * |
117 | 117 | * Otherwise, function returns one-past-the-last PFN of isolated page | |
118 | /* If the page is a large free page, then allow migration */ | 118 | * (which may be greater then end_pfn if end fell in a middle of |
119 | if (PageBuddy(page) && page_order(page) >= pageblock_order) | 119 | * a free page). |
120 | return true; | ||
121 | |||
122 | /* If the block is MIGRATE_MOVABLE, allow migration */ | ||
123 | if (migratetype == MIGRATE_MOVABLE) | ||
124 | return true; | ||
125 | |||
126 | /* Otherwise skip the block */ | ||
127 | return false; | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Based on information in the current compact_control, find blocks | ||
132 | * suitable for isolating free pages from and then isolate them. | ||
133 | */ | 120 | */ |
134 | static void isolate_freepages(struct zone *zone, | 121 | unsigned long |
135 | struct compact_control *cc) | 122 | isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn) |
136 | { | 123 | { |
137 | struct page *page; | 124 | unsigned long isolated, pfn, block_end_pfn, flags; |
138 | unsigned long high_pfn, low_pfn, pfn; | 125 | struct zone *zone = NULL; |
139 | unsigned long flags; | 126 | LIST_HEAD(freelist); |
140 | int nr_freepages = cc->nr_freepages; | ||
141 | struct list_head *freelist = &cc->freepages; | ||
142 | |||
143 | /* | ||
144 | * Initialise the free scanner. The starting point is where we last | ||
145 | * scanned from (or the end of the zone if starting). The low point | ||
146 | * is the end of the pageblock the migration scanner is using. | ||
147 | */ | ||
148 | pfn = cc->free_pfn; | ||
149 | low_pfn = cc->migrate_pfn + pageblock_nr_pages; | ||
150 | 127 | ||
151 | /* | 128 | if (pfn_valid(start_pfn)) |
152 | * Take care that if the migration scanner is at the end of the zone | 129 | zone = page_zone(pfn_to_page(start_pfn)); |
153 | * that the free scanner does not accidentally move to the next zone | ||
154 | * in the next isolation cycle. | ||
155 | */ | ||
156 | high_pfn = min(low_pfn, pfn); | ||
157 | |||
158 | /* | ||
159 | * Isolate free pages until enough are available to migrate the | ||
160 | * pages on cc->migratepages. We stop searching if the migrate | ||
161 | * and free page scanners meet or enough free pages are isolated. | ||
162 | */ | ||
163 | for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; | ||
164 | pfn -= pageblock_nr_pages) { | ||
165 | unsigned long isolated; | ||
166 | 130 | ||
167 | if (!pfn_valid(pfn)) | 131 | for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) { |
168 | continue; | 132 | if (!pfn_valid(pfn) || zone != page_zone(pfn_to_page(pfn))) |
133 | break; | ||
169 | 134 | ||
170 | /* | 135 | /* |
171 | * Check for overlapping nodes/zones. It's possible on some | 136 | * On subsequent iterations ALIGN() is actually not needed, |
172 | * configurations to have a setup like | 137 | * but we keep it so as not to complicate the code. |
173 | * node0 node1 node0 | ||
174 | * i.e. it's possible that all pages within a zones range of | ||
175 | * pages do not belong to a single zone. | ||
176 | */ | 138 | */ |
177 | page = pfn_to_page(pfn); | 139 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); |
178 | if (page_zone(page) != zone) | 140 | block_end_pfn = min(block_end_pfn, end_pfn); |
179 | continue; | ||
180 | 141 | ||
181 | /* Check the block is suitable for migration */ | 142 | spin_lock_irqsave(&zone->lock, flags); |
182 | if (!suitable_migration_target(page)) | 143 | isolated = isolate_freepages_block(pfn, block_end_pfn, |
183 | continue; | 144 | &freelist, true); |
145 | spin_unlock_irqrestore(&zone->lock, flags); | ||
184 | 146 | ||
185 | /* | 147 | /* |
186 | * Found a block suitable for isolating free pages from. Now | 148 | * In strict mode, isolate_freepages_block() returns 0 if |
187 | * we disabled interrupts, double check things are ok and | 149 | * there are any holes in the block (ie. invalid PFNs or |
188 | * isolate the pages. This is to minimise the time IRQs | 150 | * non-free pages). |
189 | * are disabled | ||
190 | */ | 151 | */ |
191 | isolated = 0; | 152 | if (!isolated) |
192 | spin_lock_irqsave(&zone->lock, flags); | 153 | break; |
193 | if (suitable_migration_target(page)) { | ||
194 | isolated = isolate_freepages_block(zone, pfn, freelist); | ||
195 | nr_freepages += isolated; | ||
196 | } | ||
197 | spin_unlock_irqrestore(&zone->lock, flags); | ||
198 | 154 | ||
199 | /* | 155 | /* |
200 | * Record the highest PFN we isolated pages from. When next | 156 | * If we managed to isolate pages, it is always (1 << n) * |
201 | * looking for free pages, the search will restart here as | 157 | * pageblock_nr_pages for some non-negative n. (Max order |
202 | * page migration may have returned some pages to the allocator | 158 | * page may span two pageblocks). |
203 | */ | 159 | */ |
204 | if (isolated) | ||
205 | high_pfn = max(high_pfn, pfn); | ||
206 | } | 160 | } |
207 | 161 | ||
208 | /* split_free_page does not map the pages */ | 162 | /* split_free_page does not map the pages */ |
209 | list_for_each_entry(page, freelist, lru) { | 163 | map_pages(&freelist); |
210 | arch_alloc_page(page, 0); | 164 | |
211 | kernel_map_pages(page, 1, 1); | 165 | if (pfn < end_pfn) { |
166 | /* Loop terminated early, cleanup. */ | ||
167 | release_freepages(&freelist); | ||
168 | return 0; | ||
212 | } | 169 | } |
213 | 170 | ||
214 | cc->free_pfn = high_pfn; | 171 | /* We don't use freelists for anything. */ |
215 | cc->nr_freepages = nr_freepages; | 172 | return pfn; |
216 | } | 173 | } |
217 | 174 | ||
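As an aside on the block walk in isolate_freepages_range() above: a minimal userspace sketch of the ALIGN(pfn + 1, pageblock_nr_pages) stepping, with pageblock_nr_pages assumed to be 512 purely for illustration (the real value is configuration dependent). It shows why only the first iteration actually needs the rounding; every later iteration already starts on a pageblock boundary.

#include <stdio.h>

#define PAGEBLOCK_NR_PAGES  512UL                       /* assumed value, demo only */
#define ALIGN(x, a)         (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
    unsigned long start_pfn = 1000, end_pfn = 2100;
    unsigned long pfn, block_end_pfn, isolated;

    for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) {
        /* First pass rounds 1001 up to 1024; afterwards pfn is already
         * on a pageblock boundary and ALIGN() changes nothing. */
        block_end_pfn = ALIGN(pfn + 1, PAGEBLOCK_NR_PAGES);
        if (block_end_pfn > end_pfn)
            block_end_pfn = end_pfn;

        /* Pretend the whole chunk was free and got isolated. */
        isolated = block_end_pfn - pfn;
        printf("scanned block [%lu, %lu)\n", pfn, block_end_pfn);
    }
    return 0;
}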
218 | /* Update the number of anon and file isolated pages in the zone */ | 175 | /* Update the number of anon and file isolated pages in the zone */ |
@@ -243,38 +200,34 @@ static bool too_many_isolated(struct zone *zone) | |||
243 | return isolated > (inactive + active) / 2; | 200 | return isolated > (inactive + active) / 2; |
244 | } | 201 | } |
245 | 202 | ||
246 | /* possible outcome of isolate_migratepages */ | 203 | /** |
247 | typedef enum { | 204 | * isolate_migratepages_range() - isolate all migrate-able pages in range. |
248 | ISOLATE_ABORT, /* Abort compaction now */ | 205 | * @zone: Zone pages are in. |
249 | ISOLATE_NONE, /* No pages isolated, continue scanning */ | 206 | * @cc: Compaction control structure. |
250 | ISOLATE_SUCCESS, /* Pages isolated, migrate */ | 207 | * @low_pfn: The first PFN of the range. |
251 | } isolate_migrate_t; | 208 | * @end_pfn: The one-past-the-last PFN of the range. |
252 | 209 | * | |
253 | /* | 210 | * Isolate all pages that can be migrated from the range specified by |
254 | * Isolate all pages that can be migrated from the block pointed to by | 211 | * [low_pfn, end_pfn). Returns zero if there is a fatal signal |
255 | * the migrate scanner within compact_control. | 212 | * pending), otherwise PFN of the first page that was not scanned |
213 | * (which may be both less, equal to or more then end_pfn). | ||
214 | * | ||
215 | * Assumes that cc->migratepages is empty and cc->nr_migratepages is | ||
216 | * zero. | ||
217 | * | ||
218 | * Apart from cc->migratepages and cc->nr_migratepages this function | ||
219 | * does not modify any cc's fields, in particular it does not modify | ||
220 | * (or read for that matter) cc->migrate_pfn. | ||
256 | */ | 221 | */ |
257 | static isolate_migrate_t isolate_migratepages(struct zone *zone, | 222 | unsigned long |
258 | struct compact_control *cc) | 223 | isolate_migratepages_range(struct zone *zone, struct compact_control *cc, |
224 | unsigned long low_pfn, unsigned long end_pfn) | ||
259 | { | 225 | { |
260 | unsigned long low_pfn, end_pfn; | ||
261 | unsigned long last_pageblock_nr = 0, pageblock_nr; | 226 | unsigned long last_pageblock_nr = 0, pageblock_nr; |
262 | unsigned long nr_scanned = 0, nr_isolated = 0; | 227 | unsigned long nr_scanned = 0, nr_isolated = 0; |
263 | struct list_head *migratelist = &cc->migratepages; | 228 | struct list_head *migratelist = &cc->migratepages; |
264 | isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; | 229 | isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; |
265 | 230 | ||
266 | /* Do not scan outside zone boundaries */ | ||
267 | low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); | ||
268 | |||
269 | /* Only scan within a pageblock boundary */ | ||
270 | end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); | ||
271 | |||
272 | /* Do not cross the free scanner or scan within a memory hole */ | ||
273 | if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { | ||
274 | cc->migrate_pfn = end_pfn; | ||
275 | return ISOLATE_NONE; | ||
276 | } | ||
277 | |||
278 | /* | 231 | /* |
279 | * Ensure that there are not too many pages isolated from the LRU | 232 | * Ensure that there are not too many pages isolated from the LRU |
280 | * list by either parallel reclaimers or compaction. If there are, | 233 | * list by either parallel reclaimers or compaction. If there are, |
@@ -283,12 +236,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
283 | while (unlikely(too_many_isolated(zone))) { | 236 | while (unlikely(too_many_isolated(zone))) { |
284 | /* async migration should just abort */ | 237 | /* async migration should just abort */ |
285 | if (!cc->sync) | 238 | if (!cc->sync) |
286 | return ISOLATE_ABORT; | 239 | return 0; |
287 | 240 | ||
288 | congestion_wait(BLK_RW_ASYNC, HZ/10); | 241 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
289 | 242 | ||
290 | if (fatal_signal_pending(current)) | 243 | if (fatal_signal_pending(current)) |
291 | return ISOLATE_ABORT; | 244 | return 0; |
292 | } | 245 | } |
293 | 246 | ||
294 | /* Time to isolate some pages for migration */ | 247 | /* Time to isolate some pages for migration */ |
@@ -351,7 +304,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
351 | */ | 304 | */ |
352 | pageblock_nr = low_pfn >> pageblock_order; | 305 | pageblock_nr = low_pfn >> pageblock_order; |
353 | if (!cc->sync && last_pageblock_nr != pageblock_nr && | 306 | if (!cc->sync && last_pageblock_nr != pageblock_nr && |
354 | get_pageblock_migratetype(page) != MIGRATE_MOVABLE) { | 307 | !migrate_async_suitable(get_pageblock_migratetype(page))) { |
355 | low_pfn += pageblock_nr_pages; | 308 | low_pfn += pageblock_nr_pages; |
356 | low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; | 309 | low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; |
357 | last_pageblock_nr = pageblock_nr; | 310 | last_pageblock_nr = pageblock_nr; |
@@ -396,11 +349,124 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
396 | acct_isolated(zone, cc); | 349 | acct_isolated(zone, cc); |
397 | 350 | ||
398 | spin_unlock_irq(&zone->lru_lock); | 351 | spin_unlock_irq(&zone->lru_lock); |
399 | cc->migrate_pfn = low_pfn; | ||
400 | 352 | ||
401 | trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); | 353 | trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); |
402 | 354 | ||
403 | return ISOLATE_SUCCESS; | 355 | return low_pfn; |
356 | } | ||
357 | |||
358 | #endif /* CONFIG_COMPACTION || CONFIG_CMA */ | ||
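The async skip inside isolate_migratepages_range() above (low_pfn += pageblock_nr_pages; low_pfn = ALIGN(...) - 1) leaves the scanner on the last PFN of the unsuitable pageblock so that the for-loop's own increment resumes exactly at the next block. A small standalone check, again assuming pageblock_nr_pages == 512 for illustration only:

#include <assert.h>
#include <stdio.h>

#define PAGEBLOCK_NR_PAGES  512UL                       /* assumed value, demo only */
#define ALIGN(x, a)         (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
    /* The scanner has just entered the unsuitable pageblock [1536, 2048). */
    unsigned long low_pfn = 1536;

    /* Same arithmetic as the async skip in isolate_migratepages_range(). */
    low_pfn += PAGEBLOCK_NR_PAGES;                      /* 2048 */
    low_pfn = ALIGN(low_pfn, PAGEBLOCK_NR_PAGES) - 1;   /* 2047 */

    /* The enclosing for-loop then increments low_pfn, so scanning
     * resumes at 2048, the first PFN of the next pageblock. */
    assert(low_pfn + 1 == 2048);
    printf("scan resumes at pfn %lu\n", low_pfn + 1);
    return 0;
}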
359 | #ifdef CONFIG_COMPACTION | ||
360 | |||
361 | /* Returns true if the page is within a block suitable for migration to */ | ||
362 | static bool suitable_migration_target(struct page *page) | ||
363 | { | ||
364 | |||
365 | int migratetype = get_pageblock_migratetype(page); | ||
366 | |||
367 | /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ | ||
368 | if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) | ||
369 | return false; | ||
370 | |||
371 | /* If the page is a large free page, then allow migration */ | ||
372 | if (PageBuddy(page) && page_order(page) >= pageblock_order) | ||
373 | return true; | ||
374 | |||
375 | /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ | ||
376 | if (migrate_async_suitable(migratetype)) | ||
377 | return true; | ||
378 | |||
379 | /* Otherwise skip the block */ | ||
380 | return false; | ||
381 | } | ||
382 | |||
383 | /* | ||
384 | * Based on information in the current compact_control, find blocks | ||
385 | * suitable for isolating free pages from and then isolate them. | ||
386 | */ | ||
387 | static void isolate_freepages(struct zone *zone, | ||
388 | struct compact_control *cc) | ||
389 | { | ||
390 | struct page *page; | ||
391 | unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn; | ||
392 | unsigned long flags; | ||
393 | int nr_freepages = cc->nr_freepages; | ||
394 | struct list_head *freelist = &cc->freepages; | ||
395 | |||
396 | /* | ||
397 | * Initialise the free scanner. The starting point is where we last | ||
398 | * scanned from (or the end of the zone if starting). The low point | ||
399 | * is the end of the pageblock the migration scanner is using. | ||
400 | */ | ||
401 | pfn = cc->free_pfn; | ||
402 | low_pfn = cc->migrate_pfn + pageblock_nr_pages; | ||
403 | |||
404 | /* | ||
405 | * Take care that if the migration scanner is at the end of the zone | ||
406 | * that the free scanner does not accidentally move to the next zone | ||
407 | * in the next isolation cycle. | ||
408 | */ | ||
409 | high_pfn = min(low_pfn, pfn); | ||
410 | |||
411 | zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; | ||
412 | |||
413 | /* | ||
414 | * Isolate free pages until enough are available to migrate the | ||
415 | * pages on cc->migratepages. We stop searching if the migrate | ||
416 | * and free page scanners meet or enough free pages are isolated. | ||
417 | */ | ||
418 | for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; | ||
419 | pfn -= pageblock_nr_pages) { | ||
420 | unsigned long isolated; | ||
421 | |||
422 | if (!pfn_valid(pfn)) | ||
423 | continue; | ||
424 | |||
425 | /* | ||
426 | * Check for overlapping nodes/zones. It's possible on some | ||
427 | * configurations to have a setup like | ||
428 | * node0 node1 node0 | ||
429 | * i.e. it's possible that all pages within a zones range of | ||
430 | * pages do not belong to a single zone. | ||
431 | */ | ||
432 | page = pfn_to_page(pfn); | ||
433 | if (page_zone(page) != zone) | ||
434 | continue; | ||
435 | |||
436 | /* Check the block is suitable for migration */ | ||
437 | if (!suitable_migration_target(page)) | ||
438 | continue; | ||
439 | |||
440 | /* | ||
441 | * Found a block suitable for isolating free pages from. Now | ||
442 | * we disabled interrupts, double check things are ok and | ||
443 | * isolate the pages. This is to minimise the time IRQs | ||
444 | * are disabled | ||
445 | */ | ||
446 | isolated = 0; | ||
447 | spin_lock_irqsave(&zone->lock, flags); | ||
448 | if (suitable_migration_target(page)) { | ||
449 | end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); | ||
450 | isolated = isolate_freepages_block(pfn, end_pfn, | ||
451 | freelist, false); | ||
452 | nr_freepages += isolated; | ||
453 | } | ||
454 | spin_unlock_irqrestore(&zone->lock, flags); | ||
455 | |||
456 | /* | ||
457 | * Record the highest PFN we isolated pages from. When next | ||
458 | * looking for free pages, the search will restart here as | ||
459 | * page migration may have returned some pages to the allocator | ||
460 | */ | ||
461 | if (isolated) | ||
462 | high_pfn = max(high_pfn, pfn); | ||
463 | } | ||
464 | |||
465 | /* split_free_page does not map the pages */ | ||
466 | map_pages(freelist); | ||
467 | |||
468 | cc->free_pfn = high_pfn; | ||
469 | cc->nr_freepages = nr_freepages; | ||
404 | } | 470 | } |
405 | 471 | ||
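For orientation, the free scanner in isolate_freepages() above walks backwards one pageblock at a time from cc->free_pfn and stops when it meets the migration scanner (it also stops early once enough free pages are isolated, which this sketch omits). A toy userspace model of that termination condition; the PFN values and pageblock size are made up for the example:

#include <stdio.h>

#define PAGEBLOCK_NR_PAGES  512UL                       /* assumed value, demo only */

int main(void)
{
    unsigned long free_pfn = 8192;      /* free scanner, starts near the zone end     */
    unsigned long migrate_pfn = 2048;   /* migration scanner, starts near the zone start */
    unsigned long low_pfn = migrate_pfn + PAGEBLOCK_NR_PAGES;
    unsigned long pfn;

    /* Walk backwards one pageblock per step; stop once the scanners meet. */
    for (pfn = free_pfn; pfn > low_pfn; pfn -= PAGEBLOCK_NR_PAGES)
        printf("consider pageblock starting at pfn %lu\n", pfn);

    return 0;
}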
406 | /* | 472 | /* |
@@ -449,6 +515,44 @@ static void update_nr_listpages(struct compact_control *cc) | |||
449 | cc->nr_freepages = nr_freepages; | 515 | cc->nr_freepages = nr_freepages; |
450 | } | 516 | } |
451 | 517 | ||
518 | /* possible outcome of isolate_migratepages */ | ||
519 | typedef enum { | ||
520 | ISOLATE_ABORT, /* Abort compaction now */ | ||
521 | ISOLATE_NONE, /* No pages isolated, continue scanning */ | ||
522 | ISOLATE_SUCCESS, /* Pages isolated, migrate */ | ||
523 | } isolate_migrate_t; | ||
524 | |||
525 | /* | ||
526 | * Isolate all pages that can be migrated from the block pointed to by | ||
527 | * the migrate scanner within compact_control. | ||
528 | */ | ||
529 | static isolate_migrate_t isolate_migratepages(struct zone *zone, | ||
530 | struct compact_control *cc) | ||
531 | { | ||
532 | unsigned long low_pfn, end_pfn; | ||
533 | |||
534 | /* Do not scan outside zone boundaries */ | ||
535 | low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); | ||
536 | |||
537 | /* Only scan within a pageblock boundary */ | ||
538 | end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); | ||
539 | |||
540 | /* Do not cross the free scanner or scan within a memory hole */ | ||
541 | if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { | ||
542 | cc->migrate_pfn = end_pfn; | ||
543 | return ISOLATE_NONE; | ||
544 | } | ||
545 | |||
546 | /* Perform the isolation */ | ||
547 | low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn); | ||
548 | if (!low_pfn) | ||
549 | return ISOLATE_ABORT; | ||
550 | |||
551 | cc->migrate_pfn = low_pfn; | ||
552 | |||
553 | return ISOLATE_SUCCESS; | ||
554 | } | ||
555 | |||
452 | static int compact_finished(struct zone *zone, | 556 | static int compact_finished(struct zone *zone, |
453 | struct compact_control *cc) | 557 | struct compact_control *cc) |
454 | { | 558 | { |
@@ -795,3 +899,5 @@ void compaction_unregister_node(struct node *node) | |||
795 | return device_remove_file(&node->dev, &dev_attr_compact); | 899 | return device_remove_file(&node->dev, &dev_attr_compact); |
796 | } | 900 | } |
797 | #endif /* CONFIG_SYSFS && CONFIG_NUMA */ | 901 | #endif /* CONFIG_SYSFS && CONFIG_NUMA */ |
902 | |||
903 | #endif /* CONFIG_COMPACTION */ | ||
diff --git a/mm/internal.h b/mm/internal.h index 2189af491783..aee4761cf9a9 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
@@ -100,6 +100,39 @@ extern void prep_compound_page(struct page *page, unsigned long order); | |||
100 | extern bool is_free_buddy_page(struct page *page); | 100 | extern bool is_free_buddy_page(struct page *page); |
101 | #endif | 101 | #endif |
102 | 102 | ||
103 | #if defined CONFIG_COMPACTION || defined CONFIG_CMA | ||
104 | |||
105 | /* | ||
106 | * in mm/compaction.c | ||
107 | */ | ||
108 | /* | ||
109 | * compact_control is used to track pages being migrated and the free pages | ||
110 | * they are being migrated to during memory compaction. The free_pfn starts | ||
111 | * at the end of a zone and migrate_pfn begins at the start. Movable pages | ||
112 | * are moved to the end of a zone during a compaction run and the run | ||
113 | * completes when free_pfn <= migrate_pfn | ||
114 | */ | ||
115 | struct compact_control { | ||
116 | struct list_head freepages; /* List of free pages to migrate to */ | ||
117 | struct list_head migratepages; /* List of pages being migrated */ | ||
118 | unsigned long nr_freepages; /* Number of isolated free pages */ | ||
119 | unsigned long nr_migratepages; /* Number of pages to migrate */ | ||
120 | unsigned long free_pfn; /* isolate_freepages search base */ | ||
121 | unsigned long migrate_pfn; /* isolate_migratepages search base */ | ||
122 | bool sync; /* Synchronous migration */ | ||
123 | |||
124 | int order; /* order a direct compactor needs */ | ||
125 | int migratetype; /* MOVABLE, RECLAIMABLE etc */ | ||
126 | struct zone *zone; | ||
127 | }; | ||
128 | |||
129 | unsigned long | ||
130 | isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn); | ||
131 | unsigned long | ||
132 | isolate_migratepages_range(struct zone *zone, struct compact_control *cc, | ||
133 | unsigned long low_pfn, unsigned long end_pfn); | ||
134 | |||
135 | #endif | ||
103 | 136 | ||
104 | /* | 137 | /* |
105 | * function for dealing with page's order in buddy system. | 138 | * function for dealing with page's order in buddy system. |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 97cc2733551a..c99ad4e6b88c 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -1404,7 +1404,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1404 | /* Not a free page */ | 1404 | /* Not a free page */ |
1405 | ret = 1; | 1405 | ret = 1; |
1406 | } | 1406 | } |
1407 | unset_migratetype_isolate(p); | 1407 | unset_migratetype_isolate(p, MIGRATE_MOVABLE); |
1408 | unlock_memory_hotplug(); | 1408 | unlock_memory_hotplug(); |
1409 | return ret; | 1409 | return ret; |
1410 | } | 1410 | } |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6629fafd6ce4..fc898cb4fe8f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -891,7 +891,7 @@ static int __ref offline_pages(unsigned long start_pfn, | |||
891 | nr_pages = end_pfn - start_pfn; | 891 | nr_pages = end_pfn - start_pfn; |
892 | 892 | ||
893 | /* set above range as isolated */ | 893 | /* set above range as isolated */ |
894 | ret = start_isolate_page_range(start_pfn, end_pfn); | 894 | ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); |
895 | if (ret) | 895 | if (ret) |
896 | goto out; | 896 | goto out; |
897 | 897 | ||
@@ -956,7 +956,7 @@ repeat: | |||
956 | We cannot do rollback at this point. */ | 956 | We cannot do rollback at this point. */ |
957 | offline_isolated_pages(start_pfn, end_pfn); | 957 | offline_isolated_pages(start_pfn, end_pfn); |
958 | /* reset pagetype flags and makes migrate type to be MOVABLE */ | 958 | /* reset pagetype flags and makes migrate type to be MOVABLE */ |
959 | undo_isolate_page_range(start_pfn, end_pfn); | 959 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); |
960 | /* removal success */ | 960 | /* removal success */ |
961 | zone->present_pages -= offlined_pages; | 961 | zone->present_pages -= offlined_pages; |
962 | zone->zone_pgdat->node_present_pages -= offlined_pages; | 962 | zone->zone_pgdat->node_present_pages -= offlined_pages; |
@@ -981,7 +981,7 @@ failed_removal: | |||
981 | start_pfn, end_pfn); | 981 | start_pfn, end_pfn); |
982 | memory_notify(MEM_CANCEL_OFFLINE, &arg); | 982 | memory_notify(MEM_CANCEL_OFFLINE, &arg); |
983 | /* pushback to free area */ | 983 | /* pushback to free area */ |
984 | undo_isolate_page_range(start_pfn, end_pfn); | 984 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); |
985 | 985 | ||
986 | out: | 986 | out: |
987 | unlock_memory_hotplug(); | 987 | unlock_memory_hotplug(); |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1851df600438..bab8e3bc4202 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <linux/ftrace_event.h> | 57 | #include <linux/ftrace_event.h> |
58 | #include <linux/memcontrol.h> | 58 | #include <linux/memcontrol.h> |
59 | #include <linux/prefetch.h> | 59 | #include <linux/prefetch.h> |
60 | #include <linux/migrate.h> | ||
60 | #include <linux/page-debug-flags.h> | 61 | #include <linux/page-debug-flags.h> |
61 | 62 | ||
62 | #include <asm/tlbflush.h> | 63 | #include <asm/tlbflush.h> |
@@ -513,10 +514,10 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, | |||
513 | * free pages of length of (1 << order) and marked with _mapcount -2. Page's | 514 | * free pages of length of (1 << order) and marked with _mapcount -2. Page's |
514 | * order is recorded in page_private(page) field. | 515 | * order is recorded in page_private(page) field. |
515 | * So when we are allocating or freeing one, we can derive the state of the | 516 | * So when we are allocating or freeing one, we can derive the state of the |
516 | * other. That is, if we allocate a small block, and both were | 517 | * other. That is, if we allocate a small block, and both were |
517 | * free, the remainder of the region must be split into blocks. | 518 | * free, the remainder of the region must be split into blocks. |
518 | * If a block is freed, and its buddy is also free, then this | 519 | * If a block is freed, and its buddy is also free, then this |
519 | * triggers coalescing into a block of larger size. | 520 | * triggers coalescing into a block of larger size. |
520 | * | 521 | * |
521 | * -- wli | 522 | * -- wli |
522 | */ | 523 | */ |
@@ -749,6 +750,24 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order) | |||
749 | __free_pages(page, order); | 750 | __free_pages(page, order); |
750 | } | 751 | } |
751 | 752 | ||
753 | #ifdef CONFIG_CMA | ||
754 | /* Free the whole pageblock and set its migration type to MIGRATE_CMA. */ | ||
755 | void __init init_cma_reserved_pageblock(struct page *page) | ||
756 | { | ||
757 | unsigned i = pageblock_nr_pages; | ||
758 | struct page *p = page; | ||
759 | |||
760 | do { | ||
761 | __ClearPageReserved(p); | ||
762 | set_page_count(p, 0); | ||
763 | } while (++p, --i); | ||
764 | |||
765 | set_page_refcounted(page); | ||
766 | set_pageblock_migratetype(page, MIGRATE_CMA); | ||
767 | __free_pages(page, pageblock_order); | ||
768 | totalram_pages += pageblock_nr_pages; | ||
769 | } | ||
770 | #endif | ||
752 | 771 | ||
753 | /* | 772 | /* |
754 | * The order of subdivision here is critical for the IO subsystem. | 773 | * The order of subdivision here is critical for the IO subsystem. |
@@ -874,11 +893,17 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, | |||
874 | * This array describes the order lists are fallen back to when | 893 | * This array describes the order lists are fallen back to when |
875 | * the free lists for the desirable migrate type are depleted | 894 | * the free lists for the desirable migrate type are depleted |
876 | */ | 895 | */ |
877 | static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { | 896 | static int fallbacks[MIGRATE_TYPES][4] = { |
878 | [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, | 897 | [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, |
879 | [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, | 898 | [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, |
880 | [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, | 899 | #ifdef CONFIG_CMA |
881 | [MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */ | 900 | [MIGRATE_MOVABLE] = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, |
901 | [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ | ||
902 | #else | ||
903 | [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, | ||
904 | #endif | ||
905 | [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ | ||
906 | [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ | ||
882 | }; | 907 | }; |
883 | 908 | ||
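A userspace sketch of why the fallback walk can now use for (i = 0;; i++): with CONFIG_CMA every row of the table ends in MIGRATE_RESERVE, so the loop terminates on that sentinel instead of iterating over MIGRATE_TYPES - 1 slots. The enum values and table below are a simplified copy for illustration, not the kernel's definitions, and the real code additionally refuses to change the migratetype of MIGRATE_CMA pageblocks, which this sketch omits.

#include <stdio.h>

/* Simplified migratetype enum, matching the order printed by vmstat. */
enum { MIGRATE_UNMOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,
       MIGRATE_RESERVE, MIGRATE_CMA, MIGRATE_ISOLATE, MIGRATE_TYPES };

/* Toy copy of the CONFIG_CMA variant of the fallback table. */
static const int fallbacks[MIGRATE_TYPES][4] = {
    [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
    [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
    [MIGRATE_MOVABLE]     = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
    [MIGRATE_CMA]         = { MIGRATE_RESERVE },
    [MIGRATE_RESERVE]     = { MIGRATE_RESERVE },
    [MIGRATE_ISOLATE]     = { MIGRATE_RESERVE },
};

int main(void)
{
    int start_migratetype = MIGRATE_MOVABLE, i;

    for (i = 0;; i++) {
        int migratetype = fallbacks[start_migratetype][i];

        if (migratetype == MIGRATE_RESERVE)     /* sentinel: no more fallbacks */
            break;
        printf("would try fallback migratetype %d\n", migratetype);
    }
    return 0;
}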
884 | /* | 909 | /* |
@@ -973,12 +998,12 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | |||
973 | /* Find the largest possible block of pages in the other list */ | 998 | /* Find the largest possible block of pages in the other list */ |
974 | for (current_order = MAX_ORDER-1; current_order >= order; | 999 | for (current_order = MAX_ORDER-1; current_order >= order; |
975 | --current_order) { | 1000 | --current_order) { |
976 | for (i = 0; i < MIGRATE_TYPES - 1; i++) { | 1001 | for (i = 0;; i++) { |
977 | migratetype = fallbacks[start_migratetype][i]; | 1002 | migratetype = fallbacks[start_migratetype][i]; |
978 | 1003 | ||
979 | /* MIGRATE_RESERVE handled later if necessary */ | 1004 | /* MIGRATE_RESERVE handled later if necessary */ |
980 | if (migratetype == MIGRATE_RESERVE) | 1005 | if (migratetype == MIGRATE_RESERVE) |
981 | continue; | 1006 | break; |
982 | 1007 | ||
983 | area = &(zone->free_area[current_order]); | 1008 | area = &(zone->free_area[current_order]); |
984 | if (list_empty(&area->free_list[migratetype])) | 1009 | if (list_empty(&area->free_list[migratetype])) |
@@ -993,11 +1018,18 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | |||
993 | * pages to the preferred allocation list. If falling | 1018 | * pages to the preferred allocation list. If falling |
994 | * back for a reclaimable kernel allocation, be more | 1019 | * back for a reclaimable kernel allocation, be more |
995 | * aggressive about taking ownership of free pages | 1020 | * aggressive about taking ownership of free pages |
1021 | * | ||
1022 | * On the other hand, never change migration | ||
1023 | * type of MIGRATE_CMA pageblocks nor move CMA | ||
1024 | * pages to different free lists. We don't | ||
1025 | * want unmovable pages to be allocated from | ||
1026 | * MIGRATE_CMA areas. | ||
996 | */ | 1027 | */ |
997 | if (unlikely(current_order >= (pageblock_order >> 1)) || | 1028 | if (!is_migrate_cma(migratetype) && |
998 | start_migratetype == MIGRATE_RECLAIMABLE || | 1029 | (unlikely(current_order >= pageblock_order / 2) || |
999 | page_group_by_mobility_disabled) { | 1030 | start_migratetype == MIGRATE_RECLAIMABLE || |
1000 | unsigned long pages; | 1031 | page_group_by_mobility_disabled)) { |
1032 | int pages; | ||
1001 | pages = move_freepages_block(zone, page, | 1033 | pages = move_freepages_block(zone, page, |
1002 | start_migratetype); | 1034 | start_migratetype); |
1003 | 1035 | ||
@@ -1015,11 +1047,14 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | |||
1015 | rmv_page_order(page); | 1047 | rmv_page_order(page); |
1016 | 1048 | ||
1017 | /* Take ownership for orders >= pageblock_order */ | 1049 | /* Take ownership for orders >= pageblock_order */ |
1018 | if (current_order >= pageblock_order) | 1050 | if (current_order >= pageblock_order && |
1051 | !is_migrate_cma(migratetype)) | ||
1019 | change_pageblock_range(page, current_order, | 1052 | change_pageblock_range(page, current_order, |
1020 | start_migratetype); | 1053 | start_migratetype); |
1021 | 1054 | ||
1022 | expand(zone, page, order, current_order, area, migratetype); | 1055 | expand(zone, page, order, current_order, area, |
1056 | is_migrate_cma(migratetype) | ||
1057 | ? migratetype : start_migratetype); | ||
1023 | 1058 | ||
1024 | trace_mm_page_alloc_extfrag(page, order, current_order, | 1059 | trace_mm_page_alloc_extfrag(page, order, current_order, |
1025 | start_migratetype, migratetype); | 1060 | start_migratetype, migratetype); |
@@ -1061,17 +1096,17 @@ retry_reserve: | |||
1061 | return page; | 1096 | return page; |
1062 | } | 1097 | } |
1063 | 1098 | ||
1064 | /* | 1099 | /* |
1065 | * Obtain a specified number of elements from the buddy allocator, all under | 1100 | * Obtain a specified number of elements from the buddy allocator, all under |
1066 | * a single hold of the lock, for efficiency. Add them to the supplied list. | 1101 | * a single hold of the lock, for efficiency. Add them to the supplied list. |
1067 | * Returns the number of new pages which were placed at *list. | 1102 | * Returns the number of new pages which were placed at *list. |
1068 | */ | 1103 | */ |
1069 | static int rmqueue_bulk(struct zone *zone, unsigned int order, | 1104 | static int rmqueue_bulk(struct zone *zone, unsigned int order, |
1070 | unsigned long count, struct list_head *list, | 1105 | unsigned long count, struct list_head *list, |
1071 | int migratetype, int cold) | 1106 | int migratetype, int cold) |
1072 | { | 1107 | { |
1073 | int i; | 1108 | int mt = migratetype, i; |
1074 | 1109 | ||
1075 | spin_lock(&zone->lock); | 1110 | spin_lock(&zone->lock); |
1076 | for (i = 0; i < count; ++i) { | 1111 | for (i = 0; i < count; ++i) { |
1077 | struct page *page = __rmqueue(zone, order, migratetype); | 1112 | struct page *page = __rmqueue(zone, order, migratetype); |
@@ -1091,7 +1126,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
1091 | list_add(&page->lru, list); | 1126 | list_add(&page->lru, list); |
1092 | else | 1127 | else |
1093 | list_add_tail(&page->lru, list); | 1128 | list_add_tail(&page->lru, list); |
1094 | set_page_private(page, migratetype); | 1129 | if (IS_ENABLED(CONFIG_CMA)) { |
1130 | mt = get_pageblock_migratetype(page); | ||
1131 | if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE) | ||
1132 | mt = migratetype; | ||
1133 | } | ||
1134 | set_page_private(page, mt); | ||
1095 | list = &page->lru; | 1135 | list = &page->lru; |
1096 | } | 1136 | } |
1097 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); | 1137 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); |
@@ -1371,8 +1411,12 @@ int split_free_page(struct page *page) | |||
1371 | 1411 | ||
1372 | if (order >= pageblock_order - 1) { | 1412 | if (order >= pageblock_order - 1) { |
1373 | struct page *endpage = page + (1 << order) - 1; | 1413 | struct page *endpage = page + (1 << order) - 1; |
1374 | for (; page < endpage; page += pageblock_nr_pages) | 1414 | for (; page < endpage; page += pageblock_nr_pages) { |
1375 | set_pageblock_migratetype(page, MIGRATE_MOVABLE); | 1415 | int mt = get_pageblock_migratetype(page); |
1416 | if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt)) | ||
1417 | set_pageblock_migratetype(page, | ||
1418 | MIGRATE_MOVABLE); | ||
1419 | } | ||
1376 | } | 1420 | } |
1377 | 1421 | ||
1378 | return 1 << order; | 1422 | return 1 << order; |
@@ -2086,16 +2130,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | |||
2086 | } | 2130 | } |
2087 | #endif /* CONFIG_COMPACTION */ | 2131 | #endif /* CONFIG_COMPACTION */ |
2088 | 2132 | ||
2089 | /* The really slow allocator path where we enter direct reclaim */ | 2133 | /* Perform direct synchronous page reclaim */ |
2090 | static inline struct page * | 2134 | static int |
2091 | __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, | 2135 | __perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, |
2092 | struct zonelist *zonelist, enum zone_type high_zoneidx, | 2136 | nodemask_t *nodemask) |
2093 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, | ||
2094 | int migratetype, unsigned long *did_some_progress) | ||
2095 | { | 2137 | { |
2096 | struct page *page = NULL; | ||
2097 | struct reclaim_state reclaim_state; | 2138 | struct reclaim_state reclaim_state; |
2098 | bool drained = false; | 2139 | int progress; |
2099 | 2140 | ||
2100 | cond_resched(); | 2141 | cond_resched(); |
2101 | 2142 | ||
@@ -2106,7 +2147,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, | |||
2106 | reclaim_state.reclaimed_slab = 0; | 2147 | reclaim_state.reclaimed_slab = 0; |
2107 | current->reclaim_state = &reclaim_state; | 2148 | current->reclaim_state = &reclaim_state; |
2108 | 2149 | ||
2109 | *did_some_progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); | 2150 | progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); |
2110 | 2151 | ||
2111 | current->reclaim_state = NULL; | 2152 | current->reclaim_state = NULL; |
2112 | lockdep_clear_current_reclaim_state(); | 2153 | lockdep_clear_current_reclaim_state(); |
@@ -2114,6 +2155,21 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, | |||
2114 | 2155 | ||
2115 | cond_resched(); | 2156 | cond_resched(); |
2116 | 2157 | ||
2158 | return progress; | ||
2159 | } | ||
2160 | |||
2161 | /* The really slow allocator path where we enter direct reclaim */ | ||
2162 | static inline struct page * | ||
2163 | __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, | ||
2164 | struct zonelist *zonelist, enum zone_type high_zoneidx, | ||
2165 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, | ||
2166 | int migratetype, unsigned long *did_some_progress) | ||
2167 | { | ||
2168 | struct page *page = NULL; | ||
2169 | bool drained = false; | ||
2170 | |||
2171 | *did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, | ||
2172 | nodemask); | ||
2117 | if (unlikely(!(*did_some_progress))) | 2173 | if (unlikely(!(*did_some_progress))) |
2118 | return NULL; | 2174 | return NULL; |
2119 | 2175 | ||
@@ -4301,7 +4357,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
4301 | init_waitqueue_head(&pgdat->kswapd_wait); | 4357 | init_waitqueue_head(&pgdat->kswapd_wait); |
4302 | pgdat->kswapd_max_order = 0; | 4358 | pgdat->kswapd_max_order = 0; |
4303 | pgdat_page_cgroup_init(pgdat); | 4359 | pgdat_page_cgroup_init(pgdat); |
4304 | 4360 | ||
4305 | for (j = 0; j < MAX_NR_ZONES; j++) { | 4361 | for (j = 0; j < MAX_NR_ZONES; j++) { |
4306 | struct zone *zone = pgdat->node_zones + j; | 4362 | struct zone *zone = pgdat->node_zones + j; |
4307 | unsigned long size, realsize, memmap_pages; | 4363 | unsigned long size, realsize, memmap_pages; |
@@ -4976,14 +5032,7 @@ static void setup_per_zone_lowmem_reserve(void) | |||
4976 | calculate_totalreserve_pages(); | 5032 | calculate_totalreserve_pages(); |
4977 | } | 5033 | } |
4978 | 5034 | ||
4979 | /** | 5035 | static void __setup_per_zone_wmarks(void) |
4980 | * setup_per_zone_wmarks - called when min_free_kbytes changes | ||
4981 | * or when memory is hot-{added|removed} | ||
4982 | * | ||
4983 | * Ensures that the watermark[min,low,high] values for each zone are set | ||
4984 | * correctly with respect to min_free_kbytes. | ||
4985 | */ | ||
4986 | void setup_per_zone_wmarks(void) | ||
4987 | { | 5036 | { |
4988 | unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); | 5037 | unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); |
4989 | unsigned long lowmem_pages = 0; | 5038 | unsigned long lowmem_pages = 0; |
@@ -5030,6 +5079,11 @@ void setup_per_zone_wmarks(void) | |||
5030 | 5079 | ||
5031 | zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); | 5080 | zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); |
5032 | zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); | 5081 | zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); |
5082 | |||
5083 | zone->watermark[WMARK_MIN] += cma_wmark_pages(zone); | ||
5084 | zone->watermark[WMARK_LOW] += cma_wmark_pages(zone); | ||
5085 | zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone); | ||
5086 | |||
5033 | setup_zone_migrate_reserve(zone); | 5087 | setup_zone_migrate_reserve(zone); |
5034 | spin_unlock_irqrestore(&zone->lock, flags); | 5088 | spin_unlock_irqrestore(&zone->lock, flags); |
5035 | } | 5089 | } |
@@ -5038,6 +5092,20 @@ void setup_per_zone_wmarks(void) | |||
5038 | calculate_totalreserve_pages(); | 5092 | calculate_totalreserve_pages(); |
5039 | } | 5093 | } |
5040 | 5094 | ||
5095 | /** | ||
5096 | * setup_per_zone_wmarks - called when min_free_kbytes changes | ||
5097 | * or when memory is hot-{added|removed} | ||
5098 | * | ||
5099 | * Ensures that the watermark[min,low,high] values for each zone are set | ||
5100 | * correctly with respect to min_free_kbytes. | ||
5101 | */ | ||
5102 | void setup_per_zone_wmarks(void) | ||
5103 | { | ||
5104 | mutex_lock(&zonelists_mutex); | ||
5105 | __setup_per_zone_wmarks(); | ||
5106 | mutex_unlock(&zonelists_mutex); | ||
5107 | } | ||
5108 | |||
5041 | /* | 5109 | /* |
5042 | * The inactive anon list should be small enough that the VM never has to | 5110 | * The inactive anon list should be small enough that the VM never has to |
5043 | * do too much work, but large enough that each inactive page has a chance | 5111 | * do too much work, but large enough that each inactive page has a chance |
@@ -5415,14 +5483,16 @@ static int | |||
5415 | __count_immobile_pages(struct zone *zone, struct page *page, int count) | 5483 | __count_immobile_pages(struct zone *zone, struct page *page, int count) |
5416 | { | 5484 | { |
5417 | unsigned long pfn, iter, found; | 5485 | unsigned long pfn, iter, found; |
5486 | int mt; | ||
5487 | |||
5418 | /* | 5488 | /* |
5419 | * For avoiding noise data, lru_add_drain_all() should be called | 5489 | * For avoiding noise data, lru_add_drain_all() should be called |
5420 | * If ZONE_MOVABLE, the zone never contains immobile pages | 5490 | * If ZONE_MOVABLE, the zone never contains immobile pages |
5421 | */ | 5491 | */ |
5422 | if (zone_idx(zone) == ZONE_MOVABLE) | 5492 | if (zone_idx(zone) == ZONE_MOVABLE) |
5423 | return true; | 5493 | return true; |
5424 | 5494 | mt = get_pageblock_migratetype(page); | |
5425 | if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE) | 5495 | if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt)) |
5426 | return true; | 5496 | return true; |
5427 | 5497 | ||
5428 | pfn = page_to_pfn(page); | 5498 | pfn = page_to_pfn(page); |
@@ -5539,7 +5609,7 @@ out: | |||
5539 | return ret; | 5609 | return ret; |
5540 | } | 5610 | } |
5541 | 5611 | ||
5542 | void unset_migratetype_isolate(struct page *page) | 5612 | void unset_migratetype_isolate(struct page *page, unsigned migratetype) |
5543 | { | 5613 | { |
5544 | struct zone *zone; | 5614 | struct zone *zone; |
5545 | unsigned long flags; | 5615 | unsigned long flags; |
@@ -5547,12 +5617,259 @@ void unset_migratetype_isolate(struct page *page) | |||
5547 | spin_lock_irqsave(&zone->lock, flags); | 5617 | spin_lock_irqsave(&zone->lock, flags); |
5548 | if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) | 5618 | if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) |
5549 | goto out; | 5619 | goto out; |
5550 | set_pageblock_migratetype(page, MIGRATE_MOVABLE); | 5620 | set_pageblock_migratetype(page, migratetype); |
5551 | move_freepages_block(zone, page, MIGRATE_MOVABLE); | 5621 | move_freepages_block(zone, page, migratetype); |
5552 | out: | 5622 | out: |
5553 | spin_unlock_irqrestore(&zone->lock, flags); | 5623 | spin_unlock_irqrestore(&zone->lock, flags); |
5554 | } | 5624 | } |
5555 | 5625 | ||
5626 | #ifdef CONFIG_CMA | ||
5627 | |||
5628 | static unsigned long pfn_max_align_down(unsigned long pfn) | ||
5629 | { | ||
5630 | return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES, | ||
5631 | pageblock_nr_pages) - 1); | ||
5632 | } | ||
5633 | |||
5634 | static unsigned long pfn_max_align_up(unsigned long pfn) | ||
5635 | { | ||
5636 | return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES, | ||
5637 | pageblock_nr_pages)); | ||
5638 | } | ||
5639 | |||
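A quick userspace check of what pfn_max_align_down()/pfn_max_align_up() compute. MAX_ORDER_NR_PAGES and pageblock_nr_pages are assumed to be 1024 and 512 here purely for illustration; both are configuration dependent.

#include <stdio.h>

#define MAX_ORDER_NR_PAGES  1024UL                      /* assumed value, demo only */
#define PAGEBLOCK_NR_PAGES  512UL                       /* assumed value, demo only */
#define MAX(a, b)           ((a) > (b) ? (a) : (b))
#define ALIGN(x, a)         (((x) + (a) - 1) & ~((a) - 1))

static unsigned long pfn_max_align_down(unsigned long pfn)
{
    return pfn & ~(MAX(MAX_ORDER_NR_PAGES, PAGEBLOCK_NR_PAGES) - 1);
}

static unsigned long pfn_max_align_up(unsigned long pfn)
{
    return ALIGN(pfn, MAX(MAX_ORDER_NR_PAGES, PAGEBLOCK_NR_PAGES));
}

int main(void)
{
    /* A range that is neither pageblock- nor MAX_ORDER-aligned ... */
    unsigned long start = 5000, end = 9000;

    /* ... is widened to [4096, 9216) before isolation. */
    printf("[%lu, %lu) -> [%lu, %lu)\n",
           start, end, pfn_max_align_down(start), pfn_max_align_up(end));
    return 0;
}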
5640 | static struct page * | ||
5641 | __alloc_contig_migrate_alloc(struct page *page, unsigned long private, | ||
5642 | int **resultp) | ||
5643 | { | ||
5644 | return alloc_page(GFP_HIGHUSER_MOVABLE); | ||
5645 | } | ||
5646 | |||
5647 | /* [start, end) must belong to a single zone. */ | ||
5648 | static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) | ||
5649 | { | ||
5650 | /* This function is based on compact_zone() from compaction.c. */ | ||
5651 | |||
5652 | unsigned long pfn = start; | ||
5653 | unsigned int tries = 0; | ||
5654 | int ret = 0; | ||
5655 | |||
5656 | struct compact_control cc = { | ||
5657 | .nr_migratepages = 0, | ||
5658 | .order = -1, | ||
5659 | .zone = page_zone(pfn_to_page(start)), | ||
5660 | .sync = true, | ||
5661 | }; | ||
5662 | INIT_LIST_HEAD(&cc.migratepages); | ||
5663 | |||
5664 | migrate_prep_local(); | ||
5665 | |||
5666 | while (pfn < end || !list_empty(&cc.migratepages)) { | ||
5667 | if (fatal_signal_pending(current)) { | ||
5668 | ret = -EINTR; | ||
5669 | break; | ||
5670 | } | ||
5671 | |||
5672 | if (list_empty(&cc.migratepages)) { | ||
5673 | cc.nr_migratepages = 0; | ||
5674 | pfn = isolate_migratepages_range(cc.zone, &cc, | ||
5675 | pfn, end); | ||
5676 | if (!pfn) { | ||
5677 | ret = -EINTR; | ||
5678 | break; | ||
5679 | } | ||
5680 | tries = 0; | ||
5681 | } else if (++tries == 5) { | ||
5682 | ret = ret < 0 ? ret : -EBUSY; | ||
5683 | break; | ||
5684 | } | ||
5685 | |||
5686 | ret = migrate_pages(&cc.migratepages, | ||
5687 | __alloc_contig_migrate_alloc, | ||
5688 | 0, false, MIGRATE_SYNC); | ||
5689 | } | ||
5690 | |||
5691 | putback_lru_pages(&cc.migratepages); | ||
5692 | return ret > 0 ? 0 : ret; | ||
5693 | } | ||
5694 | |||
5695 | /* | ||
5696 | * Update zone's cma pages counter used for watermark level calculation. | ||
5697 | */ | ||
5698 | static inline void __update_cma_watermarks(struct zone *zone, int count) | ||
5699 | { | ||
5700 | unsigned long flags; | ||
5701 | spin_lock_irqsave(&zone->lock, flags); | ||
5702 | zone->min_cma_pages += count; | ||
5703 | spin_unlock_irqrestore(&zone->lock, flags); | ||
5704 | setup_per_zone_wmarks(); | ||
5705 | } | ||
5706 | |||
5707 | /* | ||
5708 | * Trigger memory pressure bump to reclaim some pages in order to be able to | ||
5709 | * allocate 'count' pages in single page units. It does work similar | ||
5710 | * to the __alloc_pages_slowpath() function. | ||
5711 | */ | ||
5712 | static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count) | ||
5713 | { | ||
5714 | enum zone_type high_zoneidx = gfp_zone(gfp_mask); | ||
5715 | struct zonelist *zonelist = node_zonelist(0, gfp_mask); | ||
5716 | int did_some_progress = 0; | ||
5717 | int order = 1; | ||
5718 | |||
5719 | /* | ||
5720 | * Increase the watermark levels to force kswapd to do its job and | ||
5721 | * stabilise at the new watermark level. | ||
5722 | */ | ||
5723 | __update_cma_watermarks(zone, count); | ||
5724 | |||
5725 | /* Obey watermarks as if the page was being allocated */ | ||
5726 | while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) { | ||
5727 | wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone)); | ||
5728 | |||
5729 | did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, | ||
5730 | NULL); | ||
5731 | if (!did_some_progress) { | ||
5732 | /* Exhausted what can be done so it's blamo time */ | ||
5733 | out_of_memory(zonelist, gfp_mask, order, NULL, false); | ||
5734 | } | ||
5735 | } | ||
5736 | |||
5737 | /* Restore original watermark levels. */ | ||
5738 | __update_cma_watermarks(zone, -count); | ||
5739 | |||
5740 | return count; | ||
5741 | } | ||
5742 | |||
5743 | /** | ||
5744 | * alloc_contig_range() -- tries to allocate given range of pages | ||
5745 | * @start: start PFN to allocate | ||
5746 | * @end: one-past-the-last PFN to allocate | ||
5747 | * @migratetype: migratetype of the underlying pageblocks (either | ||
5748 | * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks | ||
5749 | * in range must have the same migratetype and it must | ||
5750 | * be either of the two. | ||
5751 | * | ||
5752 | * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES | ||
5753 | * aligned, however it's the caller's responsibility to guarantee that | ||
5754 | * we are the only thread that changes migrate type of pageblocks the | ||
5755 | * pages fall in. | ||
5756 | * | ||
5757 | * The PFN range must belong to a single zone. | ||
5758 | * | ||
5759 | * Returns zero on success or negative error code. On success all | ||
5760 | * pages whose PFNs are in [start, end) are allocated for the caller and | ||
5761 | * need to be freed with free_contig_range(). | ||
5762 | */ | ||
5763 | int alloc_contig_range(unsigned long start, unsigned long end, | ||
5764 | unsigned migratetype) | ||
5765 | { | ||
5766 | struct zone *zone = page_zone(pfn_to_page(start)); | ||
5767 | unsigned long outer_start, outer_end; | ||
5768 | int ret = 0, order; | ||
5769 | |||
5770 | /* | ||
5771 | * What we do here is we mark all pageblocks in range as | ||
5772 | * MIGRATE_ISOLATE. Because pageblock and max order pages may | ||
5773 | * have different sizes, and due to the way the page allocator | ||
5774 | * works, we align the range to the bigger of the two sizes so | ||
5775 | * that page allocator won't try to merge buddies from | ||
5776 | * different pageblocks and change MIGRATE_ISOLATE to some | ||
5777 | * other migration type. | ||
5778 | * | ||
5779 | * Once the pageblocks are marked as MIGRATE_ISOLATE, we | ||
5780 | * migrate the pages from an unaligned range (ie. pages that | ||
5781 | * we are interested in). This will put all the pages in | ||
5782 | * range back to page allocator as MIGRATE_ISOLATE. | ||
5783 | * | ||
5784 | * When this is done, we take the pages in range from page | ||
5785 | * allocator removing them from the buddy system. This way | ||
5786 | * page allocator will never consider using them. | ||
5787 | * | ||
5788 | * This lets us mark the pageblocks back as | ||
5789 | * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the | ||
5790 | * aligned range but not in the unaligned, original range are | ||
5791 | * put back to page allocator so that buddy can use them. | ||
5792 | */ | ||
5793 | |||
5794 | ret = start_isolate_page_range(pfn_max_align_down(start), | ||
5795 | pfn_max_align_up(end), migratetype); | ||
5796 | if (ret) | ||
5797 | goto done; | ||
5798 | |||
5799 | ret = __alloc_contig_migrate_range(start, end); | ||
5800 | if (ret) | ||
5801 | goto done; | ||
5802 | |||
5803 | /* | ||
5804 | * Pages from [start, end) are within MAX_ORDER_NR_PAGES | ||
5805 | * aligned blocks that are marked as MIGRATE_ISOLATE. What's | ||
5806 | * more, all pages in [start, end) are free in page allocator. | ||
5807 | * What we are going to do is to allocate all pages from | ||
5808 | * [start, end) (that is remove them from page allocator). | ||
5809 | * | ||
5810 | * The only problem is that pages at the beginning and at the | ||
5811 | * end of the interesting range may not be aligned with pages that | ||
5812 | * page allocator holds, ie. they can be part of higher order | ||
5813 | * pages. Because of this, we reserve the bigger range and | ||
5814 | * once this is done free the pages we are not interested in. | ||
5815 | * | ||
5816 | * We don't have to hold zone->lock here because the pages are | ||
5817 | * isolated thus they won't get removed from buddy. | ||
5818 | */ | ||
5819 | |||
5820 | lru_add_drain_all(); | ||
5821 | drain_all_pages(); | ||
5822 | |||
5823 | order = 0; | ||
5824 | outer_start = start; | ||
5825 | while (!PageBuddy(pfn_to_page(outer_start))) { | ||
5826 | if (++order >= MAX_ORDER) { | ||
5827 | ret = -EBUSY; | ||
5828 | goto done; | ||
5829 | } | ||
5830 | outer_start &= ~0UL << order; | ||
5831 | } | ||
5832 | |||
5833 | /* Make sure the range is really isolated. */ | ||
5834 | if (test_pages_isolated(outer_start, end)) { | ||
5835 | pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", | ||
5836 | outer_start, end); | ||
5837 | ret = -EBUSY; | ||
5838 | goto done; | ||
5839 | } | ||
5840 | |||
5841 | /* | ||
5842 | * Reclaim enough pages to make sure that contiguous allocation | ||
5843 | * will not starve the system. | ||
5844 | */ | ||
5845 | __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start); | ||
5846 | |||
5847 | /* Grab isolated pages from freelists. */ | ||
5848 | outer_end = isolate_freepages_range(outer_start, end); | ||
5849 | if (!outer_end) { | ||
5850 | ret = -EBUSY; | ||
5851 | goto done; | ||
5852 | } | ||
5853 | |||
5854 | /* Free head and tail (if any) */ | ||
5855 | if (start != outer_start) | ||
5856 | free_contig_range(outer_start, start - outer_start); | ||
5857 | if (end != outer_end) | ||
5858 | free_contig_range(end, outer_end - end); | ||
5859 | |||
5860 | done: | ||
5861 | undo_isolate_page_range(pfn_max_align_down(start), | ||
5862 | pfn_max_align_up(end), migratetype); | ||
5863 | return ret; | ||
5864 | } | ||
5865 | |||
5866 | void free_contig_range(unsigned long pfn, unsigned nr_pages) | ||
5867 | { | ||
5868 | for (; nr_pages--; ++pfn) | ||
5869 | __free_page(pfn_to_page(pfn)); | ||
5870 | } | ||
5871 | #endif | ||
5872 | |||
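To round the CMA section off, a hedged sketch of how a caller is expected to use the two new entry points; the helper names my_grab_region()/my_release_region() and the surrounding context are invented for illustration and are not part of the patch.

/*
 * Illustrative only: a caller whose pageblocks were reserved as
 * MIGRATE_CMA at boot grabs and releases a physically contiguous
 * [start, start + count) PFN range.
 */
static struct page *my_grab_region(unsigned long start, unsigned long count)
{
    int ret;

    ret = alloc_contig_range(start, start + count, MIGRATE_CMA);
    if (ret)
        return NULL;                /* e.g. -EBUSY or -EINTR */

    return pfn_to_page(start);      /* caller now owns all 'count' pages */
}

static void my_release_region(unsigned long start, unsigned long count)
{
    free_contig_range(start, count);
}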
5556 | #ifdef CONFIG_MEMORY_HOTREMOVE | 5873 | #ifdef CONFIG_MEMORY_HOTREMOVE |
5557 | /* | 5874 | /* |
5558 | * All pages in the range must be isolated before calling this. | 5875 | * All pages in the range must be isolated before calling this. |
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 4ae42bb40892..c9f04774f2b8 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c | |||
@@ -24,6 +24,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) | |||
24 | * to be MIGRATE_ISOLATE. | 24 | * to be MIGRATE_ISOLATE. |
25 | * @start_pfn: The lower PFN of the range to be isolated. | 25 | * @start_pfn: The lower PFN of the range to be isolated. |
26 | * @end_pfn: The upper PFN of the range to be isolated. | 26 | * @end_pfn: The upper PFN of the range to be isolated. |
27 | * @migratetype: migrate type to set in error recovery. | ||
27 | * | 28 | * |
28 | * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in | 29 | * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in |
29 | * the range will never be allocated. Any free pages and pages freed in the | 30 | * the range will never be allocated. Any free pages and pages freed in the |
@@ -32,8 +33,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) | |||
32 | * start_pfn/end_pfn must be aligned to pageblock_order. | 33 | * start_pfn/end_pfn must be aligned to pageblock_order. |
33 | * Returns 0 on success and -EBUSY if any part of range cannot be isolated. | 34 | * Returns 0 on success and -EBUSY if any part of range cannot be isolated. |
34 | */ | 35 | */ |
35 | int | 36 | int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
36 | start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) | 37 | unsigned migratetype) |
37 | { | 38 | { |
38 | unsigned long pfn; | 39 | unsigned long pfn; |
39 | unsigned long undo_pfn; | 40 | unsigned long undo_pfn; |
@@ -56,7 +57,7 @@ undo: | |||
56 | for (pfn = start_pfn; | 57 | for (pfn = start_pfn; |
57 | pfn < undo_pfn; | 58 | pfn < undo_pfn; |
58 | pfn += pageblock_nr_pages) | 59 | pfn += pageblock_nr_pages) |
59 | unset_migratetype_isolate(pfn_to_page(pfn)); | 60 | unset_migratetype_isolate(pfn_to_page(pfn), migratetype); |
60 | 61 | ||
61 | return -EBUSY; | 62 | return -EBUSY; |
62 | } | 63 | } |
@@ -64,8 +65,8 @@ undo: | |||
64 | /* | 65 | /* |
65 | * Make isolated pages available again. | 66 | * Make isolated pages available again. |
66 | */ | 67 | */ |
67 | int | 68 | int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
68 | undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) | 69 | unsigned migratetype) |
69 | { | 70 | { |
70 | unsigned long pfn; | 71 | unsigned long pfn; |
71 | struct page *page; | 72 | struct page *page; |
@@ -77,7 +78,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) | |||
77 | page = __first_valid_page(pfn, pageblock_nr_pages); | 78 | page = __first_valid_page(pfn, pageblock_nr_pages); |
78 | if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE) | 79 | if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE) |
79 | continue; | 80 | continue; |
80 | unset_migratetype_isolate(page); | 81 | unset_migratetype_isolate(page, migratetype); |
81 | } | 82 | } |
82 | return 0; | 83 | return 0; |
83 | } | 84 | } |
@@ -86,7 +87,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) | |||
86 | * all pages in [start_pfn...end_pfn) must be in the same zone. | 87 | * all pages in [start_pfn...end_pfn) must be in the same zone. |
87 | * zone->lock must be held before call this. | 88 | * zone->lock must be held before call this. |
88 | * | 89 | * |
89 | * Returns 1 if all pages in the range is isolated. | 90 | * Returns 1 if all pages in the range are isolated. |
90 | */ | 91 | */ |
91 | static int | 92 | static int |
92 | __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn) | 93 | __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn) |
diff --git a/mm/vmstat.c b/mm/vmstat.c index 7db1b9bab492..0dad31dc1618 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -613,6 +613,9 @@ static char * const migratetype_names[MIGRATE_TYPES] = { | |||
613 | "Reclaimable", | 613 | "Reclaimable", |
614 | "Movable", | 614 | "Movable", |
615 | "Reserve", | 615 | "Reserve", |
616 | #ifdef CONFIG_CMA | ||
617 | "CMA", | ||
618 | #endif | ||
616 | "Isolate", | 619 | "Isolate", |
617 | }; | 620 | }; |
618 | 621 | ||