diff options
41 files changed, 2898 insertions, 780 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b69cfdc12112..f1959b7d13d0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
| @@ -508,6 +508,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
| 508 | Also note the kernel might malfunction if you disable | 508 | Also note the kernel might malfunction if you disable |
| 509 | some critical bits. | 509 | some critical bits. |
| 510 | 510 | ||
| 511 | cma=nn[MG] [ARM,KNL] | ||
| 512 | Sets the size of kernel global memory area for contiguous | ||
| 513 | memory allocations. For more information, see | ||
| 514 | include/linux/dma-contiguous.h | ||
| 515 | |||
| 511 | cmo_free_hint= [PPC] Format: { yes | no } | 516 | cmo_free_hint= [PPC] Format: { yes | no } |
| 512 | Specify whether pages are marked as being inactive | 517 | Specify whether pages are marked as being inactive |
| 513 | when they are freed. This is used in CMO environments | 518 | when they are freed. This is used in CMO environments |
| @@ -515,6 +520,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
| 515 | a hypervisor. | 520 | a hypervisor. |
| 516 | Default: yes | 521 | Default: yes |
| 517 | 522 | ||
| 523 | coherent_pool=nn[KMG] [ARM,KNL] | ||
| 524 | Sets the size of memory pool for coherent, atomic dma | ||
| 525 | allocations if Contiguous Memory Allocator (CMA) is used. | ||
| 526 | |||
| 518 | code_bytes [X86] How many bytes of object code to print | 527 | code_bytes [X86] How many bytes of object code to print |
| 519 | in an oops report. | 528 | in an oops report. |
| 520 | Range: 0 - 8192 | 529 | Range: 0 - 8192 |
diff --git a/arch/Kconfig b/arch/Kconfig index e9a910876cda..8c3d957fa8e2 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
| @@ -159,6 +159,9 @@ config HAVE_ARCH_TRACEHOOK | |||
| 159 | config HAVE_DMA_ATTRS | 159 | config HAVE_DMA_ATTRS |
| 160 | bool | 160 | bool |
| 161 | 161 | ||
| 162 | config HAVE_DMA_CONTIGUOUS | ||
| 163 | bool | ||
| 164 | |||
| 162 | config USE_GENERIC_SMP_HELPERS | 165 | config USE_GENERIC_SMP_HELPERS |
| 163 | bool | 166 | bool |
| 164 | 167 | ||
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5458aa9db067..3ca1ba981efb 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig | |||
| @@ -5,6 +5,9 @@ config ARM | |||
| 5 | select HAVE_AOUT | 5 | select HAVE_AOUT |
| 6 | select HAVE_DMA_API_DEBUG | 6 | select HAVE_DMA_API_DEBUG |
| 7 | select HAVE_IDE if PCI || ISA || PCMCIA | 7 | select HAVE_IDE if PCI || ISA || PCMCIA |
| 8 | select HAVE_DMA_ATTRS | ||
| 9 | select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7) | ||
| 10 | select CMA if (CPU_V6 || CPU_V6K || CPU_V7) | ||
| 8 | select HAVE_MEMBLOCK | 11 | select HAVE_MEMBLOCK |
| 9 | select RTC_LIB | 12 | select RTC_LIB |
| 10 | select SYS_SUPPORTS_APM_EMULATION | 13 | select SYS_SUPPORTS_APM_EMULATION |
| @@ -54,6 +57,14 @@ config ARM | |||
| 54 | config ARM_HAS_SG_CHAIN | 57 | config ARM_HAS_SG_CHAIN |
| 55 | bool | 58 | bool |
| 56 | 59 | ||
| 60 | config NEED_SG_DMA_LENGTH | ||
| 61 | bool | ||
| 62 | |||
| 63 | config ARM_DMA_USE_IOMMU | ||
| 64 | select NEED_SG_DMA_LENGTH | ||
| 65 | select ARM_HAS_SG_CHAIN | ||
| 66 | bool | ||
| 67 | |||
| 57 | config HAVE_PWM | 68 | config HAVE_PWM |
| 58 | bool | 69 | bool |
| 59 | 70 | ||
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index 595ecd290ebf..9d7eb530f95f 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c | |||
| @@ -173,7 +173,8 @@ find_safe_buffer(struct dmabounce_device_info *device_info, dma_addr_t safe_dma_ | |||
| 173 | read_lock_irqsave(&device_info->lock, flags); | 173 | read_lock_irqsave(&device_info->lock, flags); |
| 174 | 174 | ||
| 175 | list_for_each_entry(b, &device_info->safe_buffers, node) | 175 | list_for_each_entry(b, &device_info->safe_buffers, node) |
| 176 | if (b->safe_dma_addr == safe_dma_addr) { | 176 | if (b->safe_dma_addr <= safe_dma_addr && |
| 177 | b->safe_dma_addr + b->size > safe_dma_addr) { | ||
| 177 | rb = b; | 178 | rb = b; |
| 178 | break; | 179 | break; |
| 179 | } | 180 | } |
| @@ -254,7 +255,7 @@ static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size, | |||
| 254 | if (buf == NULL) { | 255 | if (buf == NULL) { |
| 255 | dev_err(dev, "%s: unable to map unsafe buffer %p!\n", | 256 | dev_err(dev, "%s: unable to map unsafe buffer %p!\n", |
| 256 | __func__, ptr); | 257 | __func__, ptr); |
| 257 | return ~0; | 258 | return DMA_ERROR_CODE; |
| 258 | } | 259 | } |
| 259 | 260 | ||
| 260 | dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", | 261 | dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", |
| @@ -307,8 +308,9 @@ static inline void unmap_single(struct device *dev, struct safe_buffer *buf, | |||
| 307 | * substitute the safe buffer for the unsafe one. | 308 | * substitute the safe buffer for the unsafe one. |
| 308 | * (basically move the buffer from an unsafe area to a safe one) | 309 | * (basically move the buffer from an unsafe area to a safe one) |
| 309 | */ | 310 | */ |
| 310 | dma_addr_t __dma_map_page(struct device *dev, struct page *page, | 311 | static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page, |
| 311 | unsigned long offset, size_t size, enum dma_data_direction dir) | 312 | unsigned long offset, size_t size, enum dma_data_direction dir, |
| 313 | struct dma_attrs *attrs) | ||
| 312 | { | 314 | { |
| 313 | dma_addr_t dma_addr; | 315 | dma_addr_t dma_addr; |
| 314 | int ret; | 316 | int ret; |
| @@ -320,21 +322,20 @@ dma_addr_t __dma_map_page(struct device *dev, struct page *page, | |||
| 320 | 322 | ||
| 321 | ret = needs_bounce(dev, dma_addr, size); | 323 | ret = needs_bounce(dev, dma_addr, size); |
| 322 | if (ret < 0) | 324 | if (ret < 0) |
| 323 | return ~0; | 325 | return DMA_ERROR_CODE; |
| 324 | 326 | ||
| 325 | if (ret == 0) { | 327 | if (ret == 0) { |
| 326 | __dma_page_cpu_to_dev(page, offset, size, dir); | 328 | arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir); |
| 327 | return dma_addr; | 329 | return dma_addr; |
| 328 | } | 330 | } |
| 329 | 331 | ||
| 330 | if (PageHighMem(page)) { | 332 | if (PageHighMem(page)) { |
| 331 | dev_err(dev, "DMA buffer bouncing of HIGHMEM pages is not supported\n"); | 333 | dev_err(dev, "DMA buffer bouncing of HIGHMEM pages is not supported\n"); |
| 332 | return ~0; | 334 | return DMA_ERROR_CODE; |
| 333 | } | 335 | } |
| 334 | 336 | ||
| 335 | return map_single(dev, page_address(page) + offset, size, dir); | 337 | return map_single(dev, page_address(page) + offset, size, dir); |
| 336 | } | 338 | } |
| 337 | EXPORT_SYMBOL(__dma_map_page); | ||
| 338 | 339 | ||
| 339 | /* | 340 | /* |
| 340 | * see if a mapped address was really a "safe" buffer and if so, copy | 341 | * see if a mapped address was really a "safe" buffer and if so, copy |
| @@ -342,8 +343,8 @@ EXPORT_SYMBOL(__dma_map_page); | |||
| 342 | * the safe buffer. (basically return things back to the way they | 343 | * the safe buffer. (basically return things back to the way they |
| 343 | * should be) | 344 | * should be) |
| 344 | */ | 345 | */ |
| 345 | void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, | 346 | static void dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, |
| 346 | enum dma_data_direction dir) | 347 | enum dma_data_direction dir, struct dma_attrs *attrs) |
| 347 | { | 348 | { |
| 348 | struct safe_buffer *buf; | 349 | struct safe_buffer *buf; |
| 349 | 350 | ||
| @@ -352,19 +353,18 @@ void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, | |||
| 352 | 353 | ||
| 353 | buf = find_safe_buffer_dev(dev, dma_addr, __func__); | 354 | buf = find_safe_buffer_dev(dev, dma_addr, __func__); |
| 354 | if (!buf) { | 355 | if (!buf) { |
| 355 | __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, dma_addr)), | 356 | arm_dma_ops.sync_single_for_cpu(dev, dma_addr, size, dir); |
| 356 | dma_addr & ~PAGE_MASK, size, dir); | ||
| 357 | return; | 357 | return; |
| 358 | } | 358 | } |
| 359 | 359 | ||
| 360 | unmap_single(dev, buf, size, dir); | 360 | unmap_single(dev, buf, size, dir); |
| 361 | } | 361 | } |
| 362 | EXPORT_SYMBOL(__dma_unmap_page); | ||
| 363 | 362 | ||
| 364 | int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, | 363 | static int __dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, |
| 365 | unsigned long off, size_t sz, enum dma_data_direction dir) | 364 | size_t sz, enum dma_data_direction dir) |
| 366 | { | 365 | { |
| 367 | struct safe_buffer *buf; | 366 | struct safe_buffer *buf; |
| 367 | unsigned long off; | ||
| 368 | 368 | ||
| 369 | dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", | 369 | dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", |
| 370 | __func__, addr, off, sz, dir); | 370 | __func__, addr, off, sz, dir); |
| @@ -373,6 +373,8 @@ int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, | |||
| 373 | if (!buf) | 373 | if (!buf) |
| 374 | return 1; | 374 | return 1; |
| 375 | 375 | ||
| 376 | off = addr - buf->safe_dma_addr; | ||
| 377 | |||
| 376 | BUG_ON(buf->direction != dir); | 378 | BUG_ON(buf->direction != dir); |
| 377 | 379 | ||
| 378 | dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", | 380 | dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", |
| @@ -388,12 +390,21 @@ int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, | |||
| 388 | } | 390 | } |
| 389 | return 0; | 391 | return 0; |
| 390 | } | 392 | } |
| 391 | EXPORT_SYMBOL(dmabounce_sync_for_cpu); | ||
| 392 | 393 | ||
| 393 | int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, | 394 | static void dmabounce_sync_for_cpu(struct device *dev, |
| 394 | unsigned long off, size_t sz, enum dma_data_direction dir) | 395 | dma_addr_t handle, size_t size, enum dma_data_direction dir) |
| 396 | { | ||
| 397 | if (!__dmabounce_sync_for_cpu(dev, handle, size, dir)) | ||
| 398 | return; | ||
| 399 | |||
| 400 | arm_dma_ops.sync_single_for_cpu(dev, handle, size, dir); | ||
| 401 | } | ||
| 402 | |||
| 403 | static int __dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, | ||
| 404 | size_t sz, enum dma_data_direction dir) | ||
| 395 | { | 405 | { |
| 396 | struct safe_buffer *buf; | 406 | struct safe_buffer *buf; |
| 407 | unsigned long off; | ||
| 397 | 408 | ||
| 398 | dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", | 409 | dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", |
| 399 | __func__, addr, off, sz, dir); | 410 | __func__, addr, off, sz, dir); |
| @@ -402,6 +413,8 @@ int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, | |||
| 402 | if (!buf) | 413 | if (!buf) |
| 403 | return 1; | 414 | return 1; |
| 404 | 415 | ||
| 416 | off = addr - buf->safe_dma_addr; | ||
| 417 | |||
| 405 | BUG_ON(buf->direction != dir); | 418 | BUG_ON(buf->direction != dir); |
| 406 | 419 | ||
| 407 | dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", | 420 | dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", |
| @@ -417,7 +430,38 @@ int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, | |||
| 417 | } | 430 | } |
| 418 | return 0; | 431 | return 0; |
| 419 | } | 432 | } |
| 420 | EXPORT_SYMBOL(dmabounce_sync_for_device); | 433 | |
| 434 | static void dmabounce_sync_for_device(struct device *dev, | ||
| 435 | dma_addr_t handle, size_t size, enum dma_data_direction dir) | ||
| 436 | { | ||
| 437 | if (!__dmabounce_sync_for_device(dev, handle, size, dir)) | ||
| 438 | return; | ||
| 439 | |||
| 440 | arm_dma_ops.sync_single_for_device(dev, handle, size, dir); | ||
| 441 | } | ||
| 442 | |||
| 443 | static int dmabounce_set_mask(struct device *dev, u64 dma_mask) | ||
| 444 | { | ||
| 445 | if (dev->archdata.dmabounce) | ||
| 446 | return 0; | ||
| 447 | |||
| 448 | return arm_dma_ops.set_dma_mask(dev, dma_mask); | ||
| 449 | } | ||
| 450 | |||
| 451 | static struct dma_map_ops dmabounce_ops = { | ||
| 452 | .alloc = arm_dma_alloc, | ||
| 453 | .free = arm_dma_free, | ||
| 454 | .mmap = arm_dma_mmap, | ||
| 455 | .map_page = dmabounce_map_page, | ||
| 456 | .unmap_page = dmabounce_unmap_page, | ||
| 457 | .sync_single_for_cpu = dmabounce_sync_for_cpu, | ||
| 458 | .sync_single_for_device = dmabounce_sync_for_device, | ||
| 459 | .map_sg = arm_dma_map_sg, | ||
| 460 | .unmap_sg = arm_dma_unmap_sg, | ||
| 461 | .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, | ||
| 462 | .sync_sg_for_device = arm_dma_sync_sg_for_device, | ||
| 463 | .set_dma_mask = dmabounce_set_mask, | ||
| 464 | }; | ||
| 421 | 465 | ||
| 422 | static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev, | 466 | static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev, |
| 423 | const char *name, unsigned long size) | 467 | const char *name, unsigned long size) |
| @@ -479,6 +523,7 @@ int dmabounce_register_dev(struct device *dev, unsigned long small_buffer_size, | |||
| 479 | #endif | 523 | #endif |
| 480 | 524 | ||
| 481 | dev->archdata.dmabounce = device_info; | 525 | dev->archdata.dmabounce = device_info; |
| 526 | set_dma_ops(dev, &dmabounce_ops); | ||
| 482 | 527 | ||
| 483 | dev_info(dev, "dmabounce: registered device\n"); | 528 | dev_info(dev, "dmabounce: registered device\n"); |
| 484 | 529 | ||
| @@ -497,6 +542,7 @@ void dmabounce_unregister_dev(struct device *dev) | |||
| 497 | struct dmabounce_device_info *device_info = dev->archdata.dmabounce; | 542 | struct dmabounce_device_info *device_info = dev->archdata.dmabounce; |
| 498 | 543 | ||
| 499 | dev->archdata.dmabounce = NULL; | 544 | dev->archdata.dmabounce = NULL; |
| 545 | set_dma_ops(dev, NULL); | ||
| 500 | 546 | ||
| 501 | if (!device_info) { | 547 | if (!device_info) { |
| 502 | dev_warn(dev, | 548 | dev_warn(dev, |
diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index 7aa368003b05..b69c0d3285f8 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h | |||
| @@ -7,12 +7,16 @@ | |||
| 7 | #define ASMARM_DEVICE_H | 7 | #define ASMARM_DEVICE_H |
| 8 | 8 | ||
| 9 | struct dev_archdata { | 9 | struct dev_archdata { |
| 10 | struct dma_map_ops *dma_ops; | ||
| 10 | #ifdef CONFIG_DMABOUNCE | 11 | #ifdef CONFIG_DMABOUNCE |
| 11 | struct dmabounce_device_info *dmabounce; | 12 | struct dmabounce_device_info *dmabounce; |
| 12 | #endif | 13 | #endif |
| 13 | #ifdef CONFIG_IOMMU_API | 14 | #ifdef CONFIG_IOMMU_API |
| 14 | void *iommu; /* private IOMMU data */ | 15 | void *iommu; /* private IOMMU data */ |
| 15 | #endif | 16 | #endif |
| 17 | #ifdef CONFIG_ARM_DMA_USE_IOMMU | ||
| 18 | struct dma_iommu_mapping *mapping; | ||
| 19 | #endif | ||
| 16 | }; | 20 | }; |
| 17 | 21 | ||
| 18 | struct omap_device; | 22 | struct omap_device; |
diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h new file mode 100644 index 000000000000..3ed37b4d93da --- /dev/null +++ b/arch/arm/include/asm/dma-contiguous.h | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | #ifndef ASMARM_DMA_CONTIGUOUS_H | ||
| 2 | #define ASMARM_DMA_CONTIGUOUS_H | ||
| 3 | |||
| 4 | #ifdef __KERNEL__ | ||
| 5 | #ifdef CONFIG_CMA | ||
| 6 | |||
| 7 | #include <linux/types.h> | ||
| 8 | #include <asm-generic/dma-contiguous.h> | ||
| 9 | |||
| 10 | void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); | ||
| 11 | |||
| 12 | #endif | ||
| 13 | #endif | ||
| 14 | |||
| 15 | #endif | ||
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h new file mode 100644 index 000000000000..799b09409fad --- /dev/null +++ b/arch/arm/include/asm/dma-iommu.h | |||
| @@ -0,0 +1,34 @@ | |||
| 1 | #ifndef ASMARM_DMA_IOMMU_H | ||
| 2 | #define ASMARM_DMA_IOMMU_H | ||
| 3 | |||
| 4 | #ifdef __KERNEL__ | ||
| 5 | |||
| 6 | #include <linux/mm_types.h> | ||
| 7 | #include <linux/scatterlist.h> | ||
| 8 | #include <linux/dma-debug.h> | ||
| 9 | #include <linux/kmemcheck.h> | ||
| 10 | |||
| 11 | struct dma_iommu_mapping { | ||
| 12 | /* iommu specific data */ | ||
| 13 | struct iommu_domain *domain; | ||
| 14 | |||
| 15 | void *bitmap; | ||
| 16 | size_t bits; | ||
| 17 | unsigned int order; | ||
| 18 | dma_addr_t base; | ||
| 19 | |||
| 20 | spinlock_t lock; | ||
| 21 | struct kref kref; | ||
| 22 | }; | ||
| 23 | |||
| 24 | struct dma_iommu_mapping * | ||
| 25 | arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size, | ||
| 26 | int order); | ||
| 27 | |||
| 28 | void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping); | ||
| 29 | |||
| 30 | int arm_iommu_attach_device(struct device *dev, | ||
| 31 | struct dma_iommu_mapping *mapping); | ||
| 32 | |||
| 33 | #endif /* __KERNEL__ */ | ||
| 34 | #endif | ||
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index cb3b7c981c4b..bbef15d04890 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h | |||
| @@ -5,11 +5,35 @@ | |||
| 5 | 5 | ||
| 6 | #include <linux/mm_types.h> | 6 | #include <linux/mm_types.h> |
| 7 | #include <linux/scatterlist.h> | 7 | #include <linux/scatterlist.h> |
| 8 | #include <linux/dma-attrs.h> | ||
| 8 | #include <linux/dma-debug.h> | 9 | #include <linux/dma-debug.h> |
| 9 | 10 | ||
| 10 | #include <asm-generic/dma-coherent.h> | 11 | #include <asm-generic/dma-coherent.h> |
| 11 | #include <asm/memory.h> | 12 | #include <asm/memory.h> |
| 12 | 13 | ||
| 14 | #define DMA_ERROR_CODE (~0) | ||
| 15 | extern struct dma_map_ops arm_dma_ops; | ||
| 16 | |||
| 17 | static inline struct dma_map_ops *get_dma_ops(struct device *dev) | ||
| 18 | { | ||
| 19 | if (dev && dev->archdata.dma_ops) | ||
| 20 | return dev->archdata.dma_ops; | ||
| 21 | return &arm_dma_ops; | ||
| 22 | } | ||
| 23 | |||
| 24 | static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) | ||
| 25 | { | ||
| 26 | BUG_ON(!dev); | ||
| 27 | dev->archdata.dma_ops = ops; | ||
| 28 | } | ||
| 29 | |||
| 30 | #include <asm-generic/dma-mapping-common.h> | ||
| 31 | |||
| 32 | static inline int dma_set_mask(struct device *dev, u64 mask) | ||
| 33 | { | ||
| 34 | return get_dma_ops(dev)->set_dma_mask(dev, mask); | ||
| 35 | } | ||
| 36 | |||
| 13 | #ifdef __arch_page_to_dma | 37 | #ifdef __arch_page_to_dma |
| 14 | #error Please update to __arch_pfn_to_dma | 38 | #error Please update to __arch_pfn_to_dma |
| 15 | #endif | 39 | #endif |
| @@ -62,68 +86,11 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) | |||
| 62 | #endif | 86 | #endif |
| 63 | 87 | ||
| 64 | /* | 88 | /* |
| 65 | * The DMA API is built upon the notion of "buffer ownership". A buffer | ||
| 66 | * is either exclusively owned by the CPU (and therefore may be accessed | ||
| 67 | * by it) or exclusively owned by the DMA device. These helper functions | ||
| 68 | * represent the transitions between these two ownership states. | ||
| 69 | * | ||
| 70 | * Note, however, that on later ARMs, this notion does not work due to | ||
| 71 | * speculative prefetches. We model our approach on the assumption that | ||
| 72 | * the CPU does do speculative prefetches, which means we clean caches | ||
| 73 | * before transfers and delay cache invalidation until transfer completion. | ||
| 74 | * | ||
| 75 | * Private support functions: these are not part of the API and are | ||
| 76 | * liable to change. Drivers must not use these. | ||
| 77 | */ | ||
| 78 | static inline void __dma_single_cpu_to_dev(const void *kaddr, size_t size, | ||
| 79 | enum dma_data_direction dir) | ||
| 80 | { | ||
| 81 | extern void ___dma_single_cpu_to_dev(const void *, size_t, | ||
| 82 | enum dma_data_direction); | ||
| 83 | |||
| 84 | if (!arch_is_coherent()) | ||
| 85 | ___dma_single_cpu_to_dev(kaddr, size, dir); | ||
| 86 | } | ||
| 87 | |||
| 88 | static inline void __dma_single_dev_to_cpu(const void *kaddr, size_t size, | ||
| 89 | enum dma_data_direction dir) | ||
| 90 | { | ||
| 91 | extern void ___dma_single_dev_to_cpu(const void *, size_t, | ||
| 92 | enum dma_data_direction); | ||
| 93 | |||
| 94 | if (!arch_is_coherent()) | ||
| 95 | ___dma_single_dev_to_cpu(kaddr, size, dir); | ||
| 96 | } | ||
| 97 | |||
| 98 | static inline void __dma_page_cpu_to_dev(struct page *page, unsigned long off, | ||
| 99 | size_t size, enum dma_data_direction dir) | ||
| 100 | { | ||
| 101 | extern void ___dma_page_cpu_to_dev(struct page *, unsigned long, | ||
| 102 | size_t, enum dma_data_direction); | ||
| 103 | |||
| 104 | if (!arch_is_coherent()) | ||
| 105 | ___dma_page_cpu_to_dev(page, off, size, dir); | ||
| 106 | } | ||
| 107 | |||
| 108 | static inline void __dma_page_dev_to_cpu(struct page *page, unsigned long off, | ||
| 109 | size_t size, enum dma_data_direction dir) | ||
| 110 | { | ||
| 111 | extern void ___dma_page_dev_to_cpu(struct page *, unsigned long, | ||
| 112 | size_t, enum dma_data_direction); | ||
| 113 | |||
| 114 | if (!arch_is_coherent()) | ||
| 115 | ___dma_page_dev_to_cpu(page, off, size, dir); | ||
| 116 | } | ||
| 117 | |||
| 118 | extern int dma_supported(struct device *, u64); | ||
| 119 | extern int dma_set_mask(struct device *, u64); | ||
| 120 | |||
| 121 | /* | ||
| 122 | * DMA errors are defined by all-bits-set in the DMA address. | 89 | * DMA errors are defined by all-bits-set in the DMA address. |
| 123 | */ | 90 | */ |
| 124 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | 91 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) |
| 125 | { | 92 | { |
| 126 | return dma_addr == ~0; | 93 | return dma_addr == DMA_ERROR_CODE; |
| 127 | } | 94 | } |
| 128 | 95 | ||
| 129 | /* | 96 | /* |
| @@ -141,69 +108,118 @@ static inline void dma_free_noncoherent(struct device *dev, size_t size, | |||
| 141 | { | 108 | { |
| 142 | } | 109 | } |
| 143 | 110 | ||
| 111 | extern int dma_supported(struct device *dev, u64 mask); | ||
| 112 | |||
| 144 | /** | 113 | /** |
| 145 | * dma_alloc_coherent - allocate consistent memory for DMA | 114 | * arm_dma_alloc - allocate consistent memory for DMA |
| 146 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 115 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |
| 147 | * @size: required memory size | 116 | * @size: required memory size |
| 148 | * @handle: bus-specific DMA address | 117 | * @handle: bus-specific DMA address |
| 118 | * @attrs: optinal attributes that specific mapping properties | ||
| 149 | * | 119 | * |
| 150 | * Allocate some uncached, unbuffered memory for a device for | 120 | * Allocate some memory for a device for performing DMA. This function |
| 151 | * performing DMA. This function allocates pages, and will | 121 | * allocates pages, and will return the CPU-viewed address, and sets @handle |
| 152 | * return the CPU-viewed address, and sets @handle to be the | 122 | * to be the device-viewed address. |
| 153 | * device-viewed address. | ||
| 154 | */ | 123 | */ |
| 155 | extern void *dma_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); | 124 | extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, |
| 125 | gfp_t gfp, struct dma_attrs *attrs); | ||
| 126 | |||
| 127 | #define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) | ||
| 128 | |||
| 129 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
| 130 | dma_addr_t *dma_handle, gfp_t flag, | ||
| 131 | struct dma_attrs *attrs) | ||
| 132 | { | ||
| 133 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
| 134 | void *cpu_addr; | ||
| 135 | BUG_ON(!ops); | ||
| 136 | |||
| 137 | cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); | ||
| 138 | debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); | ||
| 139 | return cpu_addr; | ||
| 140 | } | ||
| 156 | 141 | ||
| 157 | /** | 142 | /** |
| 158 | * dma_free_coherent - free memory allocated by dma_alloc_coherent | 143 | * arm_dma_free - free memory allocated by arm_dma_alloc |
| 159 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 144 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |
| 160 | * @size: size of memory originally requested in dma_alloc_coherent | 145 | * @size: size of memory originally requested in dma_alloc_coherent |
| 161 | * @cpu_addr: CPU-view address returned from dma_alloc_coherent | 146 | * @cpu_addr: CPU-view address returned from dma_alloc_coherent |
| 162 | * @handle: device-view address returned from dma_alloc_coherent | 147 | * @handle: device-view address returned from dma_alloc_coherent |
| 148 | * @attrs: optinal attributes that specific mapping properties | ||
| 163 | * | 149 | * |
| 164 | * Free (and unmap) a DMA buffer previously allocated by | 150 | * Free (and unmap) a DMA buffer previously allocated by |
| 165 | * dma_alloc_coherent(). | 151 | * arm_dma_alloc(). |
| 166 | * | 152 | * |
| 167 | * References to memory and mappings associated with cpu_addr/handle | 153 | * References to memory and mappings associated with cpu_addr/handle |
| 168 | * during and after this call executing are illegal. | 154 | * during and after this call executing are illegal. |
| 169 | */ | 155 | */ |
| 170 | extern void dma_free_coherent(struct device *, size_t, void *, dma_addr_t); | 156 | extern void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, |
| 157 | dma_addr_t handle, struct dma_attrs *attrs); | ||
| 158 | |||
| 159 | #define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL) | ||
| 160 | |||
| 161 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
| 162 | void *cpu_addr, dma_addr_t dma_handle, | ||
| 163 | struct dma_attrs *attrs) | ||
| 164 | { | ||
| 165 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
| 166 | BUG_ON(!ops); | ||
| 167 | |||
| 168 | debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); | ||
| 169 | ops->free(dev, size, cpu_addr, dma_handle, attrs); | ||
| 170 | } | ||
| 171 | 171 | ||
| 172 | /** | 172 | /** |
| 173 | * dma_mmap_coherent - map a coherent DMA allocation into user space | 173 | * arm_dma_mmap - map a coherent DMA allocation into user space |
| 174 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 174 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |
| 175 | * @vma: vm_area_struct describing requested user mapping | 175 | * @vma: vm_area_struct describing requested user mapping |
| 176 | * @cpu_addr: kernel CPU-view address returned from dma_alloc_coherent | 176 | * @cpu_addr: kernel CPU-view address returned from dma_alloc_coherent |
| 177 | * @handle: device-view address returned from dma_alloc_coherent | 177 | * @handle: device-view address returned from dma_alloc_coherent |
| 178 | * @size: size of memory originally requested in dma_alloc_coherent | 178 | * @size: size of memory originally requested in dma_alloc_coherent |
| 179 | * @attrs: optinal attributes that specific mapping properties | ||
| 179 | * | 180 | * |
| 180 | * Map a coherent DMA buffer previously allocated by dma_alloc_coherent | 181 | * Map a coherent DMA buffer previously allocated by dma_alloc_coherent |
| 181 | * into user space. The coherent DMA buffer must not be freed by the | 182 | * into user space. The coherent DMA buffer must not be freed by the |
| 182 | * driver until the user space mapping has been released. | 183 | * driver until the user space mapping has been released. |
| 183 | */ | 184 | */ |
| 184 | int dma_mmap_coherent(struct device *, struct vm_area_struct *, | 185 | extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, |
| 185 | void *, dma_addr_t, size_t); | 186 | void *cpu_addr, dma_addr_t dma_addr, size_t size, |
| 187 | struct dma_attrs *attrs); | ||
| 186 | 188 | ||
| 189 | #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL) | ||
| 187 | 190 | ||
| 188 | /** | 191 | static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, |
| 189 | * dma_alloc_writecombine - allocate writecombining memory for DMA | 192 | void *cpu_addr, dma_addr_t dma_addr, |
| 190 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 193 | size_t size, struct dma_attrs *attrs) |
| 191 | * @size: required memory size | 194 | { |
| 192 | * @handle: bus-specific DMA address | 195 | struct dma_map_ops *ops = get_dma_ops(dev); |
| 193 | * | 196 | BUG_ON(!ops); |
| 194 | * Allocate some uncached, buffered memory for a device for | 197 | return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); |
| 195 | * performing DMA. This function allocates pages, and will | 198 | } |
| 196 | * return the CPU-viewed address, and sets @handle to be the | 199 | |
| 197 | * device-viewed address. | 200 | static inline void *dma_alloc_writecombine(struct device *dev, size_t size, |
| 198 | */ | 201 | dma_addr_t *dma_handle, gfp_t flag) |
| 199 | extern void *dma_alloc_writecombine(struct device *, size_t, dma_addr_t *, | 202 | { |
| 200 | gfp_t); | 203 | DEFINE_DMA_ATTRS(attrs); |
| 204 | dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); | ||
| 205 | return dma_alloc_attrs(dev, size, dma_handle, flag, &attrs); | ||
| 206 | } | ||
| 201 | 207 | ||
| 202 | #define dma_free_writecombine(dev,size,cpu_addr,handle) \ | 208 | static inline void dma_free_writecombine(struct device *dev, size_t size, |
| 203 | dma_free_coherent(dev,size,cpu_addr,handle) | 209 | void *cpu_addr, dma_addr_t dma_handle) |
| 210 | { | ||
| 211 | DEFINE_DMA_ATTRS(attrs); | ||
| 212 | dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); | ||
| 213 | return dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs); | ||
| 214 | } | ||
| 204 | 215 | ||
| 205 | int dma_mmap_writecombine(struct device *, struct vm_area_struct *, | 216 | static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, |
| 206 | void *, dma_addr_t, size_t); | 217 | void *cpu_addr, dma_addr_t dma_addr, size_t size) |
| 218 | { | ||
| 219 | DEFINE_DMA_ATTRS(attrs); | ||
| 220 | dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); | ||
| 221 | return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs); | ||
| 222 | } | ||
| 207 | 223 | ||
| 208 | /* | 224 | /* |
| 209 | * This can be called during boot to increase the size of the consistent | 225 | * This can be called during boot to increase the size of the consistent |
| @@ -212,8 +228,6 @@ int dma_mmap_writecombine(struct device *, struct vm_area_struct *, | |||
| 212 | */ | 228 | */ |
| 213 | extern void __init init_consistent_dma_size(unsigned long size); | 229 | extern void __init init_consistent_dma_size(unsigned long size); |
| 214 | 230 | ||
| 215 | |||
| 216 | #ifdef CONFIG_DMABOUNCE | ||
| 217 | /* | 231 | /* |
| 218 | * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic" | 232 | * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic" |
| 219 | * and utilize bounce buffers as needed to work around limited DMA windows. | 233 | * and utilize bounce buffers as needed to work around limited DMA windows. |
| @@ -253,222 +267,19 @@ extern int dmabounce_register_dev(struct device *, unsigned long, | |||
| 253 | */ | 267 | */ |
| 254 | extern void dmabounce_unregister_dev(struct device *); | 268 | extern void dmabounce_unregister_dev(struct device *); |
| 255 | 269 | ||
| 256 | /* | ||
| 257 | * The DMA API, implemented by dmabounce.c. See below for descriptions. | ||
| 258 | */ | ||
| 259 | extern dma_addr_t __dma_map_page(struct device *, struct page *, | ||
| 260 | unsigned long, size_t, enum dma_data_direction); | ||
| 261 | extern void __dma_unmap_page(struct device *, dma_addr_t, size_t, | ||
| 262 | enum dma_data_direction); | ||
| 263 | |||
| 264 | /* | ||
| 265 | * Private functions | ||
| 266 | */ | ||
| 267 | int dmabounce_sync_for_cpu(struct device *, dma_addr_t, unsigned long, | ||
| 268 | size_t, enum dma_data_direction); | ||
| 269 | int dmabounce_sync_for_device(struct device *, dma_addr_t, unsigned long, | ||
| 270 | size_t, enum dma_data_direction); | ||
| 271 | #else | ||
| 272 | static inline int dmabounce_sync_for_cpu(struct device *d, dma_addr_t addr, | ||
| 273 | unsigned long offset, size_t size, enum dma_data_direction dir) | ||
| 274 | { | ||
| 275 | return 1; | ||
| 276 | } | ||
| 277 | 270 | ||
| 278 | static inline int dmabounce_sync_for_device(struct device *d, dma_addr_t addr, | ||
| 279 | unsigned long offset, size_t size, enum dma_data_direction dir) | ||
| 280 | { | ||
| 281 | return 1; | ||
| 282 | } | ||
| 283 | |||
| 284 | |||
| 285 | static inline dma_addr_t __dma_map_page(struct device *dev, struct page *page, | ||
| 286 | unsigned long offset, size_t size, enum dma_data_direction dir) | ||
| 287 | { | ||
| 288 | __dma_page_cpu_to_dev(page, offset, size, dir); | ||
| 289 | return pfn_to_dma(dev, page_to_pfn(page)) + offset; | ||
| 290 | } | ||
| 291 | |||
| 292 | static inline void __dma_unmap_page(struct device *dev, dma_addr_t handle, | ||
| 293 | size_t size, enum dma_data_direction dir) | ||
| 294 | { | ||
| 295 | __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), | ||
| 296 | handle & ~PAGE_MASK, size, dir); | ||
| 297 | } | ||
| 298 | #endif /* CONFIG_DMABOUNCE */ | ||
| 299 | |||
| 300 | /** | ||
| 301 | * dma_map_single - map a single buffer for streaming DMA | ||
| 302 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | ||
| 303 | * @cpu_addr: CPU direct mapped address of buffer | ||
| 304 | * @size: size of buffer to map | ||
| 305 | * @dir: DMA transfer direction | ||
| 306 | * | ||
| 307 | * Ensure that any data held in the cache is appropriately discarded | ||
| 308 | * or written back. | ||
| 309 | * | ||
| 310 | * The device owns this memory once this call has completed. The CPU | ||
| 311 | * can regain ownership by calling dma_unmap_single() or | ||
| 312 | * dma_sync_single_for_cpu(). | ||
| 313 | */ | ||
| 314 | static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, | ||
| 315 | size_t size, enum dma_data_direction dir) | ||
| 316 | { | ||
| 317 | unsigned long offset; | ||
| 318 | struct page *page; | ||
| 319 | dma_addr_t addr; | ||
| 320 | |||
| 321 | BUG_ON(!virt_addr_valid(cpu_addr)); | ||
| 322 | BUG_ON(!virt_addr_valid(cpu_addr + size - 1)); | ||
| 323 | BUG_ON(!valid_dma_direction(dir)); | ||
| 324 | |||
| 325 | page = virt_to_page(cpu_addr); | ||
| 326 | offset = (unsigned long)cpu_addr & ~PAGE_MASK; | ||
| 327 | addr = __dma_map_page(dev, page, offset, size, dir); | ||
| 328 | debug_dma_map_page(dev, page, offset, size, dir, addr, true); | ||
| 329 | |||
| 330 | return addr; | ||
| 331 | } | ||
| 332 | |||
| 333 | /** | ||
| 334 | * dma_map_page - map a portion of a page for streaming DMA | ||
| 335 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | ||
| 336 | * @page: page that buffer resides in | ||
| 337 | * @offset: offset into page for start of buffer | ||
| 338 | * @size: size of buffer to map | ||
| 339 | * @dir: DMA transfer direction | ||
| 340 | * | ||
| 341 | * Ensure that any data held in the cache is appropriately discarded | ||
| 342 | * or written back. | ||
| 343 | * | ||
| 344 | * The device owns this memory once this call has completed. The CPU | ||
| 345 | * can regain ownership by calling dma_unmap_page(). | ||
| 346 | */ | ||
| 347 | static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, | ||
| 348 | unsigned long offset, size_t size, enum dma_data_direction dir) | ||
| 349 | { | ||
| 350 | dma_addr_t addr; | ||
| 351 | |||
| 352 | BUG_ON(!valid_dma_direction(dir)); | ||
| 353 | |||
| 354 | addr = __dma_map_page(dev, page, offset, size, dir); | ||
| 355 | debug_dma_map_page(dev, page, offset, size, dir, addr, false); | ||
| 356 | |||
| 357 | return addr; | ||
| 358 | } | ||
| 359 | |||
| 360 | /** | ||
| 361 | * dma_unmap_single - unmap a single buffer previously mapped | ||
| 362 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | ||
| 363 | * @handle: DMA address of buffer | ||
| 364 | * @size: size of buffer (same as passed to dma_map_single) | ||
| 365 | * @dir: DMA transfer direction (same as passed to dma_map_single) | ||
| 366 | * | ||
| 367 | * Unmap a single streaming mode DMA translation. The handle and size | ||
| 368 | * must match what was provided in the previous dma_map_single() call. | ||
| 369 | * All other usages are undefined. | ||
| 370 | * | ||
| 371 | * After this call, reads by the CPU to the buffer are guaranteed to see | ||
| 372 | * whatever the device wrote there. | ||
| 373 | */ | ||
| 374 | static inline void dma_unmap_single(struct device *dev, dma_addr_t handle, | ||
| 375 | size_t size, enum dma_data_direction dir) | ||
| 376 | { | ||
| 377 | debug_dma_unmap_page(dev, handle, size, dir, true); | ||
| 378 | __dma_unmap_page(dev, handle, size, dir); | ||
| 379 | } | ||
| 380 | |||
| 381 | /** | ||
| 382 | * dma_unmap_page - unmap a buffer previously mapped through dma_map_page() | ||
| 383 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | ||
| 384 | * @handle: DMA address of buffer | ||
| 385 | * @size: size of buffer (same as passed to dma_map_page) | ||
| 386 | * @dir: DMA transfer direction (same as passed to dma_map_page) | ||
| 387 | * | ||
| 388 | * Unmap a page streaming mode DMA translation. The handle and size | ||
| 389 | * must match what was provided in the previous dma_map_page() call. | ||
| 390 | * All other usages are undefined. | ||
| 391 | * | ||
| 392 | * After this call, reads by the CPU to the buffer are guaranteed to see | ||
| 393 | * whatever the device wrote there. | ||
| 394 | */ | ||
| 395 | static inline void dma_unmap_page(struct device *dev, dma_addr_t handle, | ||
| 396 | size_t size, enum dma_data_direction dir) | ||
| 397 | { | ||
| 398 | debug_dma_unmap_page(dev, handle, size, dir, false); | ||
| 399 | __dma_unmap_page(dev, handle, size, dir); | ||
| 400 | } | ||
| 401 | |||
| 402 | /** | ||
| 403 | * dma_sync_single_range_for_cpu | ||
| 404 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | ||
| 405 | * @handle: DMA address of buffer | ||
| 406 | * @offset: offset of region to start sync | ||
| 407 | * @size: size of region to sync | ||
| 408 | * @dir: DMA transfer direction (same as passed to dma_map_single) | ||
| 409 | * | ||
| 410 | * Make physical memory consistent for a single streaming mode DMA | ||
| 411 | * translation after a transfer. | ||
| 412 | * | ||
| 413 | * If you perform a dma_map_single() but wish to interrogate the | ||
| 414 | * buffer using the cpu, yet do not wish to teardown the PCI dma | ||
| 415 | * mapping, you must call this function before doing so. At the | ||
| 416 | * next point you give the PCI dma address back to the card, you | ||
| 417 | * must first the perform a dma_sync_for_device, and then the | ||
| 418 | * device again owns the buffer. | ||
| 419 | */ | ||
| 420 | static inline void dma_sync_single_range_for_cpu(struct device *dev, | ||
| 421 | dma_addr_t handle, unsigned long offset, size_t size, | ||
| 422 | enum dma_data_direction dir) | ||
| 423 | { | ||
| 424 | BUG_ON(!valid_dma_direction(dir)); | ||
| 425 | |||
| 426 | debug_dma_sync_single_for_cpu(dev, handle + offset, size, dir); | ||
| 427 | |||
| 428 | if (!dmabounce_sync_for_cpu(dev, handle, offset, size, dir)) | ||
| 429 | return; | ||
| 430 | |||
| 431 | __dma_single_dev_to_cpu(dma_to_virt(dev, handle) + offset, size, dir); | ||
| 432 | } | ||
| 433 | |||
| 434 | static inline void dma_sync_single_range_for_device(struct device *dev, | ||
| 435 | dma_addr_t handle, unsigned long offset, size_t size, | ||
| 436 | enum dma_data_direction dir) | ||
| 437 | { | ||
| 438 | BUG_ON(!valid_dma_direction(dir)); | ||
| 439 | |||
| 440 | debug_dma_sync_single_for_device(dev, handle + offset, size, dir); | ||
| 441 | |||
| 442 | if (!dmabounce_sync_for_device(dev, handle, offset, size, dir)) | ||
| 443 | return; | ||
| 444 | |||
| 445 | __dma_single_cpu_to_dev(dma_to_virt(dev, handle) + offset, size, dir); | ||
| 446 | } | ||
| 447 | |||
| 448 | static inline void dma_sync_single_for_cpu(struct device *dev, | ||
| 449 | dma_addr_t handle, size_t size, enum dma_data_direction dir) | ||
| 450 | { | ||
| 451 | dma_sync_single_range_for_cpu(dev, handle, 0, size, dir); | ||
| 452 | } | ||
| 453 | |||
| 454 | static inline void dma_sync_single_for_device(struct device *dev, | ||
| 455 | dma_addr_t handle, size_t size, enum dma_data_direction dir) | ||
| 456 | { | ||
| 457 | dma_sync_single_range_for_device(dev, handle, 0, size, dir); | ||
| 458 | } | ||
| 459 | 271 | ||
| 460 | /* | 272 | /* |
| 461 | * The scatter list versions of the above methods. | 273 | * The scatter list versions of the above methods. |
| 462 | */ | 274 | */ |
| 463 | extern int dma_map_sg(struct device *, struct scatterlist *, int, | 275 | extern int arm_dma_map_sg(struct device *, struct scatterlist *, int, |
| 464 | enum dma_data_direction); | 276 | enum dma_data_direction, struct dma_attrs *attrs); |
| 465 | extern void dma_unmap_sg(struct device *, struct scatterlist *, int, | 277 | extern void arm_dma_unmap_sg(struct device *, struct scatterlist *, int, |
| 278 | enum dma_data_direction, struct dma_attrs *attrs); | ||
| 279 | extern void arm_dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int, | ||
| 466 | enum dma_data_direction); | 280 | enum dma_data_direction); |
| 467 | extern void dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int, | 281 | extern void arm_dma_sync_sg_for_device(struct device *, struct scatterlist *, int, |
| 468 | enum dma_data_direction); | 282 | enum dma_data_direction); |
| 469 | extern void dma_sync_sg_for_device(struct device *, struct scatterlist *, int, | ||
| 470 | enum dma_data_direction); | ||
| 471 | |||
| 472 | 283 | ||
| 473 | #endif /* __KERNEL__ */ | 284 | #endif /* __KERNEL__ */ |
| 474 | #endif | 285 | #endif |
diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index b36f3654bf54..a6efcdd6fd25 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h | |||
| @@ -30,6 +30,7 @@ struct map_desc { | |||
| 30 | #define MT_MEMORY_DTCM 12 | 30 | #define MT_MEMORY_DTCM 12 |
| 31 | #define MT_MEMORY_ITCM 13 | 31 | #define MT_MEMORY_ITCM 13 |
| 32 | #define MT_MEMORY_SO 14 | 32 | #define MT_MEMORY_SO 14 |
| 33 | #define MT_MEMORY_DMA_READY 15 | ||
| 33 | 34 | ||
| 34 | #ifdef CONFIG_MMU | 35 | #ifdef CONFIG_MMU |
| 35 | extern void iotable_init(struct map_desc *, int); | 36 | extern void iotable_init(struct map_desc *, int); |
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index ebfac782593f..1b3096dfb964 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c | |||
| @@ -81,6 +81,7 @@ __setup("fpe=", fpe_setup); | |||
| 81 | extern void paging_init(struct machine_desc *desc); | 81 | extern void paging_init(struct machine_desc *desc); |
| 82 | extern void sanity_check_meminfo(void); | 82 | extern void sanity_check_meminfo(void); |
| 83 | extern void reboot_setup(char *str); | 83 | extern void reboot_setup(char *str); |
| 84 | extern void setup_dma_zone(struct machine_desc *desc); | ||
| 84 | 85 | ||
| 85 | unsigned int processor_id; | 86 | unsigned int processor_id; |
| 86 | EXPORT_SYMBOL(processor_id); | 87 | EXPORT_SYMBOL(processor_id); |
| @@ -939,12 +940,8 @@ void __init setup_arch(char **cmdline_p) | |||
| 939 | machine_desc = mdesc; | 940 | machine_desc = mdesc; |
| 940 | machine_name = mdesc->name; | 941 | machine_name = mdesc->name; |
| 941 | 942 | ||
| 942 | #ifdef CONFIG_ZONE_DMA | 943 | setup_dma_zone(mdesc); |
| 943 | if (mdesc->dma_zone_size) { | 944 | |
| 944 | extern unsigned long arm_dma_zone_size; | ||
| 945 | arm_dma_zone_size = mdesc->dma_zone_size; | ||
| 946 | } | ||
| 947 | #endif | ||
| 948 | if (mdesc->restart_mode) | 945 | if (mdesc->restart_mode) |
| 949 | reboot_setup(&mdesc->restart_mode); | 946 | reboot_setup(&mdesc->restart_mode); |
| 950 | 947 | ||
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index db23ae4aaaab..ea6b43154090 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c | |||
| @@ -17,8 +17,12 @@ | |||
| 17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
| 18 | #include <linux/device.h> | 18 | #include <linux/device.h> |
| 19 | #include <linux/dma-mapping.h> | 19 | #include <linux/dma-mapping.h> |
| 20 | #include <linux/dma-contiguous.h> | ||
| 20 | #include <linux/highmem.h> | 21 | #include <linux/highmem.h> |
| 22 | #include <linux/memblock.h> | ||
| 21 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
| 24 | #include <linux/iommu.h> | ||
| 25 | #include <linux/vmalloc.h> | ||
| 22 | 26 | ||
| 23 | #include <asm/memory.h> | 27 | #include <asm/memory.h> |
| 24 | #include <asm/highmem.h> | 28 | #include <asm/highmem.h> |
| @@ -26,9 +30,112 @@ | |||
| 26 | #include <asm/tlbflush.h> | 30 | #include <asm/tlbflush.h> |
| 27 | #include <asm/sizes.h> | 31 | #include <asm/sizes.h> |
| 28 | #include <asm/mach/arch.h> | 32 | #include <asm/mach/arch.h> |
| 33 | #include <asm/dma-iommu.h> | ||
| 34 | #include <asm/mach/map.h> | ||
| 35 | #include <asm/system_info.h> | ||
| 36 | #include <asm/dma-contiguous.h> | ||
| 29 | 37 | ||
| 30 | #include "mm.h" | 38 | #include "mm.h" |
| 31 | 39 | ||
| 40 | /* | ||
| 41 | * The DMA API is built upon the notion of "buffer ownership". A buffer | ||
| 42 | * is either exclusively owned by the CPU (and therefore may be accessed | ||
| 43 | * by it) or exclusively owned by the DMA device. These helper functions | ||
| 44 | * represent the transitions between these two ownership states. | ||
| 45 | * | ||
| 46 | * Note, however, that on later ARMs, this notion does not work due to | ||
| 47 | * speculative prefetches. We model our approach on the assumption that | ||
| 48 | * the CPU does do speculative prefetches, which means we clean caches | ||
| 49 | * before transfers and delay cache invalidation until transfer completion. | ||
| 50 | * | ||
| 51 | */ | ||
| 52 | static void __dma_page_cpu_to_dev(struct page *, unsigned long, | ||
| 53 | size_t, enum dma_data_direction); | ||
| 54 | static void __dma_page_dev_to_cpu(struct page *, unsigned long, | ||
| 55 | size_t, enum dma_data_direction); | ||
| 56 | |||
| 57 | /** | ||
| 58 | * arm_dma_map_page - map a portion of a page for streaming DMA | ||
| 59 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | ||
| 60 | * @page: page that buffer resides in | ||
| 61 | * @offset: offset into page for start of buffer | ||
| 62 | * @size: size of buffer to map | ||
| 63 | * @dir: DMA transfer direction | ||
| 64 | * | ||
| 65 | * Ensure that any data held in the cache is appropriately discarded | ||
| 66 | * or written back. | ||
| 67 | * | ||
| 68 | * The device owns this memory once this call has completed. The CPU | ||
| 69 | * can regain ownership by calling dma_unmap_page(). | ||
| 70 | */ | ||
| 71 | static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, | ||
| 72 | unsigned long offset, size_t size, enum dma_data_direction dir, | ||
| 73 | struct dma_attrs *attrs) | ||
| 74 | { | ||
| 75 | if (!arch_is_coherent()) | ||
| 76 | __dma_page_cpu_to_dev(page, offset, size, dir); | ||
| 77 | return pfn_to_dma(dev, page_to_pfn(page)) + offset; | ||
| 78 | } | ||
| 79 | |||
| 80 | /** | ||
| 81 | * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() | ||
| 82 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | ||
| 83 | * @handle: DMA address of buffer | ||
| 84 | * @size: size of buffer (same as passed to dma_map_page) | ||
| 85 | * @dir: DMA transfer direction (same as passed to dma_map_page) | ||
| 86 | * | ||
| 87 | * Unmap a page streaming mode DMA translation. The handle and size | ||
| 88 | * must match what was provided in the previous dma_map_page() call. | ||
| 89 | * All other usages are undefined. | ||
| 90 | * | ||
| 91 | * After this call, reads by the CPU to the buffer are guaranteed to see | ||
| 92 | * whatever the device wrote there. | ||
| 93 | */ | ||
| 94 | static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, | ||
| 95 | size_t size, enum dma_data_direction dir, | ||
| 96 | struct dma_attrs *attrs) | ||
| 97 | { | ||
| 98 | if (!arch_is_coherent()) | ||
| 99 | __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), | ||
| 100 | handle & ~PAGE_MASK, size, dir); | ||
| 101 | } | ||
| 102 | |||
| 103 | static void arm_dma_sync_single_for_cpu(struct device *dev, | ||
| 104 | dma_addr_t handle, size_t size, enum dma_data_direction dir) | ||
| 105 | { | ||
| 106 | unsigned int offset = handle & (PAGE_SIZE - 1); | ||
| 107 | struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); | ||
| 108 | if (!arch_is_coherent()) | ||
| 109 | __dma_page_dev_to_cpu(page, offset, size, dir); | ||
| 110 | } | ||
| 111 | |||
| 112 | static void arm_dma_sync_single_for_device(struct device *dev, | ||
| 113 | dma_addr_t handle, size_t size, enum dma_data_direction dir) | ||
| 114 | { | ||
| 115 | unsigned int offset = handle & (PAGE_SIZE - 1); | ||
| 116 | struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); | ||
| 117 | if (!arch_is_coherent()) | ||
| 118 | __dma_page_cpu_to_dev(page, offset, size, dir); | ||
| 119 | } | ||
| 120 | |||
| 121 | static int arm_dma_set_mask(struct device *dev, u64 dma_mask); | ||
| 122 | |||
| 123 | struct dma_map_ops arm_dma_ops = { | ||
| 124 | .alloc = arm_dma_alloc, | ||
| 125 | .free = arm_dma_free, | ||
| 126 | .mmap = arm_dma_mmap, | ||
| 127 | .map_page = arm_dma_map_page, | ||
| 128 | .unmap_page = arm_dma_unmap_page, | ||
| 129 | .map_sg = arm_dma_map_sg, | ||
| 130 | .unmap_sg = arm_dma_unmap_sg, | ||
| 131 | .sync_single_for_cpu = arm_dma_sync_single_for_cpu, | ||
| 132 | .sync_single_for_device = arm_dma_sync_single_for_device, | ||
| 133 | .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, | ||
| 134 | .sync_sg_for_device = arm_dma_sync_sg_for_device, | ||
| 135 | .set_dma_mask = arm_dma_set_mask, | ||
| 136 | }; | ||
| 137 | EXPORT_SYMBOL(arm_dma_ops); | ||
| 138 | |||
| 32 | static u64 get_coherent_dma_mask(struct device *dev) | 139 | static u64 get_coherent_dma_mask(struct device *dev) |
| 33 | { | 140 | { |
| 34 | u64 mask = (u64)arm_dma_limit; | 141 | u64 mask = (u64)arm_dma_limit; |
| @@ -56,6 +163,21 @@ static u64 get_coherent_dma_mask(struct device *dev) | |||
| 56 | return mask; | 163 | return mask; |
| 57 | } | 164 | } |
| 58 | 165 | ||
| 166 | static void __dma_clear_buffer(struct page *page, size_t size) | ||
| 167 | { | ||
| 168 | void *ptr; | ||
| 169 | /* | ||
| 170 | * Ensure that the allocated pages are zeroed, and that any data | ||
| 171 | * lurking in the kernel direct-mapped region is invalidated. | ||
| 172 | */ | ||
| 173 | ptr = page_address(page); | ||
| 174 | if (ptr) { | ||
| 175 | memset(ptr, 0, size); | ||
| 176 | dmac_flush_range(ptr, ptr + size); | ||
| 177 | outer_flush_range(__pa(ptr), __pa(ptr) + size); | ||
| 178 | } | ||
| 179 | } | ||
| 180 | |||
| 59 | /* | 181 | /* |
| 60 | * Allocate a DMA buffer for 'dev' of size 'size' using the | 182 | * Allocate a DMA buffer for 'dev' of size 'size' using the |
| 61 | * specified gfp mask. Note that 'size' must be page aligned. | 183 | * specified gfp mask. Note that 'size' must be page aligned. |
| @@ -64,23 +186,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf | |||
| 64 | { | 186 | { |
| 65 | unsigned long order = get_order(size); | 187 | unsigned long order = get_order(size); |
| 66 | struct page *page, *p, *e; | 188 | struct page *page, *p, *e; |
| 67 | void *ptr; | ||
| 68 | u64 mask = get_coherent_dma_mask(dev); | ||
| 69 | |||
| 70 | #ifdef CONFIG_DMA_API_DEBUG | ||
| 71 | u64 limit = (mask + 1) & ~mask; | ||
| 72 | if (limit && size >= limit) { | ||
| 73 | dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", | ||
| 74 | size, mask); | ||
| 75 | return NULL; | ||
| 76 | } | ||
| 77 | #endif | ||
| 78 | |||
| 79 | if (!mask) | ||
| 80 | return NULL; | ||
| 81 | |||
| 82 | if (mask < 0xffffffffULL) | ||
| 83 | gfp |= GFP_DMA; | ||
| 84 | 189 | ||
| 85 | page = alloc_pages(gfp, order); | 190 | page = alloc_pages(gfp, order); |
| 86 | if (!page) | 191 | if (!page) |
| @@ -93,14 +198,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf | |||
| 93 | for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) | 198 | for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) |
| 94 | __free_page(p); | 199 | __free_page(p); |
| 95 | 200 | ||
| 96 | /* | 201 | __dma_clear_buffer(page, size); |
| 97 | * Ensure that the allocated pages are zeroed, and that any data | ||
| 98 | * lurking in the kernel direct-mapped region is invalidated. | ||
| 99 | */ | ||
| 100 | ptr = page_address(page); | ||
| 101 | memset(ptr, 0, size); | ||
| 102 | dmac_flush_range(ptr, ptr + size); | ||
| 103 | outer_flush_range(__pa(ptr), __pa(ptr) + size); | ||
| 104 | 202 | ||
| 105 | return page; | 203 | return page; |
| 106 | } | 204 | } |
| @@ -170,6 +268,11 @@ static int __init consistent_init(void) | |||
| 170 | unsigned long base = consistent_base; | 268 | unsigned long base = consistent_base; |
| 171 | unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; | 269 | unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; |
| 172 | 270 | ||
| 271 | #ifndef CONFIG_ARM_DMA_USE_IOMMU | ||
| 272 | if (cpu_architecture() >= CPU_ARCH_ARMv6) | ||
| 273 | return 0; | ||
| 274 | #endif | ||
| 275 | |||
| 173 | consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); | 276 | consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); |
| 174 | if (!consistent_pte) { | 277 | if (!consistent_pte) { |
| 175 | pr_err("%s: no memory\n", __func__); | 278 | pr_err("%s: no memory\n", __func__); |
| @@ -184,14 +287,14 @@ static int __init consistent_init(void) | |||
| 184 | 287 | ||
| 185 | pud = pud_alloc(&init_mm, pgd, base); | 288 | pud = pud_alloc(&init_mm, pgd, base); |
| 186 | if (!pud) { | 289 | if (!pud) { |
| 187 | printk(KERN_ERR "%s: no pud tables\n", __func__); | 290 | pr_err("%s: no pud tables\n", __func__); |
| 188 | ret = -ENOMEM; | 291 | ret = -ENOMEM; |
| 189 | break; | 292 | break; |
| 190 | } | 293 | } |
| 191 | 294 | ||
| 192 | pmd = pmd_alloc(&init_mm, pud, base); | 295 | pmd = pmd_alloc(&init_mm, pud, base); |
| 193 | if (!pmd) { | 296 | if (!pmd) { |
| 194 | printk(KERN_ERR "%s: no pmd tables\n", __func__); | 297 | pr_err("%s: no pmd tables\n", __func__); |
| 195 | ret = -ENOMEM; | 298 | ret = -ENOMEM; |
| 196 | break; | 299 | break; |
| 197 | } | 300 | } |
| @@ -199,7 +302,7 @@ static int __init consistent_init(void) | |||
| 199 | 302 | ||
| 200 | pte = pte_alloc_kernel(pmd, base); | 303 | pte = pte_alloc_kernel(pmd, base); |
| 201 | if (!pte) { | 304 | if (!pte) { |
| 202 | printk(KERN_ERR "%s: no pte tables\n", __func__); | 305 | pr_err("%s: no pte tables\n", __func__); |
| 203 | ret = -ENOMEM; | 306 | ret = -ENOMEM; |
| 204 | break; | 307 | break; |
| 205 | } | 308 | } |
| @@ -210,9 +313,101 @@ static int __init consistent_init(void) | |||
| 210 | 313 | ||
| 211 | return ret; | 314 | return ret; |
| 212 | } | 315 | } |
| 213 | |||
| 214 | core_initcall(consistent_init); | 316 | core_initcall(consistent_init); |
| 215 | 317 | ||
| 318 | static void *__alloc_from_contiguous(struct device *dev, size_t size, | ||
| 319 | pgprot_t prot, struct page **ret_page); | ||
| 320 | |||
| 321 | static struct arm_vmregion_head coherent_head = { | ||
| 322 | .vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock), | ||
| 323 | .vm_list = LIST_HEAD_INIT(coherent_head.vm_list), | ||
| 324 | }; | ||
| 325 | |||
| 326 | size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8; | ||
| 327 | |||
| 328 | static int __init early_coherent_pool(char *p) | ||
| 329 | { | ||
| 330 | coherent_pool_size = memparse(p, &p); | ||
| 331 | return 0; | ||
| 332 | } | ||
| 333 | early_param("coherent_pool", early_coherent_pool); | ||
| 334 | |||
| 335 | /* | ||
| 336 | * Initialise the coherent pool for atomic allocations. | ||
| 337 | */ | ||
| 338 | static int __init coherent_init(void) | ||
| 339 | { | ||
| 340 | pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); | ||
| 341 | size_t size = coherent_pool_size; | ||
| 342 | struct page *page; | ||
| 343 | void *ptr; | ||
| 344 | |||
| 345 | if (cpu_architecture() < CPU_ARCH_ARMv6) | ||
| 346 | return 0; | ||
| 347 | |||
| 348 | ptr = __alloc_from_contiguous(NULL, size, prot, &page); | ||
| 349 | if (ptr) { | ||
| 350 | coherent_head.vm_start = (unsigned long) ptr; | ||
| 351 | coherent_head.vm_end = (unsigned long) ptr + size; | ||
| 352 | printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n", | ||
| 353 | (unsigned)size / 1024); | ||
| 354 | return 0; | ||
| 355 | } | ||
| 356 | printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", | ||
| 357 | (unsigned)size / 1024); | ||
| 358 | return -ENOMEM; | ||
| 359 | } | ||
| 360 | /* | ||
| 361 | * CMA is activated by core_initcall, so we must be called after it. | ||
| 362 | */ | ||
| 363 | postcore_initcall(coherent_init); | ||
| 364 | |||
| 365 | struct dma_contig_early_reserve { | ||
| 366 | phys_addr_t base; | ||
| 367 | unsigned long size; | ||
| 368 | }; | ||
| 369 | |||
| 370 | static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; | ||
| 371 | |||
| 372 | static int dma_mmu_remap_num __initdata; | ||
| 373 | |||
| 374 | void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) | ||
| 375 | { | ||
| 376 | dma_mmu_remap[dma_mmu_remap_num].base = base; | ||
| 377 | dma_mmu_remap[dma_mmu_remap_num].size = size; | ||
| 378 | dma_mmu_remap_num++; | ||
| 379 | } | ||
| 380 | |||
| 381 | void __init dma_contiguous_remap(void) | ||
| 382 | { | ||
| 383 | int i; | ||
| 384 | for (i = 0; i < dma_mmu_remap_num; i++) { | ||
| 385 | phys_addr_t start = dma_mmu_remap[i].base; | ||
| 386 | phys_addr_t end = start + dma_mmu_remap[i].size; | ||
| 387 | struct map_desc map; | ||
| 388 | unsigned long addr; | ||
| 389 | |||
| 390 | if (end > arm_lowmem_limit) | ||
| 391 | end = arm_lowmem_limit; | ||
| 392 | if (start >= end) | ||
| 393 | return; | ||
| 394 | |||
| 395 | map.pfn = __phys_to_pfn(start); | ||
| 396 | map.virtual = __phys_to_virt(start); | ||
| 397 | map.length = end - start; | ||
| 398 | map.type = MT_MEMORY_DMA_READY; | ||
| 399 | |||
| 400 | /* | ||
| 401 | * Clear previous low-memory mapping | ||
| 402 | */ | ||
| 403 | for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); | ||
| 404 | addr += PMD_SIZE) | ||
| 405 | pmd_clear(pmd_off_k(addr)); | ||
| 406 | |||
| 407 | iotable_init(&map, 1); | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 216 | static void * | 411 | static void * |
| 217 | __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, | 412 | __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, |
| 218 | const void *caller) | 413 | const void *caller) |
| @@ -222,7 +417,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, | |||
| 222 | int bit; | 417 | int bit; |
| 223 | 418 | ||
| 224 | if (!consistent_pte) { | 419 | if (!consistent_pte) { |
| 225 | printk(KERN_ERR "%s: not initialised\n", __func__); | 420 | pr_err("%s: not initialised\n", __func__); |
| 226 | dump_stack(); | 421 | dump_stack(); |
| 227 | return NULL; | 422 | return NULL; |
| 228 | } | 423 | } |
| @@ -249,7 +444,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, | |||
| 249 | u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); | 444 | u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); |
| 250 | 445 | ||
| 251 | pte = consistent_pte[idx] + off; | 446 | pte = consistent_pte[idx] + off; |
| 252 | c->vm_pages = page; | 447 | c->priv = page; |
| 253 | 448 | ||
| 254 | do { | 449 | do { |
| 255 | BUG_ON(!pte_none(*pte)); | 450 | BUG_ON(!pte_none(*pte)); |
| @@ -281,14 +476,14 @@ static void __dma_free_remap(void *cpu_addr, size_t size) | |||
| 281 | 476 | ||
| 282 | c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); | 477 | c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); |
| 283 | if (!c) { | 478 | if (!c) { |
| 284 | printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", | 479 | pr_err("%s: trying to free invalid coherent area: %p\n", |
| 285 | __func__, cpu_addr); | 480 | __func__, cpu_addr); |
| 286 | dump_stack(); | 481 | dump_stack(); |
| 287 | return; | 482 | return; |
| 288 | } | 483 | } |
| 289 | 484 | ||
| 290 | if ((c->vm_end - c->vm_start) != size) { | 485 | if ((c->vm_end - c->vm_start) != size) { |
| 291 | printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", | 486 | pr_err("%s: freeing wrong coherent size (%ld != %d)\n", |
| 292 | __func__, c->vm_end - c->vm_start, size); | 487 | __func__, c->vm_end - c->vm_start, size); |
| 293 | dump_stack(); | 488 | dump_stack(); |
| 294 | size = c->vm_end - c->vm_start; | 489 | size = c->vm_end - c->vm_start; |
| @@ -310,8 +505,8 @@ static void __dma_free_remap(void *cpu_addr, size_t size) | |||
| 310 | } | 505 | } |
| 311 | 506 | ||
| 312 | if (pte_none(pte) || !pte_present(pte)) | 507 | if (pte_none(pte) || !pte_present(pte)) |
| 313 | printk(KERN_CRIT "%s: bad page in kernel page table\n", | 508 | pr_crit("%s: bad page in kernel page table\n", |
| 314 | __func__); | 509 | __func__); |
| 315 | } while (size -= PAGE_SIZE); | 510 | } while (size -= PAGE_SIZE); |
| 316 | 511 | ||
| 317 | flush_tlb_kernel_range(c->vm_start, c->vm_end); | 512 | flush_tlb_kernel_range(c->vm_start, c->vm_end); |
| @@ -319,20 +514,182 @@ static void __dma_free_remap(void *cpu_addr, size_t size) | |||
| 319 | arm_vmregion_free(&consistent_head, c); | 514 | arm_vmregion_free(&consistent_head, c); |
| 320 | } | 515 | } |
| 321 | 516 | ||
| 517 | static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, | ||
| 518 | void *data) | ||
| 519 | { | ||
| 520 | struct page *page = virt_to_page(addr); | ||
| 521 | pgprot_t prot = *(pgprot_t *)data; | ||
| 522 | |||
| 523 | set_pte_ext(pte, mk_pte(page, prot), 0); | ||
| 524 | return 0; | ||
| 525 | } | ||
| 526 | |||
| 527 | static void __dma_remap(struct page *page, size_t size, pgprot_t prot) | ||
| 528 | { | ||
| 529 | unsigned long start = (unsigned long) page_address(page); | ||
| 530 | unsigned end = start + size; | ||
| 531 | |||
| 532 | apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); | ||
| 533 | dsb(); | ||
| 534 | flush_tlb_kernel_range(start, end); | ||
| 535 | } | ||
| 536 | |||
| 537 | static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, | ||
| 538 | pgprot_t prot, struct page **ret_page, | ||
| 539 | const void *caller) | ||
| 540 | { | ||
| 541 | struct page *page; | ||
| 542 | void *ptr; | ||
| 543 | page = __dma_alloc_buffer(dev, size, gfp); | ||
| 544 | if (!page) | ||
| 545 | return NULL; | ||
| 546 | |||
| 547 | ptr = __dma_alloc_remap(page, size, gfp, prot, caller); | ||
| 548 | if (!ptr) { | ||
| 549 | __dma_free_buffer(page, size); | ||
| 550 | return NULL; | ||
| 551 | } | ||
| 552 | |||
| 553 | *ret_page = page; | ||
| 554 | return ptr; | ||
| 555 | } | ||
| 556 | |||
| 557 | static void *__alloc_from_pool(struct device *dev, size_t size, | ||
| 558 | struct page **ret_page, const void *caller) | ||
| 559 | { | ||
| 560 | struct arm_vmregion *c; | ||
| 561 | size_t align; | ||
| 562 | |||
| 563 | if (!coherent_head.vm_start) { | ||
| 564 | printk(KERN_ERR "%s: coherent pool not initialised!\n", | ||
| 565 | __func__); | ||
| 566 | dump_stack(); | ||
| 567 | return NULL; | ||
| 568 | } | ||
| 569 | |||
| 570 | /* | ||
| 571 | * Align the region allocation - allocations from pool are rather | ||
| 572 | * small, so align them to their order in pages, minimum is a page | ||
| 573 | * size. This helps reduce fragmentation of the DMA space. | ||
| 574 | */ | ||
| 575 | align = PAGE_SIZE << get_order(size); | ||
| 576 | c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller); | ||
| 577 | if (c) { | ||
| 578 | void *ptr = (void *)c->vm_start; | ||
| 579 | struct page *page = virt_to_page(ptr); | ||
| 580 | *ret_page = page; | ||
| 581 | return ptr; | ||
| 582 | } | ||
| 583 | return NULL; | ||
| 584 | } | ||
| 585 | |||
| 586 | static int __free_from_pool(void *cpu_addr, size_t size) | ||
| 587 | { | ||
| 588 | unsigned long start = (unsigned long)cpu_addr; | ||
| 589 | unsigned long end = start + size; | ||
| 590 | struct arm_vmregion *c; | ||
| 591 | |||
| 592 | if (start < coherent_head.vm_start || end > coherent_head.vm_end) | ||
| 593 | return 0; | ||
| 594 | |||
| 595 | c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start); | ||
| 596 | |||
| 597 | if ((c->vm_end - c->vm_start) != size) { | ||
| 598 | printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", | ||
| 599 | __func__, c->vm_end - c->vm_start, size); | ||
| 600 | dump_stack(); | ||
| 601 | size = c->vm_end - c->vm_start; | ||
| 602 | } | ||
| 603 | |||
| 604 | arm_vmregion_free(&coherent_head, c); | ||
| 605 | return 1; | ||
| 606 | } | ||
| 607 | |||
| 608 | static void *__alloc_from_contiguous(struct device *dev, size_t size, | ||
| 609 | pgprot_t prot, struct page **ret_page) | ||
| 610 | { | ||
| 611 | unsigned long order = get_order(size); | ||
| 612 | size_t count = size >> PAGE_SHIFT; | ||
| 613 | struct page *page; | ||
| 614 | |||
| 615 | page = dma_alloc_from_contiguous(dev, count, order); | ||
| 616 | if (!page) | ||
| 617 | return NULL; | ||
| 618 | |||
| 619 | __dma_clear_buffer(page, size); | ||
| 620 | __dma_remap(page, size, prot); | ||
| 621 | |||
| 622 | *ret_page = page; | ||
| 623 | return page_address(page); | ||
| 624 | } | ||
| 625 | |||
| 626 | static void __free_from_contiguous(struct device *dev, struct page *page, | ||
| 627 | size_t size) | ||
| 628 | { | ||
| 629 | __dma_remap(page, size, pgprot_kernel); | ||
| 630 | dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); | ||
| 631 | } | ||
| 632 | |||
| 633 | static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) | ||
| 634 | { | ||
| 635 | prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ? | ||
| 636 | pgprot_writecombine(prot) : | ||
| 637 | pgprot_dmacoherent(prot); | ||
| 638 | return prot; | ||
| 639 | } | ||
| 640 | |||
| 641 | #define nommu() 0 | ||
| 642 | |||
| 322 | #else /* !CONFIG_MMU */ | 643 | #else /* !CONFIG_MMU */ |
| 323 | 644 | ||
| 324 | #define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page) | 645 | #define nommu() 1 |
| 325 | #define __dma_free_remap(addr, size) do { } while (0) | 646 | |
| 647 | #define __get_dma_pgprot(attrs, prot) __pgprot(0) | ||
| 648 | #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL | ||
| 649 | #define __alloc_from_pool(dev, size, ret_page, c) NULL | ||
| 650 | #define __alloc_from_contiguous(dev, size, prot, ret) NULL | ||
| 651 | #define __free_from_pool(cpu_addr, size) 0 | ||
| 652 | #define __free_from_contiguous(dev, page, size) do { } while (0) | ||
| 653 | #define __dma_free_remap(cpu_addr, size) do { } while (0) | ||
| 326 | 654 | ||
| 327 | #endif /* CONFIG_MMU */ | 655 | #endif /* CONFIG_MMU */ |
| 328 | 656 | ||
| 329 | static void * | 657 | static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, |
| 330 | __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | 658 | struct page **ret_page) |
| 331 | pgprot_t prot, const void *caller) | 659 | { |
| 660 | struct page *page; | ||
| 661 | page = __dma_alloc_buffer(dev, size, gfp); | ||
| 662 | if (!page) | ||
| 663 | return NULL; | ||
| 664 | |||
| 665 | *ret_page = page; | ||
| 666 | return page_address(page); | ||
| 667 | } | ||
| 668 | |||
| 669 | |||
| 670 | |||
| 671 | static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, | ||
| 672 | gfp_t gfp, pgprot_t prot, const void *caller) | ||
| 332 | { | 673 | { |
| 674 | u64 mask = get_coherent_dma_mask(dev); | ||
| 333 | struct page *page; | 675 | struct page *page; |
| 334 | void *addr; | 676 | void *addr; |
| 335 | 677 | ||
| 678 | #ifdef CONFIG_DMA_API_DEBUG | ||
| 679 | u64 limit = (mask + 1) & ~mask; | ||
| 680 | if (limit && size >= limit) { | ||
| 681 | dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", | ||
| 682 | size, mask); | ||
| 683 | return NULL; | ||
| 684 | } | ||
| 685 | #endif | ||
| 686 | |||
| 687 | if (!mask) | ||
| 688 | return NULL; | ||
| 689 | |||
| 690 | if (mask < 0xffffffffULL) | ||
| 691 | gfp |= GFP_DMA; | ||
| 692 | |||
| 336 | /* | 693 | /* |
| 337 | * Following is a work-around (a.k.a. hack) to prevent pages | 694 | * Following is a work-around (a.k.a. hack) to prevent pages |
| 338 | * with __GFP_COMP being passed to split_page() which cannot | 695 | * with __GFP_COMP being passed to split_page() which cannot |
| @@ -342,22 +699,20 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | |||
| 342 | */ | 699 | */ |
| 343 | gfp &= ~(__GFP_COMP); | 700 | gfp &= ~(__GFP_COMP); |
| 344 | 701 | ||
| 345 | *handle = ~0; | 702 | *handle = DMA_ERROR_CODE; |
| 346 | size = PAGE_ALIGN(size); | 703 | size = PAGE_ALIGN(size); |
| 347 | 704 | ||
| 348 | page = __dma_alloc_buffer(dev, size, gfp); | 705 | if (arch_is_coherent() || nommu()) |
| 349 | if (!page) | 706 | addr = __alloc_simple_buffer(dev, size, gfp, &page); |
| 350 | return NULL; | 707 | else if (cpu_architecture() < CPU_ARCH_ARMv6) |
| 351 | 708 | addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); | |
| 352 | if (!arch_is_coherent()) | 709 | else if (gfp & GFP_ATOMIC) |
| 353 | addr = __dma_alloc_remap(page, size, gfp, prot, caller); | 710 | addr = __alloc_from_pool(dev, size, &page, caller); |
| 354 | else | 711 | else |
| 355 | addr = page_address(page); | 712 | addr = __alloc_from_contiguous(dev, size, prot, &page); |
| 356 | 713 | ||
| 357 | if (addr) | 714 | if (addr) |
| 358 | *handle = pfn_to_dma(dev, page_to_pfn(page)); | 715 | *handle = pfn_to_dma(dev, page_to_pfn(page)); |
| 359 | else | ||
| 360 | __dma_free_buffer(page, size); | ||
| 361 | 716 | ||
| 362 | return addr; | 717 | return addr; |
| 363 | } | 718 | } |
| @@ -366,138 +721,71 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | |||
| 366 | * Allocate DMA-coherent memory space and return both the kernel remapped | 721 | * Allocate DMA-coherent memory space and return both the kernel remapped |
| 367 | * virtual and bus address for that space. | 722 | * virtual and bus address for that space. |
| 368 | */ | 723 | */ |
| 369 | void * | 724 | void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, |
| 370 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) | 725 | gfp_t gfp, struct dma_attrs *attrs) |
| 371 | { | 726 | { |
| 727 | pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); | ||
| 372 | void *memory; | 728 | void *memory; |
| 373 | 729 | ||
| 374 | if (dma_alloc_from_coherent(dev, size, handle, &memory)) | 730 | if (dma_alloc_from_coherent(dev, size, handle, &memory)) |
| 375 | return memory; | 731 | return memory; |
| 376 | 732 | ||
| 377 | return __dma_alloc(dev, size, handle, gfp, | 733 | return __dma_alloc(dev, size, handle, gfp, prot, |
| 378 | pgprot_dmacoherent(pgprot_kernel), | ||
| 379 | __builtin_return_address(0)); | 734 | __builtin_return_address(0)); |
| 380 | } | 735 | } |
| 381 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
| 382 | 736 | ||
| 383 | /* | 737 | /* |
| 384 | * Allocate a writecombining region, in much the same way as | 738 | * Create userspace mapping for the DMA-coherent memory. |
| 385 | * dma_alloc_coherent above. | ||
| 386 | */ | 739 | */ |
| 387 | void * | 740 | int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, |
| 388 | dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) | 741 | void *cpu_addr, dma_addr_t dma_addr, size_t size, |
| 389 | { | 742 | struct dma_attrs *attrs) |
| 390 | return __dma_alloc(dev, size, handle, gfp, | ||
| 391 | pgprot_writecombine(pgprot_kernel), | ||
| 392 | __builtin_return_address(0)); | ||
| 393 | } | ||
| 394 | EXPORT_SYMBOL(dma_alloc_writecombine); | ||
| 395 | |||
| 396 | static int dma_mmap(struct device *dev, struct vm_area_struct *vma, | ||
| 397 | void *cpu_addr, dma_addr_t dma_addr, size_t size) | ||
| 398 | { | 743 | { |
| 399 | int ret = -ENXIO; | 744 | int ret = -ENXIO; |
| 400 | #ifdef CONFIG_MMU | 745 | #ifdef CONFIG_MMU |
| 401 | unsigned long user_size, kern_size; | 746 | unsigned long pfn = dma_to_pfn(dev, dma_addr); |
| 402 | struct arm_vmregion *c; | 747 | vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); |
| 403 | 748 | ||
| 404 | user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | 749 | if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) |
| 750 | return ret; | ||
| 405 | 751 | ||
| 406 | c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); | 752 | ret = remap_pfn_range(vma, vma->vm_start, |
| 407 | if (c) { | 753 | pfn + vma->vm_pgoff, |
| 408 | unsigned long off = vma->vm_pgoff; | 754 | vma->vm_end - vma->vm_start, |
| 409 | 755 | vma->vm_page_prot); | |
| 410 | kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; | ||
| 411 | |||
| 412 | if (off < kern_size && | ||
| 413 | user_size <= (kern_size - off)) { | ||
| 414 | ret = remap_pfn_range(vma, vma->vm_start, | ||
| 415 | page_to_pfn(c->vm_pages) + off, | ||
| 416 | user_size << PAGE_SHIFT, | ||
| 417 | vma->vm_page_prot); | ||
| 418 | } | ||
| 419 | } | ||
| 420 | #endif /* CONFIG_MMU */ | 756 | #endif /* CONFIG_MMU */ |
| 421 | 757 | ||
| 422 | return ret; | 758 | return ret; |
| 423 | } | 759 | } |
| 424 | 760 | ||
| 425 | int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, | ||
| 426 | void *cpu_addr, dma_addr_t dma_addr, size_t size) | ||
| 427 | { | ||
| 428 | vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot); | ||
| 429 | return dma_mmap(dev, vma, cpu_addr, dma_addr, size); | ||
| 430 | } | ||
| 431 | EXPORT_SYMBOL(dma_mmap_coherent); | ||
| 432 | |||
| 433 | int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, | ||
| 434 | void *cpu_addr, dma_addr_t dma_addr, size_t size) | ||
| 435 | { | ||
| 436 | vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); | ||
| 437 | return dma_mmap(dev, vma, cpu_addr, dma_addr, size); | ||
| 438 | } | ||
| 439 | EXPORT_SYMBOL(dma_mmap_writecombine); | ||
| 440 | |||
| 441 | /* | 761 | /* |
| 442 | * free a page as defined by the above mapping. | 762 | * Free a buffer as defined by the above mapping. |
| 443 | * Must not be called with IRQs disabled. | ||
| 444 | */ | 763 | */ |
| 445 | void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) | 764 | void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, |
| 765 | dma_addr_t handle, struct dma_attrs *attrs) | ||
| 446 | { | 766 | { |
| 447 | WARN_ON(irqs_disabled()); | 767 | struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); |
| 448 | 768 | ||
| 449 | if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) | 769 | if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) |
| 450 | return; | 770 | return; |
| 451 | 771 | ||
| 452 | size = PAGE_ALIGN(size); | 772 | size = PAGE_ALIGN(size); |
| 453 | 773 | ||
| 454 | if (!arch_is_coherent()) | 774 | if (arch_is_coherent() || nommu()) { |
| 775 | __dma_free_buffer(page, size); | ||
| 776 | } else if (cpu_architecture() < CPU_ARCH_ARMv6) { | ||
| 455 | __dma_free_remap(cpu_addr, size); | 777 | __dma_free_remap(cpu_addr, size); |
| 456 | 778 | __dma_free_buffer(page, size); | |
| 457 | __dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size); | ||
| 458 | } | ||
| 459 | EXPORT_SYMBOL(dma_free_coherent); | ||
| 460 | |||
| 461 | /* | ||
| 462 | * Make an area consistent for devices. | ||
| 463 | * Note: Drivers should NOT use this function directly, as it will break | ||
| 464 | * platforms with CONFIG_DMABOUNCE. | ||
| 465 | * Use the driver DMA support - see dma-mapping.h (dma_sync_*) | ||
| 466 | */ | ||
| 467 | void ___dma_single_cpu_to_dev(const void *kaddr, size_t size, | ||
| 468 | enum dma_data_direction dir) | ||
| 469 | { | ||
| 470 | unsigned long paddr; | ||
| 471 | |||
| 472 | BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); | ||
| 473 | |||
| 474 | dmac_map_area(kaddr, size, dir); | ||
| 475 | |||
| 476 | paddr = __pa(kaddr); | ||
| 477 | if (dir == DMA_FROM_DEVICE) { | ||
| 478 | outer_inv_range(paddr, paddr + size); | ||
| 479 | } else { | 779 | } else { |
| 480 | outer_clean_range(paddr, paddr + size); | 780 | if (__free_from_pool(cpu_addr, size)) |
| 481 | } | 781 | return; |
| 482 | /* FIXME: non-speculating: flush on bidirectional mappings? */ | 782 | /* |
| 483 | } | 783 | * Non-atomic allocations cannot be freed with IRQs disabled |
| 484 | EXPORT_SYMBOL(___dma_single_cpu_to_dev); | 784 | */ |
| 485 | 785 | WARN_ON(irqs_disabled()); | |
| 486 | void ___dma_single_dev_to_cpu(const void *kaddr, size_t size, | 786 | __free_from_contiguous(dev, page, size); |
| 487 | enum dma_data_direction dir) | ||
| 488 | { | ||
| 489 | BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); | ||
| 490 | |||
| 491 | /* FIXME: non-speculating: not required */ | ||
| 492 | /* don't bother invalidating if DMA to device */ | ||
| 493 | if (dir != DMA_TO_DEVICE) { | ||
| 494 | unsigned long paddr = __pa(kaddr); | ||
| 495 | outer_inv_range(paddr, paddr + size); | ||
| 496 | } | 787 | } |
| 497 | |||
| 498 | dmac_unmap_area(kaddr, size, dir); | ||
| 499 | } | 788 | } |
| 500 | EXPORT_SYMBOL(___dma_single_dev_to_cpu); | ||
| 501 | 789 | ||
| 502 | static void dma_cache_maint_page(struct page *page, unsigned long offset, | 790 | static void dma_cache_maint_page(struct page *page, unsigned long offset, |
| 503 | size_t size, enum dma_data_direction dir, | 791 | size_t size, enum dma_data_direction dir, |
| @@ -543,7 +831,13 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, | |||
| 543 | } while (left); | 831 | } while (left); |
| 544 | } | 832 | } |
| 545 | 833 | ||
| 546 | void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, | 834 | /* |
| 835 | * Make an area consistent for devices. | ||
| 836 | * Note: Drivers should NOT use this function directly, as it will break | ||
| 837 | * platforms with CONFIG_DMABOUNCE. | ||
| 838 | * Use the driver DMA support - see dma-mapping.h (dma_sync_*) | ||
| 839 | */ | ||
| 840 | static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, | ||
| 547 | size_t size, enum dma_data_direction dir) | 841 | size_t size, enum dma_data_direction dir) |
| 548 | { | 842 | { |
| 549 | unsigned long paddr; | 843 | unsigned long paddr; |
| @@ -558,9 +852,8 @@ void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, | |||
| 558 | } | 852 | } |
| 559 | /* FIXME: non-speculating: flush on bidirectional mappings? */ | 853 | /* FIXME: non-speculating: flush on bidirectional mappings? */ |
| 560 | } | 854 | } |
| 561 | EXPORT_SYMBOL(___dma_page_cpu_to_dev); | ||
| 562 | 855 | ||
| 563 | void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, | 856 | static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, |
| 564 | size_t size, enum dma_data_direction dir) | 857 | size_t size, enum dma_data_direction dir) |
| 565 | { | 858 | { |
| 566 | unsigned long paddr = page_to_phys(page) + off; | 859 | unsigned long paddr = page_to_phys(page) + off; |
| @@ -578,10 +871,9 @@ void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, | |||
| 578 | if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) | 871 | if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) |
| 579 | set_bit(PG_dcache_clean, &page->flags); | 872 | set_bit(PG_dcache_clean, &page->flags); |
| 580 | } | 873 | } |
| 581 | EXPORT_SYMBOL(___dma_page_dev_to_cpu); | ||
| 582 | 874 | ||
| 583 | /** | 875 | /** |
| 584 | * dma_map_sg - map a set of SG buffers for streaming mode DMA | 876 | * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA |
| 585 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 877 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |
| 586 | * @sg: list of buffers | 878 | * @sg: list of buffers |
| 587 | * @nents: number of buffers to map | 879 | * @nents: number of buffers to map |
| @@ -596,32 +888,32 @@ EXPORT_SYMBOL(___dma_page_dev_to_cpu); | |||
| 596 | * Device ownership issues as mentioned for dma_map_single are the same | 888 | * Device ownership issues as mentioned for dma_map_single are the same |
| 597 | * here. | 889 | * here. |
| 598 | */ | 890 | */ |
| 599 | int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, | 891 | int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, |
| 600 | enum dma_data_direction dir) | 892 | enum dma_data_direction dir, struct dma_attrs *attrs) |
| 601 | { | 893 | { |
| 894 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
| 602 | struct scatterlist *s; | 895 | struct scatterlist *s; |
| 603 | int i, j; | 896 | int i, j; |
| 604 | 897 | ||
| 605 | BUG_ON(!valid_dma_direction(dir)); | ||
| 606 | |||
| 607 | for_each_sg(sg, s, nents, i) { | 898 | for_each_sg(sg, s, nents, i) { |
| 608 | s->dma_address = __dma_map_page(dev, sg_page(s), s->offset, | 899 | #ifdef CONFIG_NEED_SG_DMA_LENGTH |
| 609 | s->length, dir); | 900 | s->dma_length = s->length; |
| 901 | #endif | ||
| 902 | s->dma_address = ops->map_page(dev, sg_page(s), s->offset, | ||
| 903 | s->length, dir, attrs); | ||
| 610 | if (dma_mapping_error(dev, s->dma_address)) | 904 | if (dma_mapping_error(dev, s->dma_address)) |
| 611 | goto bad_mapping; | 905 | goto bad_mapping; |
| 612 | } | 906 | } |
| 613 | debug_dma_map_sg(dev, sg, nents, nents, dir); | ||
| 614 | return nents; | 907 | return nents; |
| 615 | 908 | ||
| 616 | bad_mapping: | 909 | bad_mapping: |
| 617 | for_each_sg(sg, s, i, j) | 910 | for_each_sg(sg, s, i, j) |
| 618 | __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); | 911 | ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); |
| 619 | return 0; | 912 | return 0; |
| 620 | } | 913 | } |
| 621 | EXPORT_SYMBOL(dma_map_sg); | ||
| 622 | 914 | ||
| 623 | /** | 915 | /** |
| 624 | * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg | 916 | * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg |
| 625 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 917 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |
| 626 | * @sg: list of buffers | 918 | * @sg: list of buffers |
| 627 | * @nents: number of buffers to unmap (same as was passed to dma_map_sg) | 919 | * @nents: number of buffers to unmap (same as was passed to dma_map_sg) |
| @@ -630,70 +922,55 @@ EXPORT_SYMBOL(dma_map_sg); | |||
| 630 | * Unmap a set of streaming mode DMA translations. Again, CPU access | 922 | * Unmap a set of streaming mode DMA translations. Again, CPU access |
| 631 | * rules concerning calls here are the same as for dma_unmap_single(). | 923 | * rules concerning calls here are the same as for dma_unmap_single(). |
| 632 | */ | 924 | */ |
| 633 | void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, | 925 | void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, |
| 634 | enum dma_data_direction dir) | 926 | enum dma_data_direction dir, struct dma_attrs *attrs) |
| 635 | { | 927 | { |
| 928 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
| 636 | struct scatterlist *s; | 929 | struct scatterlist *s; |
| 637 | int i; | ||
| 638 | 930 | ||
| 639 | debug_dma_unmap_sg(dev, sg, nents, dir); | 931 | int i; |
| 640 | 932 | ||
| 641 | for_each_sg(sg, s, nents, i) | 933 | for_each_sg(sg, s, nents, i) |
| 642 | __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); | 934 | ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); |
| 643 | } | 935 | } |
| 644 | EXPORT_SYMBOL(dma_unmap_sg); | ||
| 645 | 936 | ||
| 646 | /** | 937 | /** |
| 647 | * dma_sync_sg_for_cpu | 938 | * arm_dma_sync_sg_for_cpu |
| 648 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 939 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |
| 649 | * @sg: list of buffers | 940 | * @sg: list of buffers |
| 650 | * @nents: number of buffers to map (returned from dma_map_sg) | 941 | * @nents: number of buffers to map (returned from dma_map_sg) |
| 651 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) | 942 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) |
| 652 | */ | 943 | */ |
| 653 | void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, | 944 | void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, |
| 654 | int nents, enum dma_data_direction dir) | 945 | int nents, enum dma_data_direction dir) |
| 655 | { | 946 | { |
| 947 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
| 656 | struct scatterlist *s; | 948 | struct scatterlist *s; |
| 657 | int i; | 949 | int i; |
| 658 | 950 | ||
| 659 | for_each_sg(sg, s, nents, i) { | 951 | for_each_sg(sg, s, nents, i) |
| 660 | if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0, | 952 | ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length, |
| 661 | sg_dma_len(s), dir)) | 953 | dir); |
| 662 | continue; | ||
| 663 | |||
| 664 | __dma_page_dev_to_cpu(sg_page(s), s->offset, | ||
| 665 | s->length, dir); | ||
| 666 | } | ||
| 667 | |||
| 668 | debug_dma_sync_sg_for_cpu(dev, sg, nents, dir); | ||
| 669 | } | 954 | } |
| 670 | EXPORT_SYMBOL(dma_sync_sg_for_cpu); | ||
| 671 | 955 | ||
| 672 | /** | 956 | /** |
| 673 | * dma_sync_sg_for_device | 957 | * arm_dma_sync_sg_for_device |
| 674 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 958 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |
| 675 | * @sg: list of buffers | 959 | * @sg: list of buffers |
| 676 | * @nents: number of buffers to map (returned from dma_map_sg) | 960 | * @nents: number of buffers to map (returned from dma_map_sg) |
| 677 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) | 961 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) |
| 678 | */ | 962 | */ |
| 679 | void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, | 963 | void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, |
| 680 | int nents, enum dma_data_direction dir) | 964 | int nents, enum dma_data_direction dir) |
| 681 | { | 965 | { |
| 966 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
| 682 | struct scatterlist *s; | 967 | struct scatterlist *s; |
| 683 | int i; | 968 | int i; |
| 684 | 969 | ||
| 685 | for_each_sg(sg, s, nents, i) { | 970 | for_each_sg(sg, s, nents, i) |
| 686 | if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0, | 971 | ops->sync_single_for_device(dev, sg_dma_address(s), s->length, |
| 687 | sg_dma_len(s), dir)) | 972 | dir); |
| 688 | continue; | ||
| 689 | |||
| 690 | __dma_page_cpu_to_dev(sg_page(s), s->offset, | ||
| 691 | s->length, dir); | ||
| 692 | } | ||
| 693 | |||
| 694 | debug_dma_sync_sg_for_device(dev, sg, nents, dir); | ||
| 695 | } | 973 | } |
| 696 | EXPORT_SYMBOL(dma_sync_sg_for_device); | ||
| 697 | 974 | ||
| 698 | /* | 975 | /* |
| 699 | * Return whether the given device DMA address mask can be supported | 976 | * Return whether the given device DMA address mask can be supported |
| @@ -709,18 +986,15 @@ int dma_supported(struct device *dev, u64 mask) | |||
| 709 | } | 986 | } |
| 710 | EXPORT_SYMBOL(dma_supported); | 987 | EXPORT_SYMBOL(dma_supported); |
| 711 | 988 | ||
| 712 | int dma_set_mask(struct device *dev, u64 dma_mask) | 989 | static int arm_dma_set_mask(struct device *dev, u64 dma_mask) |
| 713 | { | 990 | { |
| 714 | if (!dev->dma_mask || !dma_supported(dev, dma_mask)) | 991 | if (!dev->dma_mask || !dma_supported(dev, dma_mask)) |
| 715 | return -EIO; | 992 | return -EIO; |
| 716 | 993 | ||
| 717 | #ifndef CONFIG_DMABOUNCE | ||
| 718 | *dev->dma_mask = dma_mask; | 994 | *dev->dma_mask = dma_mask; |
| 719 | #endif | ||
| 720 | 995 | ||
| 721 | return 0; | 996 | return 0; |
| 722 | } | 997 | } |
| 723 | EXPORT_SYMBOL(dma_set_mask); | ||
| 724 | 998 | ||
| 725 | #define PREALLOC_DMA_DEBUG_ENTRIES 4096 | 999 | #define PREALLOC_DMA_DEBUG_ENTRIES 4096 |
| 726 | 1000 | ||
| @@ -733,3 +1007,679 @@ static int __init dma_debug_do_init(void) | |||
| 733 | return 0; | 1007 | return 0; |
| 734 | } | 1008 | } |
| 735 | fs_initcall(dma_debug_do_init); | 1009 | fs_initcall(dma_debug_do_init); |
| 1010 | |||
| 1011 | #ifdef CONFIG_ARM_DMA_USE_IOMMU | ||
| 1012 | |||
| 1013 | /* IOMMU */ | ||
| 1014 | |||
| 1015 | static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, | ||
| 1016 | size_t size) | ||
| 1017 | { | ||
| 1018 | unsigned int order = get_order(size); | ||
| 1019 | unsigned int align = 0; | ||
| 1020 | unsigned int count, start; | ||
| 1021 | unsigned long flags; | ||
| 1022 | |||
| 1023 | count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) + | ||
| 1024 | (1 << mapping->order) - 1) >> mapping->order; | ||
| 1025 | |||
| 1026 | if (order > mapping->order) | ||
| 1027 | align = (1 << (order - mapping->order)) - 1; | ||
| 1028 | |||
| 1029 | spin_lock_irqsave(&mapping->lock, flags); | ||
| 1030 | start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0, | ||
| 1031 | count, align); | ||
| 1032 | if (start > mapping->bits) { | ||
| 1033 | spin_unlock_irqrestore(&mapping->lock, flags); | ||
| 1034 | return DMA_ERROR_CODE; | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | bitmap_set(mapping->bitmap, start, count); | ||
| 1038 | spin_unlock_irqrestore(&mapping->lock, flags); | ||
| 1039 | |||
| 1040 | return mapping->base + (start << (mapping->order + PAGE_SHIFT)); | ||
| 1041 | } | ||
| 1042 | |||
| 1043 | static inline void __free_iova(struct dma_iommu_mapping *mapping, | ||
| 1044 | dma_addr_t addr, size_t size) | ||
| 1045 | { | ||
| 1046 | unsigned int start = (addr - mapping->base) >> | ||
| 1047 | (mapping->order + PAGE_SHIFT); | ||
| 1048 | unsigned int count = ((size >> PAGE_SHIFT) + | ||
| 1049 | (1 << mapping->order) - 1) >> mapping->order; | ||
| 1050 | unsigned long flags; | ||
| 1051 | |||
| 1052 | spin_lock_irqsave(&mapping->lock, flags); | ||
| 1053 | bitmap_clear(mapping->bitmap, start, count); | ||
| 1054 | spin_unlock_irqrestore(&mapping->lock, flags); | ||
| 1055 | } | ||
| 1056 | |||
| 1057 | static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp) | ||
| 1058 | { | ||
| 1059 | struct page **pages; | ||
| 1060 | int count = size >> PAGE_SHIFT; | ||
| 1061 | int array_size = count * sizeof(struct page *); | ||
| 1062 | int i = 0; | ||
| 1063 | |||
| 1064 | if (array_size <= PAGE_SIZE) | ||
| 1065 | pages = kzalloc(array_size, gfp); | ||
| 1066 | else | ||
| 1067 | pages = vzalloc(array_size); | ||
| 1068 | if (!pages) | ||
| 1069 | return NULL; | ||
| 1070 | |||
| 1071 | while (count) { | ||
| 1072 | int j, order = __ffs(count); | ||
| 1073 | |||
| 1074 | pages[i] = alloc_pages(gfp | __GFP_NOWARN, order); | ||
| 1075 | while (!pages[i] && order) | ||
| 1076 | pages[i] = alloc_pages(gfp | __GFP_NOWARN, --order); | ||
| 1077 | if (!pages[i]) | ||
| 1078 | goto error; | ||
| 1079 | |||
| 1080 | if (order) | ||
| 1081 | split_page(pages[i], order); | ||
| 1082 | j = 1 << order; | ||
| 1083 | while (--j) | ||
| 1084 | pages[i + j] = pages[i] + j; | ||
| 1085 | |||
| 1086 | __dma_clear_buffer(pages[i], PAGE_SIZE << order); | ||
| 1087 | i += 1 << order; | ||
| 1088 | count -= 1 << order; | ||
| 1089 | } | ||
| 1090 | |||
| 1091 | return pages; | ||
| 1092 | error: | ||
| 1093 | while (--i) | ||
| 1094 | if (pages[i]) | ||
| 1095 | __free_pages(pages[i], 0); | ||
| 1096 | if (array_size < PAGE_SIZE) | ||
| 1097 | kfree(pages); | ||
| 1098 | else | ||
| 1099 | vfree(pages); | ||
| 1100 | return NULL; | ||
| 1101 | } | ||
| 1102 | |||
| 1103 | static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size) | ||
| 1104 | { | ||
| 1105 | int count = size >> PAGE_SHIFT; | ||
| 1106 | int array_size = count * sizeof(struct page *); | ||
| 1107 | int i; | ||
| 1108 | for (i = 0; i < count; i++) | ||
| 1109 | if (pages[i]) | ||
| 1110 | __free_pages(pages[i], 0); | ||
| 1111 | if (array_size < PAGE_SIZE) | ||
| 1112 | kfree(pages); | ||
| 1113 | else | ||
| 1114 | vfree(pages); | ||
| 1115 | return 0; | ||
| 1116 | } | ||
| 1117 | |||
| 1118 | /* | ||
| 1119 | * Create a CPU mapping for a specified pages | ||
| 1120 | */ | ||
| 1121 | static void * | ||
| 1122 | __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot) | ||
| 1123 | { | ||
| 1124 | struct arm_vmregion *c; | ||
| 1125 | size_t align; | ||
| 1126 | size_t count = size >> PAGE_SHIFT; | ||
| 1127 | int bit; | ||
| 1128 | |||
| 1129 | if (!consistent_pte[0]) { | ||
| 1130 | pr_err("%s: not initialised\n", __func__); | ||
| 1131 | dump_stack(); | ||
| 1132 | return NULL; | ||
| 1133 | } | ||
| 1134 | |||
| 1135 | /* | ||
| 1136 | * Align the virtual region allocation - maximum alignment is | ||
| 1137 | * a section size, minimum is a page size. This helps reduce | ||
| 1138 | * fragmentation of the DMA space, and also prevents allocations | ||
| 1139 | * smaller than a section from crossing a section boundary. | ||
| 1140 | */ | ||
| 1141 | bit = fls(size - 1); | ||
| 1142 | if (bit > SECTION_SHIFT) | ||
| 1143 | bit = SECTION_SHIFT; | ||
| 1144 | align = 1 << bit; | ||
| 1145 | |||
| 1146 | /* | ||
| 1147 | * Allocate a virtual address in the consistent mapping region. | ||
| 1148 | */ | ||
| 1149 | c = arm_vmregion_alloc(&consistent_head, align, size, | ||
| 1150 | gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL); | ||
| 1151 | if (c) { | ||
| 1152 | pte_t *pte; | ||
| 1153 | int idx = CONSISTENT_PTE_INDEX(c->vm_start); | ||
| 1154 | int i = 0; | ||
| 1155 | u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); | ||
| 1156 | |||
| 1157 | pte = consistent_pte[idx] + off; | ||
| 1158 | c->priv = pages; | ||
| 1159 | |||
| 1160 | do { | ||
| 1161 | BUG_ON(!pte_none(*pte)); | ||
| 1162 | |||
| 1163 | set_pte_ext(pte, mk_pte(pages[i], prot), 0); | ||
| 1164 | pte++; | ||
| 1165 | off++; | ||
| 1166 | i++; | ||
| 1167 | if (off >= PTRS_PER_PTE) { | ||
| 1168 | off = 0; | ||
| 1169 | pte = consistent_pte[++idx]; | ||
| 1170 | } | ||
| 1171 | } while (i < count); | ||
| 1172 | |||
| 1173 | dsb(); | ||
| 1174 | |||
| 1175 | return (void *)c->vm_start; | ||
| 1176 | } | ||
| 1177 | return NULL; | ||
| 1178 | } | ||
| 1179 | |||
| 1180 | /* | ||
| 1181 | * Create a mapping in device IO address space for specified pages | ||
| 1182 | */ | ||
| 1183 | static dma_addr_t | ||
| 1184 | __iommu_create_mapping(struct device *dev, struct page **pages, size_t size) | ||
| 1185 | { | ||
| 1186 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
| 1187 | unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
| 1188 | dma_addr_t dma_addr, iova; | ||
| 1189 | int i, ret = DMA_ERROR_CODE; | ||
| 1190 | |||
| 1191 | dma_addr = __alloc_iova(mapping, size); | ||
| 1192 | if (dma_addr == DMA_ERROR_CODE) | ||
| 1193 | return dma_addr; | ||
| 1194 | |||
| 1195 | iova = dma_addr; | ||
| 1196 | for (i = 0; i < count; ) { | ||
| 1197 | unsigned int next_pfn = page_to_pfn(pages[i]) + 1; | ||
| 1198 | phys_addr_t phys = page_to_phys(pages[i]); | ||
| 1199 | unsigned int len, j; | ||
| 1200 | |||
| 1201 | for (j = i + 1; j < count; j++, next_pfn++) | ||
| 1202 | if (page_to_pfn(pages[j]) != next_pfn) | ||
| 1203 | break; | ||
| 1204 | |||
| 1205 | len = (j - i) << PAGE_SHIFT; | ||
| 1206 | ret = iommu_map(mapping->domain, iova, phys, len, 0); | ||
| 1207 | if (ret < 0) | ||
| 1208 | goto fail; | ||
| 1209 | iova += len; | ||
| 1210 | i = j; | ||
| 1211 | } | ||
| 1212 | return dma_addr; | ||
| 1213 | fail: | ||
| 1214 | iommu_unmap(mapping->domain, dma_addr, iova-dma_addr); | ||
| 1215 | __free_iova(mapping, dma_addr, size); | ||
| 1216 | return DMA_ERROR_CODE; | ||
| 1217 | } | ||
| 1218 | |||
| 1219 | static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) | ||
| 1220 | { | ||
| 1221 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
| 1222 | |||
| 1223 | /* | ||
| 1224 | * add optional in-page offset from iova to size and align | ||
| 1225 | * result to page size | ||
| 1226 | */ | ||
| 1227 | size = PAGE_ALIGN((iova & ~PAGE_MASK) + size); | ||
| 1228 | iova &= PAGE_MASK; | ||
| 1229 | |||
| 1230 | iommu_unmap(mapping->domain, iova, size); | ||
| 1231 | __free_iova(mapping, iova, size); | ||
| 1232 | return 0; | ||
| 1233 | } | ||
| 1234 | |||
| 1235 | static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, | ||
| 1236 | dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) | ||
| 1237 | { | ||
| 1238 | pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); | ||
| 1239 | struct page **pages; | ||
| 1240 | void *addr = NULL; | ||
| 1241 | |||
| 1242 | *handle = DMA_ERROR_CODE; | ||
| 1243 | size = PAGE_ALIGN(size); | ||
| 1244 | |||
| 1245 | pages = __iommu_alloc_buffer(dev, size, gfp); | ||
| 1246 | if (!pages) | ||
| 1247 | return NULL; | ||
| 1248 | |||
| 1249 | *handle = __iommu_create_mapping(dev, pages, size); | ||
| 1250 | if (*handle == DMA_ERROR_CODE) | ||
| 1251 | goto err_buffer; | ||
| 1252 | |||
| 1253 | addr = __iommu_alloc_remap(pages, size, gfp, prot); | ||
| 1254 | if (!addr) | ||
| 1255 | goto err_mapping; | ||
| 1256 | |||
| 1257 | return addr; | ||
| 1258 | |||
| 1259 | err_mapping: | ||
| 1260 | __iommu_remove_mapping(dev, *handle, size); | ||
| 1261 | err_buffer: | ||
| 1262 | __iommu_free_buffer(dev, pages, size); | ||
| 1263 | return NULL; | ||
| 1264 | } | ||
| 1265 | |||
| 1266 | static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, | ||
| 1267 | void *cpu_addr, dma_addr_t dma_addr, size_t size, | ||
| 1268 | struct dma_attrs *attrs) | ||
| 1269 | { | ||
| 1270 | struct arm_vmregion *c; | ||
| 1271 | |||
| 1272 | vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); | ||
| 1273 | c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); | ||
| 1274 | |||
| 1275 | if (c) { | ||
| 1276 | struct page **pages = c->priv; | ||
| 1277 | |||
| 1278 | unsigned long uaddr = vma->vm_start; | ||
| 1279 | unsigned long usize = vma->vm_end - vma->vm_start; | ||
| 1280 | int i = 0; | ||
| 1281 | |||
| 1282 | do { | ||
| 1283 | int ret; | ||
| 1284 | |||
| 1285 | ret = vm_insert_page(vma, uaddr, pages[i++]); | ||
| 1286 | if (ret) { | ||
| 1287 | pr_err("Remapping memory, error: %d\n", ret); | ||
| 1288 | return ret; | ||
| 1289 | } | ||
| 1290 | |||
| 1291 | uaddr += PAGE_SIZE; | ||
| 1292 | usize -= PAGE_SIZE; | ||
| 1293 | } while (usize > 0); | ||
| 1294 | } | ||
| 1295 | return 0; | ||
| 1296 | } | ||
| 1297 | |||
| 1298 | /* | ||
| 1299 | * free a page as defined by the above mapping. | ||
| 1300 | * Must not be called with IRQs disabled. | ||
| 1301 | */ | ||
| 1302 | void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, | ||
| 1303 | dma_addr_t handle, struct dma_attrs *attrs) | ||
| 1304 | { | ||
| 1305 | struct arm_vmregion *c; | ||
| 1306 | size = PAGE_ALIGN(size); | ||
| 1307 | |||
| 1308 | c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); | ||
| 1309 | if (c) { | ||
| 1310 | struct page **pages = c->priv; | ||
| 1311 | __dma_free_remap(cpu_addr, size); | ||
| 1312 | __iommu_remove_mapping(dev, handle, size); | ||
| 1313 | __iommu_free_buffer(dev, pages, size); | ||
| 1314 | } | ||
| 1315 | } | ||
| 1316 | |||
| 1317 | /* | ||
| 1318 | * Map a part of the scatter-gather list into contiguous io address space | ||
| 1319 | */ | ||
| 1320 | static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, | ||
| 1321 | size_t size, dma_addr_t *handle, | ||
| 1322 | enum dma_data_direction dir) | ||
| 1323 | { | ||
| 1324 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
| 1325 | dma_addr_t iova, iova_base; | ||
| 1326 | int ret = 0; | ||
| 1327 | unsigned int count; | ||
| 1328 | struct scatterlist *s; | ||
| 1329 | |||
| 1330 | size = PAGE_ALIGN(size); | ||
| 1331 | *handle = DMA_ERROR_CODE; | ||
| 1332 | |||
| 1333 | iova_base = iova = __alloc_iova(mapping, size); | ||
| 1334 | if (iova == DMA_ERROR_CODE) | ||
| 1335 | return -ENOMEM; | ||
| 1336 | |||
| 1337 | for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) { | ||
| 1338 | phys_addr_t phys = page_to_phys(sg_page(s)); | ||
| 1339 | unsigned int len = PAGE_ALIGN(s->offset + s->length); | ||
| 1340 | |||
| 1341 | if (!arch_is_coherent()) | ||
| 1342 | __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); | ||
| 1343 | |||
| 1344 | ret = iommu_map(mapping->domain, iova, phys, len, 0); | ||
| 1345 | if (ret < 0) | ||
| 1346 | goto fail; | ||
| 1347 | count += len >> PAGE_SHIFT; | ||
| 1348 | iova += len; | ||
| 1349 | } | ||
| 1350 | *handle = iova_base; | ||
| 1351 | |||
| 1352 | return 0; | ||
| 1353 | fail: | ||
| 1354 | iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE); | ||
| 1355 | __free_iova(mapping, iova_base, size); | ||
| 1356 | return ret; | ||
| 1357 | } | ||
| 1358 | |||
| 1359 | /** | ||
| 1360 | * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA | ||
| 1361 | * @dev: valid struct device pointer | ||
| 1362 | * @sg: list of buffers | ||
| 1363 | * @nents: number of buffers to map | ||
| 1364 | * @dir: DMA transfer direction | ||
| 1365 | * | ||
| 1366 | * Map a set of buffers described by scatterlist in streaming mode for DMA. | ||
| 1367 | * The scatter gather list elements are merged together (if possible) and | ||
| 1368 | * tagged with the appropriate dma address and length. They are obtained via | ||
| 1369 | * sg_dma_{address,length}. | ||
| 1370 | */ | ||
| 1371 | int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, | ||
| 1372 | enum dma_data_direction dir, struct dma_attrs *attrs) | ||
| 1373 | { | ||
| 1374 | struct scatterlist *s = sg, *dma = sg, *start = sg; | ||
| 1375 | int i, count = 0; | ||
| 1376 | unsigned int offset = s->offset; | ||
| 1377 | unsigned int size = s->offset + s->length; | ||
| 1378 | unsigned int max = dma_get_max_seg_size(dev); | ||
| 1379 | |||
| 1380 | for (i = 1; i < nents; i++) { | ||
| 1381 | s = sg_next(s); | ||
| 1382 | |||
| 1383 | s->dma_address = DMA_ERROR_CODE; | ||
| 1384 | s->dma_length = 0; | ||
| 1385 | |||
| 1386 | if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { | ||
| 1387 | if (__map_sg_chunk(dev, start, size, &dma->dma_address, | ||
| 1388 | dir) < 0) | ||
| 1389 | goto bad_mapping; | ||
| 1390 | |||
| 1391 | dma->dma_address += offset; | ||
| 1392 | dma->dma_length = size - offset; | ||
| 1393 | |||
| 1394 | size = offset = s->offset; | ||
| 1395 | start = s; | ||
| 1396 | dma = sg_next(dma); | ||
| 1397 | count += 1; | ||
| 1398 | } | ||
| 1399 | size += s->length; | ||
| 1400 | } | ||
| 1401 | if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0) | ||
| 1402 | goto bad_mapping; | ||
| 1403 | |||
| 1404 | dma->dma_address += offset; | ||
| 1405 | dma->dma_length = size - offset; | ||
| 1406 | |||
| 1407 | return count+1; | ||
| 1408 | |||
| 1409 | bad_mapping: | ||
| 1410 | for_each_sg(sg, s, count, i) | ||
| 1411 | __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); | ||
| 1412 | return 0; | ||
| 1413 | } | ||
| 1414 | |||
| 1415 | /** | ||
| 1416 | * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg | ||
| 1417 | * @dev: valid struct device pointer | ||
| 1418 | * @sg: list of buffers | ||
| 1419 | * @nents: number of buffers to unmap (same as was passed to dma_map_sg) | ||
| 1420 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) | ||
| 1421 | * | ||
| 1422 | * Unmap a set of streaming mode DMA translations. Again, CPU access | ||
| 1423 | * rules concerning calls here are the same as for dma_unmap_single(). | ||
| 1424 | */ | ||
| 1425 | void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, | ||
| 1426 | enum dma_data_direction dir, struct dma_attrs *attrs) | ||
| 1427 | { | ||
| 1428 | struct scatterlist *s; | ||
| 1429 | int i; | ||
| 1430 | |||
| 1431 | for_each_sg(sg, s, nents, i) { | ||
| 1432 | if (sg_dma_len(s)) | ||
| 1433 | __iommu_remove_mapping(dev, sg_dma_address(s), | ||
| 1434 | sg_dma_len(s)); | ||
| 1435 | if (!arch_is_coherent()) | ||
| 1436 | __dma_page_dev_to_cpu(sg_page(s), s->offset, | ||
| 1437 | s->length, dir); | ||
| 1438 | } | ||
| 1439 | } | ||
| 1440 | |||
| 1441 | /** | ||
| 1442 | * arm_iommu_sync_sg_for_cpu | ||
| 1443 | * @dev: valid struct device pointer | ||
| 1444 | * @sg: list of buffers | ||
| 1445 | * @nents: number of buffers to map (returned from dma_map_sg) | ||
| 1446 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) | ||
| 1447 | */ | ||
| 1448 | void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, | ||
| 1449 | int nents, enum dma_data_direction dir) | ||
| 1450 | { | ||
| 1451 | struct scatterlist *s; | ||
| 1452 | int i; | ||
| 1453 | |||
| 1454 | for_each_sg(sg, s, nents, i) | ||
| 1455 | if (!arch_is_coherent()) | ||
| 1456 | __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); | ||
| 1457 | |||
| 1458 | } | ||
| 1459 | |||
| 1460 | /** | ||
| 1461 | * arm_iommu_sync_sg_for_device | ||
| 1462 | * @dev: valid struct device pointer | ||
| 1463 | * @sg: list of buffers | ||
| 1464 | * @nents: number of buffers to map (returned from dma_map_sg) | ||
| 1465 | * @dir: DMA transfer direction (same as was passed to dma_map_sg) | ||
| 1466 | */ | ||
| 1467 | void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, | ||
| 1468 | int nents, enum dma_data_direction dir) | ||
| 1469 | { | ||
| 1470 | struct scatterlist *s; | ||
| 1471 | int i; | ||
| 1472 | |||
| 1473 | for_each_sg(sg, s, nents, i) | ||
| 1474 | if (!arch_is_coherent()) | ||
| 1475 | __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); | ||
| 1476 | } | ||
| 1477 | |||
| 1478 | |||
| 1479 | /** | ||
| 1480 | * arm_iommu_map_page | ||
| 1481 | * @dev: valid struct device pointer | ||
| 1482 | * @page: page that buffer resides in | ||
| 1483 | * @offset: offset into page for start of buffer | ||
| 1484 | * @size: size of buffer to map | ||
| 1485 | * @dir: DMA transfer direction | ||
| 1486 | * | ||
| 1487 | * IOMMU aware version of arm_dma_map_page() | ||
| 1488 | */ | ||
| 1489 | static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, | ||
| 1490 | unsigned long offset, size_t size, enum dma_data_direction dir, | ||
| 1491 | struct dma_attrs *attrs) | ||
| 1492 | { | ||
| 1493 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
| 1494 | dma_addr_t dma_addr; | ||
| 1495 | int ret, len = PAGE_ALIGN(size + offset); | ||
| 1496 | |||
| 1497 | if (!arch_is_coherent()) | ||
| 1498 | __dma_page_cpu_to_dev(page, offset, size, dir); | ||
| 1499 | |||
| 1500 | dma_addr = __alloc_iova(mapping, len); | ||
| 1501 | if (dma_addr == DMA_ERROR_CODE) | ||
| 1502 | return dma_addr; | ||
| 1503 | |||
| 1504 | ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0); | ||
| 1505 | if (ret < 0) | ||
| 1506 | goto fail; | ||
| 1507 | |||
| 1508 | return dma_addr + offset; | ||
| 1509 | fail: | ||
| 1510 | __free_iova(mapping, dma_addr, len); | ||
| 1511 | return DMA_ERROR_CODE; | ||
| 1512 | } | ||
| 1513 | |||
| 1514 | /** | ||
| 1515 | * arm_iommu_unmap_page | ||
| 1516 | * @dev: valid struct device pointer | ||
| 1517 | * @handle: DMA address of buffer | ||
| 1518 | * @size: size of buffer (same as passed to dma_map_page) | ||
| 1519 | * @dir: DMA transfer direction (same as passed to dma_map_page) | ||
| 1520 | * | ||
| 1521 | * IOMMU aware version of arm_dma_unmap_page() | ||
| 1522 | */ | ||
| 1523 | static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, | ||
| 1524 | size_t size, enum dma_data_direction dir, | ||
| 1525 | struct dma_attrs *attrs) | ||
| 1526 | { | ||
| 1527 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
| 1528 | dma_addr_t iova = handle & PAGE_MASK; | ||
| 1529 | struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); | ||
| 1530 | int offset = handle & ~PAGE_MASK; | ||
| 1531 | int len = PAGE_ALIGN(size + offset); | ||
| 1532 | |||
| 1533 | if (!iova) | ||
| 1534 | return; | ||
| 1535 | |||
| 1536 | if (!arch_is_coherent()) | ||
| 1537 | __dma_page_dev_to_cpu(page, offset, size, dir); | ||
| 1538 | |||
| 1539 | iommu_unmap(mapping->domain, iova, len); | ||
| 1540 | __free_iova(mapping, iova, len); | ||
| 1541 | } | ||
| 1542 | |||
| 1543 | static void arm_iommu_sync_single_for_cpu(struct device *dev, | ||
| 1544 | dma_addr_t handle, size_t size, enum dma_data_direction dir) | ||
| 1545 | { | ||
| 1546 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
| 1547 | dma_addr_t iova = handle & PAGE_MASK; | ||
| 1548 | struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); | ||
| 1549 | unsigned int offset = handle & ~PAGE_MASK; | ||
| 1550 | |||
| 1551 | if (!iova) | ||
| 1552 | return; | ||
| 1553 | |||
| 1554 | if (!arch_is_coherent()) | ||
| 1555 | __dma_page_dev_to_cpu(page, offset, size, dir); | ||
| 1556 | } | ||
| 1557 | |||
| 1558 | static void arm_iommu_sync_single_for_device(struct device *dev, | ||
| 1559 | dma_addr_t handle, size_t size, enum dma_data_direction dir) | ||
| 1560 | { | ||
| 1561 | struct dma_iommu_mapping *mapping = dev->archdata.mapping; | ||
| 1562 | dma_addr_t iova = handle & PAGE_MASK; | ||
| 1563 | struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); | ||
| 1564 | unsigned int offset = handle & ~PAGE_MASK; | ||
| 1565 | |||
| 1566 | if (!iova) | ||
| 1567 | return; | ||
| 1568 | |||
| 1569 | __dma_page_cpu_to_dev(page, offset, size, dir); | ||
| 1570 | } | ||
| 1571 | |||
| 1572 | struct dma_map_ops iommu_ops = { | ||
| 1573 | .alloc = arm_iommu_alloc_attrs, | ||
| 1574 | .free = arm_iommu_free_attrs, | ||
| 1575 | .mmap = arm_iommu_mmap_attrs, | ||
| 1576 | |||
| 1577 | .map_page = arm_iommu_map_page, | ||
| 1578 | .unmap_page = arm_iommu_unmap_page, | ||
| 1579 | .sync_single_for_cpu = arm_iommu_sync_single_for_cpu, | ||
| 1580 | .sync_single_for_device = arm_iommu_sync_single_for_device, | ||
| 1581 | |||
| 1582 | .map_sg = arm_iommu_map_sg, | ||
| 1583 | .unmap_sg = arm_iommu_unmap_sg, | ||
| 1584 | .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu, | ||
| 1585 | .sync_sg_for_device = arm_iommu_sync_sg_for_device, | ||
| 1586 | }; | ||
| 1587 | |||
| 1588 | /** | ||
| 1589 | * arm_iommu_create_mapping | ||
| 1590 | * @bus: pointer to the bus holding the client device (for IOMMU calls) | ||
| 1591 | * @base: start address of the valid IO address space | ||
| 1592 | * @size: size of the valid IO address space | ||
| 1593 | * @order: accuracy of the IO addresses allocations | ||
| 1594 | * | ||
| 1595 | * Creates a mapping structure which holds information about used/unused | ||
| 1596 | * IO address ranges, which is required to perform memory allocation and | ||
| 1597 | * mapping with IOMMU aware functions. | ||
| 1598 | * | ||
| 1599 | * The client device need to be attached to the mapping with | ||
| 1600 | * arm_iommu_attach_device function. | ||
| 1601 | */ | ||
| 1602 | struct dma_iommu_mapping * | ||
| 1603 | arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size, | ||
| 1604 | int order) | ||
| 1605 | { | ||
| 1606 | unsigned int count = size >> (PAGE_SHIFT + order); | ||
| 1607 | unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); | ||
| 1608 | struct dma_iommu_mapping *mapping; | ||
| 1609 | int err = -ENOMEM; | ||
| 1610 | |||
| 1611 | if (!count) | ||
| 1612 | return ERR_PTR(-EINVAL); | ||
| 1613 | |||
| 1614 | mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL); | ||
| 1615 | if (!mapping) | ||
| 1616 | goto err; | ||
| 1617 | |||
| 1618 | mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | ||
| 1619 | if (!mapping->bitmap) | ||
| 1620 | goto err2; | ||
| 1621 | |||
| 1622 | mapping->base = base; | ||
| 1623 | mapping->bits = BITS_PER_BYTE * bitmap_size; | ||
| 1624 | mapping->order = order; | ||
| 1625 | spin_lock_init(&mapping->lock); | ||
| 1626 | |||
| 1627 | mapping->domain = iommu_domain_alloc(bus); | ||
| 1628 | if (!mapping->domain) | ||
| 1629 | goto err3; | ||
| 1630 | |||
| 1631 | kref_init(&mapping->kref); | ||
| 1632 | return mapping; | ||
| 1633 | err3: | ||
| 1634 | kfree(mapping->bitmap); | ||
| 1635 | err2: | ||
| 1636 | kfree(mapping); | ||
| 1637 | err: | ||
| 1638 | return ERR_PTR(err); | ||
| 1639 | } | ||
| 1640 | |||
| 1641 | static void release_iommu_mapping(struct kref *kref) | ||
| 1642 | { | ||
| 1643 | struct dma_iommu_mapping *mapping = | ||
| 1644 | container_of(kref, struct dma_iommu_mapping, kref); | ||
| 1645 | |||
| 1646 | iommu_domain_free(mapping->domain); | ||
| 1647 | kfree(mapping->bitmap); | ||
| 1648 | kfree(mapping); | ||
| 1649 | } | ||
| 1650 | |||
| 1651 | void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping) | ||
| 1652 | { | ||
| 1653 | if (mapping) | ||
| 1654 | kref_put(&mapping->kref, release_iommu_mapping); | ||
| 1655 | } | ||
| 1656 | |||
| 1657 | /** | ||
| 1658 | * arm_iommu_attach_device | ||
| 1659 | * @dev: valid struct device pointer | ||
| 1660 | * @mapping: io address space mapping structure (returned from | ||
| 1661 | * arm_iommu_create_mapping) | ||
| 1662 | * | ||
| 1663 | * Attaches specified io address space mapping to the provided device, | ||
| 1664 | * this replaces the dma operations (dma_map_ops pointer) with the | ||
| 1665 | * IOMMU aware version. More than one client might be attached to | ||
| 1666 | * the same io address space mapping. | ||
| 1667 | */ | ||
| 1668 | int arm_iommu_attach_device(struct device *dev, | ||
| 1669 | struct dma_iommu_mapping *mapping) | ||
| 1670 | { | ||
| 1671 | int err; | ||
| 1672 | |||
| 1673 | err = iommu_attach_device(mapping->domain, dev); | ||
| 1674 | if (err) | ||
| 1675 | return err; | ||
| 1676 | |||
| 1677 | kref_get(&mapping->kref); | ||
| 1678 | dev->archdata.mapping = mapping; | ||
| 1679 | set_dma_ops(dev, &iommu_ops); | ||
| 1680 | |||
| 1681 | pr_info("Attached IOMMU controller to %s device.\n", dev_name(dev)); | ||
| 1682 | return 0; | ||
| 1683 | } | ||
| 1684 | |||
| 1685 | #endif | ||
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 8f5813bbffb5..c21d06c7dd7e 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/highmem.h> | 20 | #include <linux/highmem.h> |
| 21 | #include <linux/gfp.h> | 21 | #include <linux/gfp.h> |
| 22 | #include <linux/memblock.h> | 22 | #include <linux/memblock.h> |
| 23 | #include <linux/dma-contiguous.h> | ||
| 23 | 24 | ||
| 24 | #include <asm/mach-types.h> | 25 | #include <asm/mach-types.h> |
| 25 | #include <asm/memblock.h> | 26 | #include <asm/memblock.h> |
| @@ -226,6 +227,17 @@ static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole, | |||
| 226 | } | 227 | } |
| 227 | #endif | 228 | #endif |
| 228 | 229 | ||
| 230 | void __init setup_dma_zone(struct machine_desc *mdesc) | ||
| 231 | { | ||
| 232 | #ifdef CONFIG_ZONE_DMA | ||
| 233 | if (mdesc->dma_zone_size) { | ||
| 234 | arm_dma_zone_size = mdesc->dma_zone_size; | ||
| 235 | arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; | ||
| 236 | } else | ||
| 237 | arm_dma_limit = 0xffffffff; | ||
| 238 | #endif | ||
| 239 | } | ||
| 240 | |||
| 229 | static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, | 241 | static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, |
| 230 | unsigned long max_high) | 242 | unsigned long max_high) |
| 231 | { | 243 | { |
| @@ -273,12 +285,9 @@ static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, | |||
| 273 | * Adjust the sizes according to any special requirements for | 285 | * Adjust the sizes according to any special requirements for |
| 274 | * this machine type. | 286 | * this machine type. |
| 275 | */ | 287 | */ |
| 276 | if (arm_dma_zone_size) { | 288 | if (arm_dma_zone_size) |
| 277 | arm_adjust_dma_zone(zone_size, zhole_size, | 289 | arm_adjust_dma_zone(zone_size, zhole_size, |
| 278 | arm_dma_zone_size >> PAGE_SHIFT); | 290 | arm_dma_zone_size >> PAGE_SHIFT); |
| 279 | arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; | ||
| 280 | } else | ||
| 281 | arm_dma_limit = 0xffffffff; | ||
| 282 | #endif | 291 | #endif |
| 283 | 292 | ||
| 284 | free_area_init_node(0, zone_size, min, zhole_size); | 293 | free_area_init_node(0, zone_size, min, zhole_size); |
| @@ -364,6 +373,12 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) | |||
| 364 | if (mdesc->reserve) | 373 | if (mdesc->reserve) |
| 365 | mdesc->reserve(); | 374 | mdesc->reserve(); |
| 366 | 375 | ||
| 376 | /* | ||
| 377 | * reserve memory for DMA contigouos allocations, | ||
| 378 | * must come from DMA area inside low memory | ||
| 379 | */ | ||
| 380 | dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit)); | ||
| 381 | |||
| 367 | arm_memblock_steal_permitted = false; | 382 | arm_memblock_steal_permitted = false; |
| 368 | memblock_allow_resize(); | 383 | memblock_allow_resize(); |
| 369 | memblock_dump_all(); | 384 | memblock_dump_all(); |
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 27f4a619b35d..93dc0c17cdcb 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h | |||
| @@ -67,5 +67,8 @@ extern u32 arm_dma_limit; | |||
| 67 | #define arm_dma_limit ((u32)~0) | 67 | #define arm_dma_limit ((u32)~0) |
| 68 | #endif | 68 | #endif |
| 69 | 69 | ||
| 70 | extern phys_addr_t arm_lowmem_limit; | ||
| 71 | |||
| 70 | void __init bootmem_init(void); | 72 | void __init bootmem_init(void); |
| 71 | void arm_mm_memblock_reserve(void); | 73 | void arm_mm_memblock_reserve(void); |
| 74 | void dma_contiguous_remap(void); | ||
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index aa78de8bfdd3..e5dad60b558b 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c | |||
| @@ -288,6 +288,11 @@ static struct mem_type mem_types[] = { | |||
| 288 | PMD_SECT_UNCACHED | PMD_SECT_XN, | 288 | PMD_SECT_UNCACHED | PMD_SECT_XN, |
| 289 | .domain = DOMAIN_KERNEL, | 289 | .domain = DOMAIN_KERNEL, |
| 290 | }, | 290 | }, |
| 291 | [MT_MEMORY_DMA_READY] = { | ||
| 292 | .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, | ||
| 293 | .prot_l1 = PMD_TYPE_TABLE, | ||
| 294 | .domain = DOMAIN_KERNEL, | ||
| 295 | }, | ||
| 291 | }; | 296 | }; |
| 292 | 297 | ||
| 293 | const struct mem_type *get_mem_type(unsigned int type) | 298 | const struct mem_type *get_mem_type(unsigned int type) |
| @@ -429,6 +434,7 @@ static void __init build_mem_type_table(void) | |||
| 429 | if (arch_is_coherent() && cpu_is_xsc3()) { | 434 | if (arch_is_coherent() && cpu_is_xsc3()) { |
| 430 | mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; | 435 | mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; |
| 431 | mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; | 436 | mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; |
| 437 | mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; | ||
| 432 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; | 438 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; |
| 433 | mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; | 439 | mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; |
| 434 | } | 440 | } |
| @@ -460,6 +466,7 @@ static void __init build_mem_type_table(void) | |||
| 460 | mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; | 466 | mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; |
| 461 | mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; | 467 | mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; |
| 462 | mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; | 468 | mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; |
| 469 | mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; | ||
| 463 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; | 470 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; |
| 464 | mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; | 471 | mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; |
| 465 | } | 472 | } |
| @@ -512,6 +519,7 @@ static void __init build_mem_type_table(void) | |||
| 512 | mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; | 519 | mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; |
| 513 | mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; | 520 | mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; |
| 514 | mem_types[MT_MEMORY].prot_pte |= kern_pgprot; | 521 | mem_types[MT_MEMORY].prot_pte |= kern_pgprot; |
| 522 | mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; | ||
| 515 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; | 523 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; |
| 516 | mem_types[MT_ROM].prot_sect |= cp->pmd; | 524 | mem_types[MT_ROM].prot_sect |= cp->pmd; |
| 517 | 525 | ||
| @@ -596,7 +604,7 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr, | |||
| 596 | * L1 entries, whereas PGDs refer to a group of L1 entries making | 604 | * L1 entries, whereas PGDs refer to a group of L1 entries making |
| 597 | * up one logical pointer to an L2 table. | 605 | * up one logical pointer to an L2 table. |
| 598 | */ | 606 | */ |
| 599 | if (((addr | end | phys) & ~SECTION_MASK) == 0) { | 607 | if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) { |
| 600 | pmd_t *p = pmd; | 608 | pmd_t *p = pmd; |
| 601 | 609 | ||
| 602 | #ifndef CONFIG_ARM_LPAE | 610 | #ifndef CONFIG_ARM_LPAE |
| @@ -814,7 +822,7 @@ static int __init early_vmalloc(char *arg) | |||
| 814 | } | 822 | } |
| 815 | early_param("vmalloc", early_vmalloc); | 823 | early_param("vmalloc", early_vmalloc); |
| 816 | 824 | ||
| 817 | static phys_addr_t lowmem_limit __initdata = 0; | 825 | phys_addr_t arm_lowmem_limit __initdata = 0; |
| 818 | 826 | ||
| 819 | void __init sanity_check_meminfo(void) | 827 | void __init sanity_check_meminfo(void) |
| 820 | { | 828 | { |
| @@ -897,8 +905,8 @@ void __init sanity_check_meminfo(void) | |||
| 897 | bank->size = newsize; | 905 | bank->size = newsize; |
| 898 | } | 906 | } |
| 899 | #endif | 907 | #endif |
| 900 | if (!bank->highmem && bank->start + bank->size > lowmem_limit) | 908 | if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit) |
| 901 | lowmem_limit = bank->start + bank->size; | 909 | arm_lowmem_limit = bank->start + bank->size; |
| 902 | 910 | ||
| 903 | j++; | 911 | j++; |
| 904 | } | 912 | } |
| @@ -923,8 +931,8 @@ void __init sanity_check_meminfo(void) | |||
| 923 | } | 931 | } |
| 924 | #endif | 932 | #endif |
| 925 | meminfo.nr_banks = j; | 933 | meminfo.nr_banks = j; |
| 926 | high_memory = __va(lowmem_limit - 1) + 1; | 934 | high_memory = __va(arm_lowmem_limit - 1) + 1; |
| 927 | memblock_set_current_limit(lowmem_limit); | 935 | memblock_set_current_limit(arm_lowmem_limit); |
| 928 | } | 936 | } |
| 929 | 937 | ||
| 930 | static inline void prepare_page_table(void) | 938 | static inline void prepare_page_table(void) |
| @@ -949,8 +957,8 @@ static inline void prepare_page_table(void) | |||
| 949 | * Find the end of the first block of lowmem. | 957 | * Find the end of the first block of lowmem. |
| 950 | */ | 958 | */ |
| 951 | end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; | 959 | end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; |
| 952 | if (end >= lowmem_limit) | 960 | if (end >= arm_lowmem_limit) |
| 953 | end = lowmem_limit; | 961 | end = arm_lowmem_limit; |
| 954 | 962 | ||
| 955 | /* | 963 | /* |
| 956 | * Clear out all the kernel space mappings, except for the first | 964 | * Clear out all the kernel space mappings, except for the first |
| @@ -1093,8 +1101,8 @@ static void __init map_lowmem(void) | |||
| 1093 | phys_addr_t end = start + reg->size; | 1101 | phys_addr_t end = start + reg->size; |
| 1094 | struct map_desc map; | 1102 | struct map_desc map; |
| 1095 | 1103 | ||
| 1096 | if (end > lowmem_limit) | 1104 | if (end > arm_lowmem_limit) |
| 1097 | end = lowmem_limit; | 1105 | end = arm_lowmem_limit; |
| 1098 | if (start >= end) | 1106 | if (start >= end) |
| 1099 | break; | 1107 | break; |
| 1100 | 1108 | ||
| @@ -1115,11 +1123,12 @@ void __init paging_init(struct machine_desc *mdesc) | |||
| 1115 | { | 1123 | { |
| 1116 | void *zero_page; | 1124 | void *zero_page; |
| 1117 | 1125 | ||
| 1118 | memblock_set_current_limit(lowmem_limit); | 1126 | memblock_set_current_limit(arm_lowmem_limit); |
| 1119 | 1127 | ||
| 1120 | build_mem_type_table(); | 1128 | build_mem_type_table(); |
| 1121 | prepare_page_table(); | 1129 | prepare_page_table(); |
| 1122 | map_lowmem(); | 1130 | map_lowmem(); |
| 1131 | dma_contiguous_remap(); | ||
| 1123 | devicemaps_init(mdesc); | 1132 | devicemaps_init(mdesc); |
| 1124 | kmap_init(); | 1133 | kmap_init(); |
| 1125 | 1134 | ||
diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h index 162be662c088..bf312c354a21 100644 --- a/arch/arm/mm/vmregion.h +++ b/arch/arm/mm/vmregion.h | |||
| @@ -17,7 +17,7 @@ struct arm_vmregion { | |||
| 17 | struct list_head vm_list; | 17 | struct list_head vm_list; |
| 18 | unsigned long vm_start; | 18 | unsigned long vm_start; |
| 19 | unsigned long vm_end; | 19 | unsigned long vm_end; |
| 20 | struct page *vm_pages; | 20 | void *priv; |
| 21 | int vm_active; | 21 | int vm_active; |
| 22 | const void *caller; | 22 | const void *caller; |
| 23 | }; | 23 | }; |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 66cc380bebf0..81c3e8be789a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -32,6 +32,7 @@ config X86 | |||
| 32 | select ARCH_WANT_OPTIONAL_GPIOLIB | 32 | select ARCH_WANT_OPTIONAL_GPIOLIB |
| 33 | select ARCH_WANT_FRAME_POINTERS | 33 | select ARCH_WANT_FRAME_POINTERS |
| 34 | select HAVE_DMA_ATTRS | 34 | select HAVE_DMA_ATTRS |
| 35 | select HAVE_DMA_CONTIGUOUS if !SWIOTLB | ||
| 35 | select HAVE_KRETPROBES | 36 | select HAVE_KRETPROBES |
| 36 | select HAVE_OPTPROBES | 37 | select HAVE_OPTPROBES |
| 37 | select HAVE_FTRACE_MCOUNT_RECORD | 38 | select HAVE_FTRACE_MCOUNT_RECORD |
diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h new file mode 100644 index 000000000000..c09241659971 --- /dev/null +++ b/arch/x86/include/asm/dma-contiguous.h | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | #ifndef ASMX86_DMA_CONTIGUOUS_H | ||
| 2 | #define ASMX86_DMA_CONTIGUOUS_H | ||
| 3 | |||
| 4 | #ifdef __KERNEL__ | ||
| 5 | |||
| 6 | #include <linux/types.h> | ||
| 7 | #include <asm-generic/dma-contiguous.h> | ||
| 8 | |||
| 9 | static inline void | ||
| 10 | dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } | ||
| 11 | |||
| 12 | #endif | ||
| 13 | #endif | ||
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 61c0bd25845a..f7b4c7903e7e 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <asm/io.h> | 13 | #include <asm/io.h> |
| 14 | #include <asm/swiotlb.h> | 14 | #include <asm/swiotlb.h> |
| 15 | #include <asm-generic/dma-coherent.h> | 15 | #include <asm-generic/dma-coherent.h> |
| 16 | #include <linux/dma-contiguous.h> | ||
| 16 | 17 | ||
| 17 | #ifdef CONFIG_ISA | 18 | #ifdef CONFIG_ISA |
| 18 | # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) | 19 | # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) |
| @@ -62,6 +63,10 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, | |||
| 62 | dma_addr_t *dma_addr, gfp_t flag, | 63 | dma_addr_t *dma_addr, gfp_t flag, |
| 63 | struct dma_attrs *attrs); | 64 | struct dma_attrs *attrs); |
| 64 | 65 | ||
| 66 | extern void dma_generic_free_coherent(struct device *dev, size_t size, | ||
| 67 | void *vaddr, dma_addr_t dma_addr, | ||
| 68 | struct dma_attrs *attrs); | ||
| 69 | |||
| 65 | #ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ | 70 | #ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ |
| 66 | extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); | 71 | extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); |
| 67 | extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); | 72 | extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 3003250ac51d..62c9457ccd2f 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
| @@ -100,14 +100,18 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, | |||
| 100 | struct dma_attrs *attrs) | 100 | struct dma_attrs *attrs) |
| 101 | { | 101 | { |
| 102 | unsigned long dma_mask; | 102 | unsigned long dma_mask; |
| 103 | struct page *page; | 103 | struct page *page = NULL; |
| 104 | unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
| 104 | dma_addr_t addr; | 105 | dma_addr_t addr; |
| 105 | 106 | ||
| 106 | dma_mask = dma_alloc_coherent_mask(dev, flag); | 107 | dma_mask = dma_alloc_coherent_mask(dev, flag); |
| 107 | 108 | ||
| 108 | flag |= __GFP_ZERO; | 109 | flag |= __GFP_ZERO; |
| 109 | again: | 110 | again: |
| 110 | page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); | 111 | if (!(flag & GFP_ATOMIC)) |
| 112 | page = dma_alloc_from_contiguous(dev, count, get_order(size)); | ||
| 113 | if (!page) | ||
| 114 | page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); | ||
| 111 | if (!page) | 115 | if (!page) |
| 112 | return NULL; | 116 | return NULL; |
| 113 | 117 | ||
| @@ -127,6 +131,16 @@ again: | |||
| 127 | return page_address(page); | 131 | return page_address(page); |
| 128 | } | 132 | } |
| 129 | 133 | ||
| 134 | void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
| 135 | dma_addr_t dma_addr, struct dma_attrs *attrs) | ||
| 136 | { | ||
| 137 | unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
| 138 | struct page *page = virt_to_page(vaddr); | ||
| 139 | |||
| 140 | if (!dma_release_from_contiguous(dev, page, count)) | ||
| 141 | free_pages((unsigned long)vaddr, get_order(size)); | ||
| 142 | } | ||
| 143 | |||
| 130 | /* | 144 | /* |
| 131 | * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel | 145 | * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel |
| 132 | * parameter documentation. | 146 | * parameter documentation. |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index f96050685b46..871be4a84c7d 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
| @@ -74,12 +74,6 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | |||
| 74 | return nents; | 74 | return nents; |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
| 78 | dma_addr_t dma_addr, struct dma_attrs *attrs) | ||
| 79 | { | ||
| 80 | free_pages((unsigned long)vaddr, get_order(size)); | ||
| 81 | } | ||
| 82 | |||
| 83 | static void nommu_sync_single_for_device(struct device *dev, | 77 | static void nommu_sync_single_for_device(struct device *dev, |
| 84 | dma_addr_t addr, size_t size, | 78 | dma_addr_t addr, size_t size, |
| 85 | enum dma_data_direction dir) | 79 | enum dma_data_direction dir) |
| @@ -97,7 +91,7 @@ static void nommu_sync_sg_for_device(struct device *dev, | |||
| 97 | 91 | ||
| 98 | struct dma_map_ops nommu_dma_ops = { | 92 | struct dma_map_ops nommu_dma_ops = { |
| 99 | .alloc = dma_generic_alloc_coherent, | 93 | .alloc = dma_generic_alloc_coherent, |
| 100 | .free = nommu_free_coherent, | 94 | .free = dma_generic_free_coherent, |
| 101 | .map_sg = nommu_map_sg, | 95 | .map_sg = nommu_map_sg, |
| 102 | .map_page = nommu_map_page, | 96 | .map_page = nommu_map_page, |
| 103 | .sync_single_for_device = nommu_sync_single_for_device, | 97 | .sync_single_for_device = nommu_sync_single_for_device, |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 366c688d619e..f2afee6a19c1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
| @@ -49,6 +49,7 @@ | |||
| 49 | #include <asm/pci-direct.h> | 49 | #include <asm/pci-direct.h> |
| 50 | #include <linux/init_ohci1394_dma.h> | 50 | #include <linux/init_ohci1394_dma.h> |
| 51 | #include <linux/kvm_para.h> | 51 | #include <linux/kvm_para.h> |
| 52 | #include <linux/dma-contiguous.h> | ||
| 52 | 53 | ||
| 53 | #include <linux/errno.h> | 54 | #include <linux/errno.h> |
| 54 | #include <linux/kernel.h> | 55 | #include <linux/kernel.h> |
| @@ -925,6 +926,7 @@ void __init setup_arch(char **cmdline_p) | |||
| 925 | } | 926 | } |
| 926 | #endif | 927 | #endif |
| 927 | memblock.current_limit = get_max_mapped(); | 928 | memblock.current_limit = get_max_mapped(); |
| 929 | dma_contiguous_reserve(0); | ||
| 928 | 930 | ||
| 929 | /* | 931 | /* |
| 930 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. | 932 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. |
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 9aa618acfe97..9b21469482ae 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig | |||
| @@ -192,4 +192,93 @@ config DMA_SHARED_BUFFER | |||
| 192 | APIs extension; the file's descriptor can then be passed on to other | 192 | APIs extension; the file's descriptor can then be passed on to other |
| 193 | driver. | 193 | driver. |
| 194 | 194 | ||
| 195 | config CMA | ||
| 196 | bool "Contiguous Memory Allocator (EXPERIMENTAL)" | ||
| 197 | depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK && EXPERIMENTAL | ||
| 198 | select MIGRATION | ||
| 199 | help | ||
| 200 | This enables the Contiguous Memory Allocator which allows drivers | ||
| 201 | to allocate big physically-contiguous blocks of memory for use with | ||
| 202 | hardware components that do not support I/O map nor scatter-gather. | ||
| 203 | |||
| 204 | For more information see <include/linux/dma-contiguous.h>. | ||
| 205 | If unsure, say "n". | ||
| 206 | |||
| 207 | if CMA | ||
| 208 | |||
| 209 | config CMA_DEBUG | ||
| 210 | bool "CMA debug messages (DEVELOPMENT)" | ||
| 211 | depends on DEBUG_KERNEL | ||
| 212 | help | ||
| 213 | Turns on debug messages in CMA. This produces KERN_DEBUG | ||
| 214 | messages for every CMA call as well as various messages while | ||
| 215 | processing calls such as dma_alloc_from_contiguous(). | ||
| 216 | This option does not affect warning and error messages. | ||
| 217 | |||
| 218 | comment "Default contiguous memory area size:" | ||
| 219 | |||
| 220 | config CMA_SIZE_MBYTES | ||
| 221 | int "Size in Mega Bytes" | ||
| 222 | depends on !CMA_SIZE_SEL_PERCENTAGE | ||
| 223 | default 16 | ||
| 224 | help | ||
| 225 | Defines the size (in MiB) of the default memory area for Contiguous | ||
| 226 | Memory Allocator. | ||
| 227 | |||
| 228 | config CMA_SIZE_PERCENTAGE | ||
| 229 | int "Percentage of total memory" | ||
| 230 | depends on !CMA_SIZE_SEL_MBYTES | ||
| 231 | default 10 | ||
| 232 | help | ||
| 233 | Defines the size of the default memory area for Contiguous Memory | ||
| 234 | Allocator as a percentage of the total memory in the system. | ||
| 235 | |||
| 236 | choice | ||
| 237 | prompt "Selected region size" | ||
| 238 | default CMA_SIZE_SEL_ABSOLUTE | ||
| 239 | |||
| 240 | config CMA_SIZE_SEL_MBYTES | ||
| 241 | bool "Use mega bytes value only" | ||
| 242 | |||
| 243 | config CMA_SIZE_SEL_PERCENTAGE | ||
| 244 | bool "Use percentage value only" | ||
| 245 | |||
| 246 | config CMA_SIZE_SEL_MIN | ||
| 247 | bool "Use lower value (minimum)" | ||
| 248 | |||
| 249 | config CMA_SIZE_SEL_MAX | ||
| 250 | bool "Use higher value (maximum)" | ||
| 251 | |||
| 252 | endchoice | ||
| 253 | |||
| 254 | config CMA_ALIGNMENT | ||
| 255 | int "Maximum PAGE_SIZE order of alignment for contiguous buffers" | ||
| 256 | range 4 9 | ||
| 257 | default 8 | ||
| 258 | help | ||
| 259 | DMA mapping framework by default aligns all buffers to the smallest | ||
| 260 | PAGE_SIZE order which is greater than or equal to the requested buffer | ||
| 261 | size. This works well for buffers up to a few hundred kilobytes, but | ||
| 262 | for larger buffers it is just a memory waste. With this parameter you can | ||
| 263 | specify the maximum PAGE_SIZE order for contiguous buffers. Larger | ||
| 264 | buffers will be aligned only to this specified order. The order is | ||
| 265 | expressed as a power of two multiplied by the PAGE_SIZE. | ||
| 266 | |||
| 267 | For example, if your system defaults to 4KiB pages, the order value | ||
| 268 | of 8 means that the buffers will be aligned up to 1MiB only. | ||
| 269 | |||
| 270 | If unsure, leave the default value "8". | ||
| 271 | |||
| 272 | config CMA_AREAS | ||
| 273 | int "Maximum count of the CMA device-private areas" | ||
| 274 | default 7 | ||
| 275 | help | ||
| 276 | CMA allows to create CMA areas for particular devices. This parameter | ||
| 277 | sets the maximum number of such device private CMA areas in the | ||
| 278 | system. | ||
| 279 | |||
| 280 | If unsure, leave the default value "7". | ||
| 281 | |||
| 282 | endif | ||
| 283 | |||
| 195 | endmenu | 284 | endmenu |
diff --git a/drivers/base/Makefile b/drivers/base/Makefile index b6d1b9c4200c..5aa2d703d19f 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile | |||
| @@ -6,6 +6,7 @@ obj-y := core.o bus.o dd.o syscore.o \ | |||
| 6 | attribute_container.o transport_class.o \ | 6 | attribute_container.o transport_class.o \ |
| 7 | topology.o | 7 | topology.o |
| 8 | obj-$(CONFIG_DEVTMPFS) += devtmpfs.o | 8 | obj-$(CONFIG_DEVTMPFS) += devtmpfs.o |
| 9 | obj-$(CONFIG_CMA) += dma-contiguous.o | ||
| 9 | obj-y += power/ | 10 | obj-y += power/ |
| 10 | obj-$(CONFIG_HAS_DMA) += dma-mapping.o | 11 | obj-$(CONFIG_HAS_DMA) += dma-mapping.o |
| 11 | obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o | 12 | obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o |
diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c index bb0025c510b3..1b85949e3d2f 100644 --- a/drivers/base/dma-coherent.c +++ b/drivers/base/dma-coherent.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | struct dma_coherent_mem { | 10 | struct dma_coherent_mem { |
| 11 | void *virt_base; | 11 | void *virt_base; |
| 12 | dma_addr_t device_base; | 12 | dma_addr_t device_base; |
| 13 | phys_addr_t pfn_base; | ||
| 13 | int size; | 14 | int size; |
| 14 | int flags; | 15 | int flags; |
| 15 | unsigned long *bitmap; | 16 | unsigned long *bitmap; |
| @@ -44,6 +45,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, | |||
| 44 | 45 | ||
| 45 | dev->dma_mem->virt_base = mem_base; | 46 | dev->dma_mem->virt_base = mem_base; |
| 46 | dev->dma_mem->device_base = device_addr; | 47 | dev->dma_mem->device_base = device_addr; |
| 48 | dev->dma_mem->pfn_base = PFN_DOWN(bus_addr); | ||
| 47 | dev->dma_mem->size = pages; | 49 | dev->dma_mem->size = pages; |
| 48 | dev->dma_mem->flags = flags; | 50 | dev->dma_mem->flags = flags; |
| 49 | 51 | ||
| @@ -176,3 +178,43 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr) | |||
| 176 | return 0; | 178 | return 0; |
| 177 | } | 179 | } |
| 178 | EXPORT_SYMBOL(dma_release_from_coherent); | 180 | EXPORT_SYMBOL(dma_release_from_coherent); |
| 181 | |||
| 182 | /** | ||
| 183 | * dma_mmap_from_coherent() - try to mmap the memory allocated from | ||
| 184 | * per-device coherent memory pool to userspace | ||
| 185 | * @dev: device from which the memory was allocated | ||
| 186 | * @vma: vm_area for the userspace memory | ||
| 187 | * @vaddr: cpu address returned by dma_alloc_from_coherent | ||
| 188 | * @size: size of the memory buffer allocated by dma_alloc_from_coherent | ||
| 189 | * | ||
| 190 | * This checks whether the memory was allocated from the per-device | ||
| 191 | * coherent memory pool and if so, maps that memory to the provided vma. | ||
| 192 | * | ||
| 193 | * Returns 1 if we correctly mapped the memory, or 0 if | ||
| 194 | * dma_release_coherent() should proceed with mapping memory from | ||
| 195 | * generic pools. | ||
| 196 | */ | ||
| 197 | int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, | ||
| 198 | void *vaddr, size_t size, int *ret) | ||
| 199 | { | ||
| 200 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | ||
| 201 | |||
| 202 | if (mem && vaddr >= mem->virt_base && vaddr + size <= | ||
| 203 | (mem->virt_base + (mem->size << PAGE_SHIFT))) { | ||
| 204 | unsigned long off = vma->vm_pgoff; | ||
| 205 | int start = (vaddr - mem->virt_base) >> PAGE_SHIFT; | ||
| 206 | int user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | ||
| 207 | int count = size >> PAGE_SHIFT; | ||
| 208 | |||
| 209 | *ret = -ENXIO; | ||
| 210 | if (off < count && user_count <= count - off) { | ||
| 211 | unsigned pfn = mem->pfn_base + start + off; | ||
| 212 | *ret = remap_pfn_range(vma, vma->vm_start, pfn, | ||
| 213 | user_count << PAGE_SHIFT, | ||
| 214 | vma->vm_page_prot); | ||
| 215 | } | ||
| 216 | return 1; | ||
| 217 | } | ||
| 218 | return 0; | ||
| 219 | } | ||
| 220 | EXPORT_SYMBOL(dma_mmap_from_coherent); | ||
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c new file mode 100644 index 000000000000..78efb0306a44 --- /dev/null +++ b/drivers/base/dma-contiguous.c | |||
| @@ -0,0 +1,401 @@ | |||
| 1 | /* | ||
| 2 | * Contiguous Memory Allocator for DMA mapping framework | ||
| 3 | * Copyright (c) 2010-2011 by Samsung Electronics. | ||
| 4 | * Written by: | ||
| 5 | * Marek Szyprowski <m.szyprowski@samsung.com> | ||
| 6 | * Michal Nazarewicz <mina86@mina86.com> | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU General Public License as | ||
| 10 | * published by the Free Software Foundation; either version 2 of the | ||
| 11 | * License or (at your optional) any later version of the license. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #define pr_fmt(fmt) "cma: " fmt | ||
| 15 | |||
| 16 | #ifdef CONFIG_CMA_DEBUG | ||
| 17 | #ifndef DEBUG | ||
| 18 | # define DEBUG | ||
| 19 | #endif | ||
| 20 | #endif | ||
| 21 | |||
| 22 | #include <asm/page.h> | ||
| 23 | #include <asm/dma-contiguous.h> | ||
| 24 | |||
| 25 | #include <linux/memblock.h> | ||
| 26 | #include <linux/err.h> | ||
| 27 | #include <linux/mm.h> | ||
| 28 | #include <linux/mutex.h> | ||
| 29 | #include <linux/page-isolation.h> | ||
| 30 | #include <linux/slab.h> | ||
| 31 | #include <linux/swap.h> | ||
| 32 | #include <linux/mm_types.h> | ||
| 33 | #include <linux/dma-contiguous.h> | ||
| 34 | |||
| 35 | #ifndef SZ_1M | ||
| 36 | #define SZ_1M (1 << 20) | ||
| 37 | #endif | ||
| 38 | |||
| 39 | struct cma { | ||
| 40 | unsigned long base_pfn; | ||
| 41 | unsigned long count; | ||
| 42 | unsigned long *bitmap; | ||
| 43 | }; | ||
| 44 | |||
| 45 | struct cma *dma_contiguous_default_area; | ||
| 46 | |||
| 47 | #ifdef CONFIG_CMA_SIZE_MBYTES | ||
| 48 | #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES | ||
| 49 | #else | ||
| 50 | #define CMA_SIZE_MBYTES 0 | ||
| 51 | #endif | ||
| 52 | |||
| 53 | /* | ||
| 54 | * Default global CMA area size can be defined in kernel's .config. | ||
| 55 | * This is useful mainly for distro maintainers to create a kernel | ||
| 56 | * that works correctly for most supported systems. | ||
| 57 | * The size can be set in bytes or as a percentage of the total memory | ||
| 58 | * in the system. | ||
| 59 | * | ||
| 60 | * Users, who want to set the size of global CMA area for their system | ||
| 61 | * should use cma= kernel parameter. | ||
| 62 | */ | ||
| 63 | static const unsigned long size_bytes = CMA_SIZE_MBYTES * SZ_1M; | ||
| 64 | static long size_cmdline = -1; | ||
| 65 | |||
| 66 | static int __init early_cma(char *p) | ||
| 67 | { | ||
| 68 | pr_debug("%s(%s)\n", __func__, p); | ||
| 69 | size_cmdline = memparse(p, &p); | ||
| 70 | return 0; | ||
| 71 | } | ||
| 72 | early_param("cma", early_cma); | ||
| 73 | |||
| 74 | #ifdef CONFIG_CMA_SIZE_PERCENTAGE | ||
| 75 | |||
| 76 | static unsigned long __init __maybe_unused cma_early_percent_memory(void) | ||
| 77 | { | ||
| 78 | struct memblock_region *reg; | ||
| 79 | unsigned long total_pages = 0; | ||
| 80 | |||
| 81 | /* | ||
| 82 | * We cannot use memblock_phys_mem_size() here, because | ||
| 83 | * memblock_analyze() has not been called yet. | ||
| 84 | */ | ||
| 85 | for_each_memblock(memory, reg) | ||
| 86 | total_pages += memblock_region_memory_end_pfn(reg) - | ||
| 87 | memblock_region_memory_base_pfn(reg); | ||
| 88 | |||
| 89 | return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT; | ||
| 90 | } | ||
| 91 | |||
| 92 | #else | ||
| 93 | |||
| 94 | static inline __maybe_unused unsigned long cma_early_percent_memory(void) | ||
| 95 | { | ||
| 96 | return 0; | ||
| 97 | } | ||
| 98 | |||
| 99 | #endif | ||
| 100 | |||
| 101 | /** | ||
| 102 | * dma_contiguous_reserve() - reserve area for contiguous memory handling | ||
| 103 | * @limit: End address of the reserved memory (optional, 0 for any). | ||
| 104 | * | ||
| 105 | * This function reserves memory from early allocator. It should be | ||
| 106 | * called by arch specific code once the early allocator (memblock or bootmem) | ||
| 107 | * has been activated and all other subsystems have already allocated/reserved | ||
| 108 | * memory. | ||
| 109 | */ | ||
| 110 | void __init dma_contiguous_reserve(phys_addr_t limit) | ||
| 111 | { | ||
| 112 | unsigned long selected_size = 0; | ||
| 113 | |||
| 114 | pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit); | ||
| 115 | |||
| 116 | if (size_cmdline != -1) { | ||
| 117 | selected_size = size_cmdline; | ||
| 118 | } else { | ||
| 119 | #ifdef CONFIG_CMA_SIZE_SEL_MBYTES | ||
| 120 | selected_size = size_bytes; | ||
| 121 | #elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE) | ||
| 122 | selected_size = cma_early_percent_memory(); | ||
| 123 | #elif defined(CONFIG_CMA_SIZE_SEL_MIN) | ||
| 124 | selected_size = min(size_bytes, cma_early_percent_memory()); | ||
| 125 | #elif defined(CONFIG_CMA_SIZE_SEL_MAX) | ||
| 126 | selected_size = max(size_bytes, cma_early_percent_memory()); | ||
| 127 | #endif | ||
| 128 | } | ||
| 129 | |||
| 130 | if (selected_size) { | ||
| 131 | pr_debug("%s: reserving %ld MiB for global area\n", __func__, | ||
| 132 | selected_size / SZ_1M); | ||
| 133 | |||
| 134 | dma_declare_contiguous(NULL, selected_size, 0, limit); | ||
| 135 | } | ||
| 136 | }; | ||
| 137 | |||
| 138 | static DEFINE_MUTEX(cma_mutex); | ||
| 139 | |||
| 140 | static __init int cma_activate_area(unsigned long base_pfn, unsigned long count) | ||
| 141 | { | ||
| 142 | unsigned long pfn = base_pfn; | ||
| 143 | unsigned i = count >> pageblock_order; | ||
| 144 | struct zone *zone; | ||
| 145 | |||
| 146 | WARN_ON_ONCE(!pfn_valid(pfn)); | ||
| 147 | zone = page_zone(pfn_to_page(pfn)); | ||
| 148 | |||
| 149 | do { | ||
| 150 | unsigned j; | ||
| 151 | base_pfn = pfn; | ||
| 152 | for (j = pageblock_nr_pages; j; --j, pfn++) { | ||
| 153 | WARN_ON_ONCE(!pfn_valid(pfn)); | ||
| 154 | if (page_zone(pfn_to_page(pfn)) != zone) | ||
| 155 | return -EINVAL; | ||
| 156 | } | ||
| 157 | init_cma_reserved_pageblock(pfn_to_page(base_pfn)); | ||
| 158 | } while (--i); | ||
| 159 | return 0; | ||
| 160 | } | ||
| 161 | |||
| 162 | static __init struct cma *cma_create_area(unsigned long base_pfn, | ||
| 163 | unsigned long count) | ||
| 164 | { | ||
| 165 | int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); | ||
| 166 | struct cma *cma; | ||
| 167 | int ret = -ENOMEM; | ||
| 168 | |||
| 169 | pr_debug("%s(base %08lx, count %lx)\n", __func__, base_pfn, count); | ||
| 170 | |||
| 171 | cma = kmalloc(sizeof *cma, GFP_KERNEL); | ||
| 172 | if (!cma) | ||
| 173 | return ERR_PTR(-ENOMEM); | ||
| 174 | |||
| 175 | cma->base_pfn = base_pfn; | ||
| 176 | cma->count = count; | ||
| 177 | cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | ||
| 178 | |||
| 179 | if (!cma->bitmap) | ||
| 180 | goto no_mem; | ||
| 181 | |||
| 182 | ret = cma_activate_area(base_pfn, count); | ||
| 183 | if (ret) | ||
| 184 | goto error; | ||
| 185 | |||
| 186 | pr_debug("%s: returned %p\n", __func__, (void *)cma); | ||
| 187 | return cma; | ||
| 188 | |||
| 189 | error: | ||
| 190 | kfree(cma->bitmap); | ||
| 191 | no_mem: | ||
| 192 | kfree(cma); | ||
| 193 | return ERR_PTR(ret); | ||
| 194 | } | ||
| 195 | |||
| 196 | static struct cma_reserved { | ||
| 197 | phys_addr_t start; | ||
| 198 | unsigned long size; | ||
| 199 | struct device *dev; | ||
| 200 | } cma_reserved[MAX_CMA_AREAS] __initdata; | ||
| 201 | static unsigned cma_reserved_count __initdata; | ||
| 202 | |||
| 203 | static int __init cma_init_reserved_areas(void) | ||
| 204 | { | ||
| 205 | struct cma_reserved *r = cma_reserved; | ||
| 206 | unsigned i = cma_reserved_count; | ||
| 207 | |||
| 208 | pr_debug("%s()\n", __func__); | ||
| 209 | |||
| 210 | for (; i; --i, ++r) { | ||
| 211 | struct cma *cma; | ||
| 212 | cma = cma_create_area(PFN_DOWN(r->start), | ||
| 213 | r->size >> PAGE_SHIFT); | ||
| 214 | if (!IS_ERR(cma)) | ||
| 215 | dev_set_cma_area(r->dev, cma); | ||
| 216 | } | ||
| 217 | return 0; | ||
| 218 | } | ||
| 219 | core_initcall(cma_init_reserved_areas); | ||
| 220 | |||
| 221 | /** | ||
| 222 | * dma_declare_contiguous() - reserve area for contiguous memory handling | ||
| 223 | * for particular device | ||
| 224 | * @dev: Pointer to device structure. | ||
| 225 | * @size: Size of the reserved memory. | ||
| 226 | * @base: Start address of the reserved memory (optional, 0 for any). | ||
| 227 | * @limit: End address of the reserved memory (optional, 0 for any). | ||
| 228 | * | ||
| 229 | * This function reserves memory for specified device. It should be | ||
| 230 | * called by board specific code when early allocator (memblock or bootmem) | ||
| 231 | * is still active. | ||
| 232 | */ | ||
| 233 | int __init dma_declare_contiguous(struct device *dev, unsigned long size, | ||
| 234 | phys_addr_t base, phys_addr_t limit) | ||
| 235 | { | ||
| 236 | struct cma_reserved *r = &cma_reserved[cma_reserved_count]; | ||
| 237 | unsigned long alignment; | ||
| 238 | |||
| 239 | pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__, | ||
| 240 | (unsigned long)size, (unsigned long)base, | ||
| 241 | (unsigned long)limit); | ||
| 242 | |||
| 243 | /* Sanity checks */ | ||
| 244 | if (cma_reserved_count == ARRAY_SIZE(cma_reserved)) { | ||
| 245 | pr_err("Not enough slots for CMA reserved regions!\n"); | ||
| 246 | return -ENOSPC; | ||
| 247 | } | ||
| 248 | |||
| 249 | if (!size) | ||
| 250 | return -EINVAL; | ||
| 251 | |||
| 252 | /* Sanitise input arguments */ | ||
| 253 | alignment = PAGE_SIZE << max(MAX_ORDER, pageblock_order); | ||
| 254 | base = ALIGN(base, alignment); | ||
| 255 | size = ALIGN(size, alignment); | ||
| 256 | limit &= ~(alignment - 1); | ||
| 257 | |||
| 258 | /* Reserve memory */ | ||
| 259 | if (base) { | ||
| 260 | if (memblock_is_region_reserved(base, size) || | ||
| 261 | memblock_reserve(base, size) < 0) { | ||
| 262 | base = -EBUSY; | ||
| 263 | goto err; | ||
| 264 | } | ||
| 265 | } else { | ||
| 266 | /* | ||
| 267 | * Use __memblock_alloc_base() since | ||
| 268 | * memblock_alloc_base() panic()s. | ||
| 269 | */ | ||
| 270 | phys_addr_t addr = __memblock_alloc_base(size, alignment, limit); | ||
| 271 | if (!addr) { | ||
| 272 | base = -ENOMEM; | ||
| 273 | goto err; | ||
| 274 | } else if (addr + size > ~(unsigned long)0) { | ||
| 275 | memblock_free(addr, size); | ||
| 276 | base = -EINVAL; | ||
| 277 | goto err; | ||
| 278 | } else { | ||
| 279 | base = addr; | ||
| 280 | } | ||
| 281 | } | ||
| 282 | |||
| 283 | /* | ||
| 284 | * Each reserved area must be initialised later, when more kernel | ||
| 285 | * subsystems (like slab allocator) are available. | ||
| 286 | */ | ||
| 287 | r->start = base; | ||
| 288 | r->size = size; | ||
| 289 | r->dev = dev; | ||
| 290 | cma_reserved_count++; | ||
| 291 | pr_info("CMA: reserved %ld MiB at %08lx\n", size / SZ_1M, | ||
| 292 | (unsigned long)base); | ||
| 293 | |||
| 294 | /* Architecture specific contiguous memory fixup. */ | ||
| 295 | dma_contiguous_early_fixup(base, size); | ||
| 296 | return 0; | ||
| 297 | err: | ||
| 298 | pr_err("CMA: failed to reserve %ld MiB\n", size / SZ_1M); | ||
| 299 | return base; | ||
| 300 | } | ||
| 301 | |||
| 302 | /** | ||
| 303 | * dma_alloc_from_contiguous() - allocate pages from contiguous area | ||
| 304 | * @dev: Pointer to device for which the allocation is performed. | ||
| 305 | * @count: Requested number of pages. | ||
| 306 | * @align: Requested alignment of pages (in PAGE_SIZE order). | ||
| 307 | * | ||
| 308 | * This function allocates memory buffer for specified device. It uses | ||
| 309 | * device specific contiguous memory area if available or the default | ||
| 310 | * global one. Requires architecture specific get_dev_cma_area() helper | ||
| 311 | * function. | ||
| 312 | */ | ||
| 313 | struct page *dma_alloc_from_contiguous(struct device *dev, int count, | ||
| 314 | unsigned int align) | ||
| 315 | { | ||
| 316 | unsigned long mask, pfn, pageno, start = 0; | ||
| 317 | struct cma *cma = dev_get_cma_area(dev); | ||
| 318 | int ret; | ||
| 319 | |||
| 320 | if (!cma || !cma->count) | ||
| 321 | return NULL; | ||
| 322 | |||
| 323 | if (align > CONFIG_CMA_ALIGNMENT) | ||
| 324 | align = CONFIG_CMA_ALIGNMENT; | ||
| 325 | |||
| 326 | pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, | ||
| 327 | count, align); | ||
| 328 | |||
| 329 | if (!count) | ||
| 330 | return NULL; | ||
| 331 | |||
| 332 | mask = (1 << align) - 1; | ||
| 333 | |||
| 334 | mutex_lock(&cma_mutex); | ||
| 335 | |||
| 336 | for (;;) { | ||
| 337 | pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count, | ||
| 338 | start, count, mask); | ||
| 339 | if (pageno >= cma->count) { | ||
| 340 | ret = -ENOMEM; | ||
| 341 | goto error; | ||
| 342 | } | ||
| 343 | |||
| 344 | pfn = cma->base_pfn + pageno; | ||
| 345 | ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); | ||
| 346 | if (ret == 0) { | ||
| 347 | bitmap_set(cma->bitmap, pageno, count); | ||
| 348 | break; | ||
| 349 | } else if (ret != -EBUSY) { | ||
| 350 | goto error; | ||
| 351 | } | ||
| 352 | pr_debug("%s(): memory range at %p is busy, retrying\n", | ||
| 353 | __func__, pfn_to_page(pfn)); | ||
| 354 | /* try again with a bit different memory target */ | ||
| 355 | start = pageno + mask + 1; | ||
| 356 | } | ||
| 357 | |||
| 358 | mutex_unlock(&cma_mutex); | ||
| 359 | |||
| 360 | pr_debug("%s(): returned %p\n", __func__, pfn_to_page(pfn)); | ||
| 361 | return pfn_to_page(pfn); | ||
| 362 | error: | ||
| 363 | mutex_unlock(&cma_mutex); | ||
| 364 | return NULL; | ||
| 365 | } | ||
| 366 | |||
| 367 | /** | ||
| 368 | * dma_release_from_contiguous() - release allocated pages | ||
| 369 | * @dev: Pointer to device for which the pages were allocated. | ||
| 370 | * @pages: Allocated pages. | ||
| 371 | * @count: Number of allocated pages. | ||
| 372 | * | ||
| 373 | * This function releases memory allocated by dma_alloc_from_contiguous(). | ||
| 374 | * It returns false when provided pages do not belong to contiguous area and | ||
| 375 | * true otherwise. | ||
| 376 | */ | ||
| 377 | bool dma_release_from_contiguous(struct device *dev, struct page *pages, | ||
| 378 | int count) | ||
| 379 | { | ||
| 380 | struct cma *cma = dev_get_cma_area(dev); | ||
| 381 | unsigned long pfn; | ||
| 382 | |||
| 383 | if (!cma || !pages) | ||
| 384 | return false; | ||
| 385 | |||
| 386 | pr_debug("%s(page %p)\n", __func__, (void *)pages); | ||
| 387 | |||
| 388 | pfn = page_to_pfn(pages); | ||
| 389 | |||
| 390 | if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) | ||
| 391 | return false; | ||
| 392 | |||
| 393 | VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); | ||
| 394 | |||
| 395 | mutex_lock(&cma_mutex); | ||
| 396 | bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count); | ||
| 397 | free_contig_range(pfn, count); | ||
| 398 | mutex_unlock(&cma_mutex); | ||
| 399 | |||
| 400 | return true; | ||
| 401 | } | ||
diff --git a/include/asm-generic/dma-coherent.h b/include/asm-generic/dma-coherent.h index 85a3ffaa0242..abfb2682de7f 100644 --- a/include/asm-generic/dma-coherent.h +++ b/include/asm-generic/dma-coherent.h | |||
| @@ -3,13 +3,15 @@ | |||
| 3 | 3 | ||
| 4 | #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT | 4 | #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT |
| 5 | /* | 5 | /* |
| 6 | * These two functions are only for dma allocator. | 6 | * These three functions are only for dma allocator. |
| 7 | * Don't use them in device drivers. | 7 | * Don't use them in device drivers. |
| 8 | */ | 8 | */ |
| 9 | int dma_alloc_from_coherent(struct device *dev, ssize_t size, | 9 | int dma_alloc_from_coherent(struct device *dev, ssize_t size, |
| 10 | dma_addr_t *dma_handle, void **ret); | 10 | dma_addr_t *dma_handle, void **ret); |
| 11 | int dma_release_from_coherent(struct device *dev, int order, void *vaddr); | 11 | int dma_release_from_coherent(struct device *dev, int order, void *vaddr); |
| 12 | 12 | ||
| 13 | int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, | ||
| 14 | void *cpu_addr, size_t size, int *ret); | ||
| 13 | /* | 15 | /* |
| 14 | * Standard interface | 16 | * Standard interface |
| 15 | */ | 17 | */ |
diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h new file mode 100644 index 000000000000..c544356b374b --- /dev/null +++ b/include/asm-generic/dma-contiguous.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | #ifndef ASM_DMA_CONTIGUOUS_H | ||
| 2 | #define ASM_DMA_CONTIGUOUS_H | ||
| 3 | |||
| 4 | #ifdef __KERNEL__ | ||
| 5 | #ifdef CONFIG_CMA | ||
| 6 | |||
| 7 | #include <linux/device.h> | ||
| 8 | #include <linux/dma-contiguous.h> | ||
| 9 | |||
| 10 | static inline struct cma *dev_get_cma_area(struct device *dev) | ||
| 11 | { | ||
| 12 | if (dev && dev->cma_area) | ||
| 13 | return dev->cma_area; | ||
| 14 | return dma_contiguous_default_area; | ||
| 15 | } | ||
| 16 | |||
| 17 | static inline void dev_set_cma_area(struct device *dev, struct cma *cma) | ||
| 18 | { | ||
| 19 | if (dev) | ||
| 20 | dev->cma_area = cma; | ||
| 21 | if (!dev || !dma_contiguous_default_area) | ||
| 22 | dma_contiguous_default_area = cma; | ||
| 23 | } | ||
| 24 | |||
| 25 | #endif | ||
| 26 | #endif | ||
| 27 | |||
| 28 | #endif | ||
diff --git a/include/linux/device.h b/include/linux/device.h index e04f5776f6d0..161d96241b1b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h | |||
| @@ -667,6 +667,10 @@ struct device { | |||
| 667 | 667 | ||
| 668 | struct dma_coherent_mem *dma_mem; /* internal for coherent mem | 668 | struct dma_coherent_mem *dma_mem; /* internal for coherent mem |
| 669 | override */ | 669 | override */ |
| 670 | #ifdef CONFIG_CMA | ||
| 671 | struct cma *cma_area; /* contiguous memory area for dma | ||
| 672 | allocations */ | ||
| 673 | #endif | ||
| 670 | /* arch specific additions */ | 674 | /* arch specific additions */ |
| 671 | struct dev_archdata archdata; | 675 | struct dev_archdata archdata; |
| 672 | 676 | ||
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h new file mode 100644 index 000000000000..2f303e4b7ed3 --- /dev/null +++ b/include/linux/dma-contiguous.h | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | #ifndef __LINUX_CMA_H | ||
| 2 | #define __LINUX_CMA_H | ||
| 3 | |||
| 4 | /* | ||
| 5 | * Contiguous Memory Allocator for DMA mapping framework | ||
| 6 | * Copyright (c) 2010-2011 by Samsung Electronics. | ||
| 7 | * Written by: | ||
| 8 | * Marek Szyprowski <m.szyprowski@samsung.com> | ||
| 9 | * Michal Nazarewicz <mina86@mina86.com> | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or | ||
| 12 | * modify it under the terms of the GNU General Public License as | ||
| 13 | * published by the Free Software Foundation; either version 2 of the | ||
| 14 | * License or (at your option) any later version of the license. | ||
| 15 | */ | ||
| 16 | |||
| 17 | /* | ||
| 18 | * Contiguous Memory Allocator | ||
| 19 | * | ||
| 20 | * The Contiguous Memory Allocator (CMA) makes it possible to | ||
| 21 | * allocate big contiguous chunks of memory after the system has | ||
| 22 | * booted. | ||
| 23 | * | ||
| 24 | * Why is it needed? | ||
| 25 | * | ||
| 26 | * Various devices on embedded systems have no scatter-gather and/or | ||
| 27 | * IO map support and require contiguous blocks of memory to | ||
| 28 | * operate. They include devices such as cameras, hardware video | ||
| 29 | * coders, etc. | ||
| 30 | * | ||
| 31 | * Such devices often require big memory buffers (a full HD frame | ||
| 32 | * is, for instance, more than 2 mega pixels large, i.e. more than 6 | ||
| 33 | * MB of memory), which makes mechanisms such as kmalloc() or | ||
| 34 | * alloc_page() ineffective. | ||
| 35 | * | ||
| 36 | * At the same time, a solution where a big memory region is | ||
| 37 | * reserved for a device is suboptimal since often more memory is | ||
| 38 | * reserved than strictly required and, moreover, the memory is | ||
| 39 | * inaccessible to page system even if device drivers don't use it. | ||
| 40 | * | ||
| 41 | * CMA tries to solve this issue by operating on memory regions | ||
| 42 | * where only movable pages can be allocated from. This way, kernel | ||
| 43 | * can use the memory for pagecache and when device driver requests | ||
| 44 | * it, allocated pages can be migrated. | ||
| 45 | * | ||
| 46 | * Driver usage | ||
| 47 | * | ||
| 48 | * CMA should not be used by the device drivers directly. It is | ||
| 49 | * only a helper framework for dma-mapping subsystem. | ||
| 50 | * | ||
| 51 | * For more information, see kernel-docs in drivers/base/dma-contiguous.c | ||
| 52 | */ | ||
| 53 | |||
| 54 | #ifdef __KERNEL__ | ||
| 55 | |||
| 56 | struct cma; | ||
| 57 | struct page; | ||
| 58 | struct device; | ||
| 59 | |||
| 60 | #ifdef CONFIG_CMA | ||
| 61 | |||
| 62 | /* | ||
| 63 | * There is always at least global CMA area and a few optional device | ||
| 64 | * private areas configured in kernel .config. | ||
| 65 | */ | ||
| 66 | #define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) | ||
| 67 | |||
| 68 | extern struct cma *dma_contiguous_default_area; | ||
| 69 | |||
| 70 | void dma_contiguous_reserve(phys_addr_t addr_limit); | ||
| 71 | int dma_declare_contiguous(struct device *dev, unsigned long size, | ||
| 72 | phys_addr_t base, phys_addr_t limit); | ||
| 73 | |||
| 74 | struct page *dma_alloc_from_contiguous(struct device *dev, int count, | ||
| 75 | unsigned int order); | ||
| 76 | bool dma_release_from_contiguous(struct device *dev, struct page *pages, | ||
| 77 | int count); | ||
| 78 | |||
| 79 | #else | ||
| 80 | |||
| 81 | #define MAX_CMA_AREAS (0) | ||
| 82 | |||
| 83 | static inline void dma_contiguous_reserve(phys_addr_t limit) { } | ||
| 84 | |||
| 85 | static inline | ||
| 86 | int dma_declare_contiguous(struct device *dev, unsigned long size, | ||
| 87 | phys_addr_t base, phys_addr_t limit) | ||
| 88 | { | ||
| 89 | return -ENOSYS; | ||
| 90 | } | ||
| 91 | |||
| 92 | static inline | ||
| 93 | struct page *dma_alloc_from_contiguous(struct device *dev, int count, | ||
| 94 | unsigned int order) | ||
| 95 | { | ||
| 96 | return NULL; | ||
| 97 | } | ||
| 98 | |||
| 99 | static inline | ||
| 100 | bool dma_release_from_contiguous(struct device *dev, struct page *pages, | ||
| 101 | int count) | ||
| 102 | { | ||
| 103 | return false; | ||
| 104 | } | ||
| 105 | |||
| 106 | #endif | ||
| 107 | |||
| 108 | #endif | ||
| 109 | |||
| 110 | #endif | ||
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 581e74b7df95..1e49be49d324 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
| @@ -391,4 +391,16 @@ static inline bool pm_suspended_storage(void) | |||
| 391 | } | 391 | } |
| 392 | #endif /* CONFIG_PM_SLEEP */ | 392 | #endif /* CONFIG_PM_SLEEP */ |
| 393 | 393 | ||
| 394 | #ifdef CONFIG_CMA | ||
| 395 | |||
| 396 | /* The below functions must be run on a range from a single zone. */ | ||
| 397 | extern int alloc_contig_range(unsigned long start, unsigned long end, | ||
| 398 | unsigned migratetype); | ||
| 399 | extern void free_contig_range(unsigned long pfn, unsigned nr_pages); | ||
| 400 | |||
| 401 | /* CMA stuff */ | ||
| 402 | extern void init_cma_reserved_pageblock(struct page *page); | ||
| 403 | |||
| 404 | #endif | ||
| 405 | |||
| 394 | #endif /* __LINUX_GFP_H */ | 406 | #endif /* __LINUX_GFP_H */ |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 41aa49b74821..4871e31ae277 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
| @@ -35,13 +35,39 @@ | |||
| 35 | */ | 35 | */ |
| 36 | #define PAGE_ALLOC_COSTLY_ORDER 3 | 36 | #define PAGE_ALLOC_COSTLY_ORDER 3 |
| 37 | 37 | ||
| 38 | #define MIGRATE_UNMOVABLE 0 | 38 | enum { |
| 39 | #define MIGRATE_RECLAIMABLE 1 | 39 | MIGRATE_UNMOVABLE, |
| 40 | #define MIGRATE_MOVABLE 2 | 40 | MIGRATE_RECLAIMABLE, |
| 41 | #define MIGRATE_PCPTYPES 3 /* the number of types on the pcp lists */ | 41 | MIGRATE_MOVABLE, |
| 42 | #define MIGRATE_RESERVE 3 | 42 | MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ |
| 43 | #define MIGRATE_ISOLATE 4 /* can't allocate from here */ | 43 | MIGRATE_RESERVE = MIGRATE_PCPTYPES, |
| 44 | #define MIGRATE_TYPES 5 | 44 | #ifdef CONFIG_CMA |
| 45 | /* | ||
| 46 | * MIGRATE_CMA migration type is designed to mimic the way | ||
| 47 | * ZONE_MOVABLE works. Only movable pages can be allocated | ||
| 48 | * from MIGRATE_CMA pageblocks and page allocator never | ||
| 49 | * implicitly change migration type of MIGRATE_CMA pageblock. | ||
| 50 | * | ||
| 51 | * The way to use it is to change migratetype of a range of | ||
| 52 | * pageblocks to MIGRATE_CMA which can be done by | ||
| 53 | * __free_pageblock_cma() function. What is important though | ||
| 54 | * is that a range of pageblocks must be aligned to | ||
| 55 | * MAX_ORDER_NR_PAGES should the biggest page be bigger than | ||
| 56 | * a single pageblock. | ||
| 57 | */ | ||
| 58 | MIGRATE_CMA, | ||
| 59 | #endif | ||
| 60 | MIGRATE_ISOLATE, /* can't allocate from here */ | ||
| 61 | MIGRATE_TYPES | ||
| 62 | }; | ||
| 63 | |||
| 64 | #ifdef CONFIG_CMA | ||
| 65 | # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) | ||
| 66 | # define cma_wmark_pages(zone) zone->min_cma_pages | ||
| 67 | #else | ||
| 68 | # define is_migrate_cma(migratetype) false | ||
| 69 | # define cma_wmark_pages(zone) 0 | ||
| 70 | #endif | ||
| 45 | 71 | ||
| 46 | #define for_each_migratetype_order(order, type) \ | 72 | #define for_each_migratetype_order(order, type) \ |
| 47 | for (order = 0; order < MAX_ORDER; order++) \ | 73 | for (order = 0; order < MAX_ORDER; order++) \ |
| @@ -347,6 +373,13 @@ struct zone { | |||
| 347 | /* see spanned/present_pages for more description */ | 373 | /* see spanned/present_pages for more description */ |
| 348 | seqlock_t span_seqlock; | 374 | seqlock_t span_seqlock; |
| 349 | #endif | 375 | #endif |
| 376 | #ifdef CONFIG_CMA | ||
| 377 | /* | ||
| 378 | * CMA needs to increase watermark levels during the allocation | ||
| 379 | * process to make sure that the system is not starved. | ||
| 380 | */ | ||
| 381 | unsigned long min_cma_pages; | ||
| 382 | #endif | ||
| 350 | struct free_area free_area[MAX_ORDER]; | 383 | struct free_area free_area[MAX_ORDER]; |
| 351 | 384 | ||
| 352 | #ifndef CONFIG_SPARSEMEM | 385 | #ifndef CONFIG_SPARSEMEM |
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 051c1b1ede4e..3bdcab30ca41 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | /* | 4 | /* |
| 5 | * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. | 5 | * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. |
| 6 | * If specified range includes migrate types other than MOVABLE, | 6 | * If specified range includes migrate types other than MOVABLE or CMA, |
| 7 | * this will fail with -EBUSY. | 7 | * this will fail with -EBUSY. |
| 8 | * | 8 | * |
| 9 | * For isolating all pages in the range finally, the caller have to | 9 | * For isolating all pages in the range finally, the caller have to |
| @@ -11,27 +11,27 @@ | |||
| 11 | * test it. | 11 | * test it. |
| 12 | */ | 12 | */ |
| 13 | extern int | 13 | extern int |
| 14 | start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); | 14 | start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
| 15 | unsigned migratetype); | ||
| 15 | 16 | ||
| 16 | /* | 17 | /* |
| 17 | * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE. | 18 | * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE. |
| 18 | * target range is [start_pfn, end_pfn) | 19 | * target range is [start_pfn, end_pfn) |
| 19 | */ | 20 | */ |
| 20 | extern int | 21 | extern int |
| 21 | undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); | 22 | undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
| 23 | unsigned migratetype); | ||
| 22 | 24 | ||
| 23 | /* | 25 | /* |
| 24 | * test all pages in [start_pfn, end_pfn)are isolated or not. | 26 | * Test all pages in [start_pfn, end_pfn) are isolated or not. |
| 25 | */ | 27 | */ |
| 26 | extern int | 28 | int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn); |
| 27 | test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn); | ||
| 28 | 29 | ||
| 29 | /* | 30 | /* |
| 30 | * Internal funcs.Changes pageblock's migrate type. | 31 | * Internal functions. Changes pageblock's migrate type. |
| 31 | * Please use make_pagetype_isolated()/make_pagetype_movable(). | ||
| 32 | */ | 32 | */ |
| 33 | extern int set_migratetype_isolate(struct page *page); | 33 | extern int set_migratetype_isolate(struct page *page); |
| 34 | extern void unset_migratetype_isolate(struct page *page); | 34 | extern void unset_migratetype_isolate(struct page *page, unsigned migratetype); |
| 35 | 35 | ||
| 36 | 36 | ||
| 37 | #endif | 37 | #endif |
diff --git a/mm/Kconfig b/mm/Kconfig index e338407f1225..39220026c797 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
| @@ -198,7 +198,7 @@ config COMPACTION | |||
| 198 | config MIGRATION | 198 | config MIGRATION |
| 199 | bool "Page migration" | 199 | bool "Page migration" |
| 200 | def_bool y | 200 | def_bool y |
| 201 | depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION | 201 | depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA |
| 202 | help | 202 | help |
| 203 | Allows the migration of the physical location of pages of processes | 203 | Allows the migration of the physical location of pages of processes |
| 204 | while the virtual addresses are not changed. This is useful in | 204 | while the virtual addresses are not changed. This is useful in |
diff --git a/mm/Makefile b/mm/Makefile index 50ec00ef2a0e..8aada89efbbb 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
| @@ -13,7 +13,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ | |||
| 13 | readahead.o swap.o truncate.o vmscan.o shmem.o \ | 13 | readahead.o swap.o truncate.o vmscan.o shmem.o \ |
| 14 | prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ | 14 | prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ |
| 15 | page_isolation.o mm_init.o mmu_context.o percpu.o \ | 15 | page_isolation.o mm_init.o mmu_context.o percpu.o \ |
| 16 | $(mmu-y) | 16 | compaction.o $(mmu-y) |
| 17 | obj-y += init-mm.o | 17 | obj-y += init-mm.o |
| 18 | 18 | ||
| 19 | ifdef CONFIG_NO_BOOTMEM | 19 | ifdef CONFIG_NO_BOOTMEM |
| @@ -32,7 +32,6 @@ obj-$(CONFIG_NUMA) += mempolicy.o | |||
| 32 | obj-$(CONFIG_SPARSEMEM) += sparse.o | 32 | obj-$(CONFIG_SPARSEMEM) += sparse.o |
| 33 | obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o | 33 | obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o |
| 34 | obj-$(CONFIG_SLOB) += slob.o | 34 | obj-$(CONFIG_SLOB) += slob.o |
| 35 | obj-$(CONFIG_COMPACTION) += compaction.o | ||
| 36 | obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o | 35 | obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o |
| 37 | obj-$(CONFIG_KSM) += ksm.o | 36 | obj-$(CONFIG_KSM) += ksm.o |
| 38 | obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o | 37 | obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o |
diff --git a/mm/compaction.c b/mm/compaction.c index 74a8c825ff28..da7d35ea5103 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
| @@ -16,30 +16,11 @@ | |||
| 16 | #include <linux/sysfs.h> | 16 | #include <linux/sysfs.h> |
| 17 | #include "internal.h" | 17 | #include "internal.h" |
| 18 | 18 | ||
| 19 | #if defined CONFIG_COMPACTION || defined CONFIG_CMA | ||
| 20 | |||
| 19 | #define CREATE_TRACE_POINTS | 21 | #define CREATE_TRACE_POINTS |
| 20 | #include <trace/events/compaction.h> | 22 | #include <trace/events/compaction.h> |
| 21 | 23 | ||
| 22 | /* | ||
| 23 | * compact_control is used to track pages being migrated and the free pages | ||
| 24 | * they are being migrated to during memory compaction. The free_pfn starts | ||
| 25 | * at the end of a zone and migrate_pfn begins at the start. Movable pages | ||
| 26 | * are moved to the end of a zone during a compaction run and the run | ||
| 27 | * completes when free_pfn <= migrate_pfn | ||
| 28 | */ | ||
| 29 | struct compact_control { | ||
| 30 | struct list_head freepages; /* List of free pages to migrate to */ | ||
| 31 | struct list_head migratepages; /* List of pages being migrated */ | ||
| 32 | unsigned long nr_freepages; /* Number of isolated free pages */ | ||
| 33 | unsigned long nr_migratepages; /* Number of pages to migrate */ | ||
| 34 | unsigned long free_pfn; /* isolate_freepages search base */ | ||
| 35 | unsigned long migrate_pfn; /* isolate_migratepages search base */ | ||
| 36 | bool sync; /* Synchronous migration */ | ||
| 37 | |||
| 38 | int order; /* order a direct compactor needs */ | ||
| 39 | int migratetype; /* MOVABLE, RECLAIMABLE etc */ | ||
| 40 | struct zone *zone; | ||
| 41 | }; | ||
| 42 | |||
| 43 | static unsigned long release_freepages(struct list_head *freelist) | 24 | static unsigned long release_freepages(struct list_head *freelist) |
| 44 | { | 25 | { |
| 45 | struct page *page, *next; | 26 | struct page *page, *next; |
| @@ -54,24 +35,35 @@ static unsigned long release_freepages(struct list_head *freelist) | |||
| 54 | return count; | 35 | return count; |
| 55 | } | 36 | } |
| 56 | 37 | ||
| 57 | /* Isolate free pages onto a private freelist. Must hold zone->lock */ | 38 | static void map_pages(struct list_head *list) |
| 58 | static unsigned long isolate_freepages_block(struct zone *zone, | 39 | { |
| 59 | unsigned long blockpfn, | 40 | struct page *page; |
| 60 | struct list_head *freelist) | 41 | |
| 42 | list_for_each_entry(page, list, lru) { | ||
| 43 | arch_alloc_page(page, 0); | ||
| 44 | kernel_map_pages(page, 1, 1); | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | static inline bool migrate_async_suitable(int migratetype) | ||
| 49 | { | ||
| 50 | return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; | ||
| 51 | } | ||
| 52 | |||
| 53 | /* | ||
| 54 | * Isolate free pages onto a private freelist. Caller must hold zone->lock. | ||
| 55 | * If @strict is true, will abort returning 0 on any invalid PFNs or non-free | ||
| 56 | * pages inside of the pageblock (even though it may still end up isolating | ||
| 57 | * some pages). | ||
| 58 | */ | ||
| 59 | static unsigned long isolate_freepages_block(unsigned long blockpfn, | ||
| 60 | unsigned long end_pfn, | ||
| 61 | struct list_head *freelist, | ||
| 62 | bool strict) | ||
| 61 | { | 63 | { |
| 62 | unsigned long zone_end_pfn, end_pfn; | ||
| 63 | int nr_scanned = 0, total_isolated = 0; | 64 | int nr_scanned = 0, total_isolated = 0; |
| 64 | struct page *cursor; | 65 | struct page *cursor; |
| 65 | 66 | ||
| 66 | /* Get the last PFN we should scan for free pages at */ | ||
| 67 | zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; | ||
| 68 | end_pfn = min(blockpfn + pageblock_nr_pages, zone_end_pfn); | ||
| 69 | |||
| 70 | /* Find the first usable PFN in the block to initialse page cursor */ | ||
| 71 | for (; blockpfn < end_pfn; blockpfn++) { | ||
| 72 | if (pfn_valid_within(blockpfn)) | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | cursor = pfn_to_page(blockpfn); | 67 | cursor = pfn_to_page(blockpfn); |
| 76 | 68 | ||
| 77 | /* Isolate free pages. This assumes the block is valid */ | 69 | /* Isolate free pages. This assumes the block is valid */ |
| @@ -79,15 +71,23 @@ static unsigned long isolate_freepages_block(struct zone *zone, | |||
| 79 | int isolated, i; | 71 | int isolated, i; |
| 80 | struct page *page = cursor; | 72 | struct page *page = cursor; |
| 81 | 73 | ||
| 82 | if (!pfn_valid_within(blockpfn)) | 74 | if (!pfn_valid_within(blockpfn)) { |
| 75 | if (strict) | ||
| 76 | return 0; | ||
| 83 | continue; | 77 | continue; |
| 78 | } | ||
| 84 | nr_scanned++; | 79 | nr_scanned++; |
| 85 | 80 | ||
| 86 | if (!PageBuddy(page)) | 81 | if (!PageBuddy(page)) { |
| 82 | if (strict) | ||
| 83 | return 0; | ||
| 87 | continue; | 84 | continue; |
| 85 | } | ||
| 88 | 86 | ||
| 89 | /* Found a free page, break it into order-0 pages */ | 87 | /* Found a free page, break it into order-0 pages */ |
| 90 | isolated = split_free_page(page); | 88 | isolated = split_free_page(page); |
| 89 | if (!isolated && strict) | ||
| 90 | return 0; | ||
| 91 | total_isolated += isolated; | 91 | total_isolated += isolated; |
| 92 | for (i = 0; i < isolated; i++) { | 92 | for (i = 0; i < isolated; i++) { |
| 93 | list_add(&page->lru, freelist); | 93 | list_add(&page->lru, freelist); |
| @@ -105,114 +105,71 @@ static unsigned long isolate_freepages_block(struct zone *zone, | |||
| 105 | return total_isolated; | 105 | return total_isolated; |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | /* Returns true if the page is within a block suitable for migration to */ | 108 | /** |
| 109 | static bool suitable_migration_target(struct page *page) | 109 | * isolate_freepages_range() - isolate free pages. |
| 110 | { | 110 | * @start_pfn: The first PFN to start isolating. |
| 111 | 111 | * @end_pfn: The one-past-last PFN. | |
| 112 | int migratetype = get_pageblock_migratetype(page); | 112 | * |
| 113 | 113 | * Non-free pages, invalid PFNs, or zone boundaries within the | |
| 114 | /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ | 114 | * [start_pfn, end_pfn) range are considered errors, causing the function to |
| 115 | if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) | 115 | * undo its actions and return zero. |
| 116 | return false; | 116 | * |
| 117 | 117 | * Otherwise, function returns one-past-the-last PFN of isolated page | |
| 118 | /* If the page is a large free page, then allow migration */ | 118 | * (which may be greater than end_pfn if end fell in the middle of |
| 119 | if (PageBuddy(page) && page_order(page) >= pageblock_order) | 119 | * a free page). |
| 120 | return true; | ||
| 121 | |||
| 122 | /* If the block is MIGRATE_MOVABLE, allow migration */ | ||
| 123 | if (migratetype == MIGRATE_MOVABLE) | ||
| 124 | return true; | ||
| 125 | |||
| 126 | /* Otherwise skip the block */ | ||
| 127 | return false; | ||
| 128 | } | ||
| 129 | |||
| 130 | /* | ||
| 131 | * Based on information in the current compact_control, find blocks | ||
| 132 | * suitable for isolating free pages from and then isolate them. | ||
| 133 | */ | 120 | */ |
| 134 | static void isolate_freepages(struct zone *zone, | 121 | unsigned long |
| 135 | struct compact_control *cc) | 122 | isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn) |
| 136 | { | 123 | { |
| 137 | struct page *page; | 124 | unsigned long isolated, pfn, block_end_pfn, flags; |
| 138 | unsigned long high_pfn, low_pfn, pfn; | 125 | struct zone *zone = NULL; |
| 139 | unsigned long flags; | 126 | LIST_HEAD(freelist); |
| 140 | int nr_freepages = cc->nr_freepages; | ||
| 141 | struct list_head *freelist = &cc->freepages; | ||
| 142 | |||
| 143 | /* | ||
| 144 | * Initialise the free scanner. The starting point is where we last | ||
| 145 | * scanned from (or the end of the zone if starting). The low point | ||
| 146 | * is the end of the pageblock the migration scanner is using. | ||
| 147 | */ | ||
| 148 | pfn = cc->free_pfn; | ||
| 149 | low_pfn = cc->migrate_pfn + pageblock_nr_pages; | ||
| 150 | 127 | ||
| 151 | /* | 128 | if (pfn_valid(start_pfn)) |
| 152 | * Take care that if the migration scanner is at the end of the zone | 129 | zone = page_zone(pfn_to_page(start_pfn)); |
| 153 | * that the free scanner does not accidentally move to the next zone | ||
| 154 | * in the next isolation cycle. | ||
| 155 | */ | ||
| 156 | high_pfn = min(low_pfn, pfn); | ||
| 157 | |||
| 158 | /* | ||
| 159 | * Isolate free pages until enough are available to migrate the | ||
| 160 | * pages on cc->migratepages. We stop searching if the migrate | ||
| 161 | * and free page scanners meet or enough free pages are isolated. | ||
| 162 | */ | ||
| 163 | for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; | ||
| 164 | pfn -= pageblock_nr_pages) { | ||
| 165 | unsigned long isolated; | ||
| 166 | 130 | ||
| 167 | if (!pfn_valid(pfn)) | 131 | for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) { |
| 168 | continue; | 132 | if (!pfn_valid(pfn) || zone != page_zone(pfn_to_page(pfn))) |
| 133 | break; | ||
| 169 | 134 | ||
| 170 | /* | 135 | /* |
| 171 | * Check for overlapping nodes/zones. It's possible on some | 136 | * On subsequent iterations ALIGN() is actually not needed, |
| 172 | * configurations to have a setup like | 137 | * but we keep it so as not to complicate the code. |
| 173 | * node0 node1 node0 | ||
| 174 | * i.e. it's possible that all pages within a zones range of | ||
| 175 | * pages do not belong to a single zone. | ||
| 176 | */ | 138 | */ |
| 177 | page = pfn_to_page(pfn); | 139 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); |
| 178 | if (page_zone(page) != zone) | 140 | block_end_pfn = min(block_end_pfn, end_pfn); |
| 179 | continue; | ||
| 180 | 141 | ||
| 181 | /* Check the block is suitable for migration */ | 142 | spin_lock_irqsave(&zone->lock, flags); |
| 182 | if (!suitable_migration_target(page)) | 143 | isolated = isolate_freepages_block(pfn, block_end_pfn, |
| 183 | continue; | 144 | &freelist, true); |
| 145 | spin_unlock_irqrestore(&zone->lock, flags); | ||
| 184 | 146 | ||
| 185 | /* | 147 | /* |
| 186 | * Found a block suitable for isolating free pages from. Now | 148 | * In strict mode, isolate_freepages_block() returns 0 if |
| 187 | * we disabled interrupts, double check things are ok and | 149 | * there are any holes in the block (ie. invalid PFNs or |
| 188 | * isolate the pages. This is to minimise the time IRQs | 150 | * non-free pages). |
| 189 | * are disabled | ||
| 190 | */ | 151 | */ |
| 191 | isolated = 0; | 152 | if (!isolated) |
| 192 | spin_lock_irqsave(&zone->lock, flags); | 153 | break; |
| 193 | if (suitable_migration_target(page)) { | ||
| 194 | isolated = isolate_freepages_block(zone, pfn, freelist); | ||
| 195 | nr_freepages += isolated; | ||
| 196 | } | ||
| 197 | spin_unlock_irqrestore(&zone->lock, flags); | ||
| 198 | 154 | ||
| 199 | /* | 155 | /* |
| 200 | * Record the highest PFN we isolated pages from. When next | 156 | * If we managed to isolate pages, it is always (1 << n) * |
| 201 | * looking for free pages, the search will restart here as | 157 | * pageblock_nr_pages for some non-negative n. (Max order |
| 202 | * page migration may have returned some pages to the allocator | 158 | * page may span two pageblocks). |
| 203 | */ | 159 | */ |
| 204 | if (isolated) | ||
| 205 | high_pfn = max(high_pfn, pfn); | ||
| 206 | } | 160 | } |
| 207 | 161 | ||
| 208 | /* split_free_page does not map the pages */ | 162 | /* split_free_page does not map the pages */ |
| 209 | list_for_each_entry(page, freelist, lru) { | 163 | map_pages(&freelist); |
| 210 | arch_alloc_page(page, 0); | 164 | |
| 211 | kernel_map_pages(page, 1, 1); | 165 | if (pfn < end_pfn) { |
| 166 | /* Loop terminated early, cleanup. */ | ||
| 167 | release_freepages(&freelist); | ||
| 168 | return 0; | ||
| 212 | } | 169 | } |
| 213 | 170 | ||
| 214 | cc->free_pfn = high_pfn; | 171 | /* We don't use freelists for anything. */ |
| 215 | cc->nr_freepages = nr_freepages; | 172 | return pfn; |
| 216 | } | 173 | } |
| 217 | 174 | ||
| 218 | /* Update the number of anon and file isolated pages in the zone */ | 175 | /* Update the number of anon and file isolated pages in the zone */ |
| @@ -243,38 +200,34 @@ static bool too_many_isolated(struct zone *zone) | |||
| 243 | return isolated > (inactive + active) / 2; | 200 | return isolated > (inactive + active) / 2; |
| 244 | } | 201 | } |
| 245 | 202 | ||
| 246 | /* possible outcome of isolate_migratepages */ | 203 | /** |
| 247 | typedef enum { | 204 | * isolate_migratepages_range() - isolate all migrate-able pages in range. |
| 248 | ISOLATE_ABORT, /* Abort compaction now */ | 205 | * @zone: Zone pages are in. |
| 249 | ISOLATE_NONE, /* No pages isolated, continue scanning */ | 206 | * @cc: Compaction control structure. |
| 250 | ISOLATE_SUCCESS, /* Pages isolated, migrate */ | 207 | * @low_pfn: The first PFN of the range. |
| 251 | } isolate_migrate_t; | 208 | * @end_pfn: The one-past-the-last PFN of the range. |
| 252 | 209 | * | |
| 253 | /* | 210 | * Isolate all pages that can be migrated from the range specified by |
| 254 | * Isolate all pages that can be migrated from the block pointed to by | 211 | * [low_pfn, end_pfn). Returns zero if there is a fatal signal |
| 255 | * the migrate scanner within compact_control. | 212 | * pending), otherwise PFN of the first page that was not scanned |
| 213 | * (which may be both less, equal to or more then end_pfn). | ||
| 214 | * | ||
| 215 | * Assumes that cc->migratepages is empty and cc->nr_migratepages is | ||
| 216 | * zero. | ||
| 217 | * | ||
| 218 | * Apart from cc->migratepages and cc->nr_migratetypes this function | ||
| 219 | * does not modify any cc's fields, in particular it does not modify | ||
| 220 | * (or read for that matter) cc->migrate_pfn. | ||
| 256 | */ | 221 | */ |
| 257 | static isolate_migrate_t isolate_migratepages(struct zone *zone, | 222 | unsigned long |
| 258 | struct compact_control *cc) | 223 | isolate_migratepages_range(struct zone *zone, struct compact_control *cc, |
| 224 | unsigned long low_pfn, unsigned long end_pfn) | ||
| 259 | { | 225 | { |
| 260 | unsigned long low_pfn, end_pfn; | ||
| 261 | unsigned long last_pageblock_nr = 0, pageblock_nr; | 226 | unsigned long last_pageblock_nr = 0, pageblock_nr; |
| 262 | unsigned long nr_scanned = 0, nr_isolated = 0; | 227 | unsigned long nr_scanned = 0, nr_isolated = 0; |
| 263 | struct list_head *migratelist = &cc->migratepages; | 228 | struct list_head *migratelist = &cc->migratepages; |
| 264 | isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; | 229 | isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; |
| 265 | 230 | ||
| 266 | /* Do not scan outside zone boundaries */ | ||
| 267 | low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); | ||
| 268 | |||
| 269 | /* Only scan within a pageblock boundary */ | ||
| 270 | end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); | ||
| 271 | |||
| 272 | /* Do not cross the free scanner or scan within a memory hole */ | ||
| 273 | if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { | ||
| 274 | cc->migrate_pfn = end_pfn; | ||
| 275 | return ISOLATE_NONE; | ||
| 276 | } | ||
| 277 | |||
| 278 | /* | 231 | /* |
| 279 | * Ensure that there are not too many pages isolated from the LRU | 232 | * Ensure that there are not too many pages isolated from the LRU |
| 280 | * list by either parallel reclaimers or compaction. If there are, | 233 | * list by either parallel reclaimers or compaction. If there are, |
| @@ -283,12 +236,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
| 283 | while (unlikely(too_many_isolated(zone))) { | 236 | while (unlikely(too_many_isolated(zone))) { |
| 284 | /* async migration should just abort */ | 237 | /* async migration should just abort */ |
| 285 | if (!cc->sync) | 238 | if (!cc->sync) |
| 286 | return ISOLATE_ABORT; | 239 | return 0; |
| 287 | 240 | ||
| 288 | congestion_wait(BLK_RW_ASYNC, HZ/10); | 241 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
| 289 | 242 | ||
| 290 | if (fatal_signal_pending(current)) | 243 | if (fatal_signal_pending(current)) |
| 291 | return ISOLATE_ABORT; | 244 | return 0; |
| 292 | } | 245 | } |
| 293 | 246 | ||
| 294 | /* Time to isolate some pages for migration */ | 247 | /* Time to isolate some pages for migration */ |
| @@ -351,7 +304,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
| 351 | */ | 304 | */ |
| 352 | pageblock_nr = low_pfn >> pageblock_order; | 305 | pageblock_nr = low_pfn >> pageblock_order; |
| 353 | if (!cc->sync && last_pageblock_nr != pageblock_nr && | 306 | if (!cc->sync && last_pageblock_nr != pageblock_nr && |
| 354 | get_pageblock_migratetype(page) != MIGRATE_MOVABLE) { | 307 | !migrate_async_suitable(get_pageblock_migratetype(page))) { |
| 355 | low_pfn += pageblock_nr_pages; | 308 | low_pfn += pageblock_nr_pages; |
| 356 | low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; | 309 | low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; |
| 357 | last_pageblock_nr = pageblock_nr; | 310 | last_pageblock_nr = pageblock_nr; |
| @@ -396,11 +349,124 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
| 396 | acct_isolated(zone, cc); | 349 | acct_isolated(zone, cc); |
| 397 | 350 | ||
| 398 | spin_unlock_irq(&zone->lru_lock); | 351 | spin_unlock_irq(&zone->lru_lock); |
| 399 | cc->migrate_pfn = low_pfn; | ||
| 400 | 352 | ||
| 401 | trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); | 353 | trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); |
| 402 | 354 | ||
| 403 | return ISOLATE_SUCCESS; | 355 | return low_pfn; |
| 356 | } | ||
| 357 | |||
| 358 | #endif /* CONFIG_COMPACTION || CONFIG_CMA */ | ||
| 359 | #ifdef CONFIG_COMPACTION | ||
| 360 | |||
| 361 | /* Returns true if the page is within a block suitable for migration to */ | ||
| 362 | static bool suitable_migration_target(struct page *page) | ||
| 363 | { | ||
| 364 | |||
| 365 | int migratetype = get_pageblock_migratetype(page); | ||
| 366 | |||
| 367 | /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ | ||
| 368 | if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) | ||
| 369 | return false; | ||
| 370 | |||
| 371 | /* If the page is a large free page, then allow migration */ | ||
| 372 | if (PageBuddy(page) && page_order(page) >= pageblock_order) | ||
| 373 | return true; | ||
| 374 | |||
| 375 | /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ | ||
| 376 | if (migrate_async_suitable(migratetype)) | ||
| 377 | return true; | ||
| 378 | |||
| 379 | /* Otherwise skip the block */ | ||
| 380 | return false; | ||
| 381 | } | ||
| 382 | |||
| 383 | /* | ||
| 384 | * Based on information in the current compact_control, find blocks | ||
| 385 | * suitable for isolating free pages from and then isolate them. | ||
| 386 | */ | ||
| 387 | static void isolate_freepages(struct zone *zone, | ||
| 388 | struct compact_control *cc) | ||
| 389 | { | ||
| 390 | struct page *page; | ||
| 391 | unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn; | ||
| 392 | unsigned long flags; | ||
| 393 | int nr_freepages = cc->nr_freepages; | ||
| 394 | struct list_head *freelist = &cc->freepages; | ||
| 395 | |||
| 396 | /* | ||
| 397 | * Initialise the free scanner. The starting point is where we last | ||
| 398 | * scanned from (or the end of the zone if starting). The low point | ||
| 399 | * is the end of the pageblock the migration scanner is using. | ||
| 400 | */ | ||
| 401 | pfn = cc->free_pfn; | ||
| 402 | low_pfn = cc->migrate_pfn + pageblock_nr_pages; | ||
| 403 | |||
| 404 | /* | ||
| 405 | * Take care that if the migration scanner is at the end of the zone | ||
| 406 | * that the free scanner does not accidentally move to the next zone | ||
| 407 | * in the next isolation cycle. | ||
| 408 | */ | ||
| 409 | high_pfn = min(low_pfn, pfn); | ||
| 410 | |||
| 411 | zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; | ||
| 412 | |||
| 413 | /* | ||
| 414 | * Isolate free pages until enough are available to migrate the | ||
| 415 | * pages on cc->migratepages. We stop searching if the migrate | ||
| 416 | * and free page scanners meet or enough free pages are isolated. | ||
| 417 | */ | ||
| 418 | for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; | ||
| 419 | pfn -= pageblock_nr_pages) { | ||
| 420 | unsigned long isolated; | ||
| 421 | |||
| 422 | if (!pfn_valid(pfn)) | ||
| 423 | continue; | ||
| 424 | |||
| 425 | /* | ||
| 426 | * Check for overlapping nodes/zones. It's possible on some | ||
| 427 | * configurations to have a setup like | ||
| 428 | * node0 node1 node0 | ||
| 429 | * i.e. it's possible that all pages within a zones range of | ||
| 430 | * pages do not belong to a single zone. | ||
| 431 | */ | ||
| 432 | page = pfn_to_page(pfn); | ||
| 433 | if (page_zone(page) != zone) | ||
| 434 | continue; | ||
| 435 | |||
| 436 | /* Check the block is suitable for migration */ | ||
| 437 | if (!suitable_migration_target(page)) | ||
| 438 | continue; | ||
| 439 | |||
| 440 | /* | ||
| 441 | * Found a block suitable for isolating free pages from. Now | ||
| 442 | * we disabled interrupts, double check things are ok and | ||
| 443 | * isolate the pages. This is to minimise the time IRQs | ||
| 444 | * are disabled | ||
| 445 | */ | ||
| 446 | isolated = 0; | ||
| 447 | spin_lock_irqsave(&zone->lock, flags); | ||
| 448 | if (suitable_migration_target(page)) { | ||
| 449 | end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); | ||
| 450 | isolated = isolate_freepages_block(pfn, end_pfn, | ||
| 451 | freelist, false); | ||
| 452 | nr_freepages += isolated; | ||
| 453 | } | ||
| 454 | spin_unlock_irqrestore(&zone->lock, flags); | ||
| 455 | |||
| 456 | /* | ||
| 457 | * Record the highest PFN we isolated pages from. When next | ||
| 458 | * looking for free pages, the search will restart here as | ||
| 459 | * page migration may have returned some pages to the allocator | ||
| 460 | */ | ||
| 461 | if (isolated) | ||
| 462 | high_pfn = max(high_pfn, pfn); | ||
| 463 | } | ||
| 464 | |||
| 465 | /* split_free_page does not map the pages */ | ||
| 466 | map_pages(freelist); | ||
| 467 | |||
| 468 | cc->free_pfn = high_pfn; | ||
| 469 | cc->nr_freepages = nr_freepages; | ||
| 404 | } | 470 | } |
| 405 | 471 | ||
| 406 | /* | 472 | /* |
| @@ -449,6 +515,44 @@ static void update_nr_listpages(struct compact_control *cc) | |||
| 449 | cc->nr_freepages = nr_freepages; | 515 | cc->nr_freepages = nr_freepages; |
| 450 | } | 516 | } |
| 451 | 517 | ||
| 518 | /* possible outcome of isolate_migratepages */ | ||
| 519 | typedef enum { | ||
| 520 | ISOLATE_ABORT, /* Abort compaction now */ | ||
| 521 | ISOLATE_NONE, /* No pages isolated, continue scanning */ | ||
| 522 | ISOLATE_SUCCESS, /* Pages isolated, migrate */ | ||
| 523 | } isolate_migrate_t; | ||
| 524 | |||
| 525 | /* | ||
| 526 | * Isolate all pages that can be migrated from the block pointed to by | ||
| 527 | * the migrate scanner within compact_control. | ||
| 528 | */ | ||
| 529 | static isolate_migrate_t isolate_migratepages(struct zone *zone, | ||
| 530 | struct compact_control *cc) | ||
| 531 | { | ||
| 532 | unsigned long low_pfn, end_pfn; | ||
| 533 | |||
| 534 | /* Do not scan outside zone boundaries */ | ||
| 535 | low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); | ||
| 536 | |||
| 537 | /* Only scan within a pageblock boundary */ | ||
| 538 | end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); | ||
| 539 | |||
| 540 | /* Do not cross the free scanner or scan within a memory hole */ | ||
| 541 | if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { | ||
| 542 | cc->migrate_pfn = end_pfn; | ||
| 543 | return ISOLATE_NONE; | ||
| 544 | } | ||
| 545 | |||
| 546 | /* Perform the isolation */ | ||
| 547 | low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn); | ||
| 548 | if (!low_pfn) | ||
| 549 | return ISOLATE_ABORT; | ||
| 550 | |||
| 551 | cc->migrate_pfn = low_pfn; | ||
| 552 | |||
| 553 | return ISOLATE_SUCCESS; | ||
| 554 | } | ||
| 555 | |||
| 452 | static int compact_finished(struct zone *zone, | 556 | static int compact_finished(struct zone *zone, |
| 453 | struct compact_control *cc) | 557 | struct compact_control *cc) |
| 454 | { | 558 | { |
| @@ -795,3 +899,5 @@ void compaction_unregister_node(struct node *node) | |||
| 795 | return device_remove_file(&node->dev, &dev_attr_compact); | 899 | return device_remove_file(&node->dev, &dev_attr_compact); |
| 796 | } | 900 | } |
| 797 | #endif /* CONFIG_SYSFS && CONFIG_NUMA */ | 901 | #endif /* CONFIG_SYSFS && CONFIG_NUMA */ |
| 902 | |||
| 903 | #endif /* CONFIG_COMPACTION */ | ||
diff --git a/mm/internal.h b/mm/internal.h index 2189af491783..aee4761cf9a9 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
| @@ -100,6 +100,39 @@ extern void prep_compound_page(struct page *page, unsigned long order); | |||
| 100 | extern bool is_free_buddy_page(struct page *page); | 100 | extern bool is_free_buddy_page(struct page *page); |
| 101 | #endif | 101 | #endif |
| 102 | 102 | ||
| 103 | #if defined CONFIG_COMPACTION || defined CONFIG_CMA | ||
| 104 | |||
| 105 | /* | ||
| 106 | * in mm/compaction.c | ||
| 107 | */ | ||
| 108 | /* | ||
| 109 | * compact_control is used to track pages being migrated and the free pages | ||
| 110 | * they are being migrated to during memory compaction. The free_pfn starts | ||
| 111 | * at the end of a zone and migrate_pfn begins at the start. Movable pages | ||
| 112 | * are moved to the end of a zone during a compaction run and the run | ||
| 113 | * completes when free_pfn <= migrate_pfn | ||
| 114 | */ | ||
| 115 | struct compact_control { | ||
| 116 | struct list_head freepages; /* List of free pages to migrate to */ | ||
| 117 | struct list_head migratepages; /* List of pages being migrated */ | ||
| 118 | unsigned long nr_freepages; /* Number of isolated free pages */ | ||
| 119 | unsigned long nr_migratepages; /* Number of pages to migrate */ | ||
| 120 | unsigned long free_pfn; /* isolate_freepages search base */ | ||
| 121 | unsigned long migrate_pfn; /* isolate_migratepages search base */ | ||
| 122 | bool sync; /* Synchronous migration */ | ||
| 123 | |||
| 124 | int order; /* order a direct compactor needs */ | ||
| 125 | int migratetype; /* MOVABLE, RECLAIMABLE etc */ | ||
| 126 | struct zone *zone; | ||
| 127 | }; | ||
| 128 | |||
| 129 | unsigned long | ||
| 130 | isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn); | ||
| 131 | unsigned long | ||
| 132 | isolate_migratepages_range(struct zone *zone, struct compact_control *cc, | ||
| 133 | unsigned long low_pfn, unsigned long end_pfn); | ||
| 134 | |||
| 135 | #endif | ||
| 103 | 136 | ||
| 104 | /* | 137 | /* |
| 105 | * function for dealing with page's order in buddy system. | 138 | * function for dealing with page's order in buddy system. |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 97cc2733551a..c99ad4e6b88c 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
| @@ -1404,7 +1404,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
| 1404 | /* Not a free page */ | 1404 | /* Not a free page */ |
| 1405 | ret = 1; | 1405 | ret = 1; |
| 1406 | } | 1406 | } |
| 1407 | unset_migratetype_isolate(p); | 1407 | unset_migratetype_isolate(p, MIGRATE_MOVABLE); |
| 1408 | unlock_memory_hotplug(); | 1408 | unlock_memory_hotplug(); |
| 1409 | return ret; | 1409 | return ret; |
| 1410 | } | 1410 | } |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6629fafd6ce4..fc898cb4fe8f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
| @@ -891,7 +891,7 @@ static int __ref offline_pages(unsigned long start_pfn, | |||
| 891 | nr_pages = end_pfn - start_pfn; | 891 | nr_pages = end_pfn - start_pfn; |
| 892 | 892 | ||
| 893 | /* set above range as isolated */ | 893 | /* set above range as isolated */ |
| 894 | ret = start_isolate_page_range(start_pfn, end_pfn); | 894 | ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); |
| 895 | if (ret) | 895 | if (ret) |
| 896 | goto out; | 896 | goto out; |
| 897 | 897 | ||
| @@ -956,7 +956,7 @@ repeat: | |||
| 956 | We cannot do rollback at this point. */ | 956 | We cannot do rollback at this point. */ |
| 957 | offline_isolated_pages(start_pfn, end_pfn); | 957 | offline_isolated_pages(start_pfn, end_pfn); |
| 958 | /* reset pagetype flags and makes migrate type to be MOVABLE */ | 958 | /* reset pagetype flags and makes migrate type to be MOVABLE */ |
| 959 | undo_isolate_page_range(start_pfn, end_pfn); | 959 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); |
| 960 | /* removal success */ | 960 | /* removal success */ |
| 961 | zone->present_pages -= offlined_pages; | 961 | zone->present_pages -= offlined_pages; |
| 962 | zone->zone_pgdat->node_present_pages -= offlined_pages; | 962 | zone->zone_pgdat->node_present_pages -= offlined_pages; |
| @@ -981,7 +981,7 @@ failed_removal: | |||
| 981 | start_pfn, end_pfn); | 981 | start_pfn, end_pfn); |
| 982 | memory_notify(MEM_CANCEL_OFFLINE, &arg); | 982 | memory_notify(MEM_CANCEL_OFFLINE, &arg); |
| 983 | /* pushback to free area */ | 983 | /* pushback to free area */ |
| 984 | undo_isolate_page_range(start_pfn, end_pfn); | 984 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); |
| 985 | 985 | ||
| 986 | out: | 986 | out: |
| 987 | unlock_memory_hotplug(); | 987 | unlock_memory_hotplug(); |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1851df600438..bab8e3bc4202 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -57,6 +57,7 @@ | |||
| 57 | #include <linux/ftrace_event.h> | 57 | #include <linux/ftrace_event.h> |
| 58 | #include <linux/memcontrol.h> | 58 | #include <linux/memcontrol.h> |
| 59 | #include <linux/prefetch.h> | 59 | #include <linux/prefetch.h> |
| 60 | #include <linux/migrate.h> | ||
| 60 | #include <linux/page-debug-flags.h> | 61 | #include <linux/page-debug-flags.h> |
| 61 | 62 | ||
| 62 | #include <asm/tlbflush.h> | 63 | #include <asm/tlbflush.h> |
| @@ -513,10 +514,10 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, | |||
| 513 | * free pages of length of (1 << order) and marked with _mapcount -2. Page's | 514 | * free pages of length of (1 << order) and marked with _mapcount -2. Page's |
| 514 | * order is recorded in page_private(page) field. | 515 | * order is recorded in page_private(page) field. |
| 515 | * So when we are allocating or freeing one, we can derive the state of the | 516 | * So when we are allocating or freeing one, we can derive the state of the |
| 516 | * other. That is, if we allocate a small block, and both were | 517 | * other. That is, if we allocate a small block, and both were |
| 517 | * free, the remainder of the region must be split into blocks. | 518 | * free, the remainder of the region must be split into blocks. |
| 518 | * If a block is freed, and its buddy is also free, then this | 519 | * If a block is freed, and its buddy is also free, then this |
| 519 | * triggers coalescing into a block of larger size. | 520 | * triggers coalescing into a block of larger size. |
| 520 | * | 521 | * |
| 521 | * -- wli | 522 | * -- wli |
| 522 | */ | 523 | */ |
| @@ -749,6 +750,24 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order) | |||
| 749 | __free_pages(page, order); | 750 | __free_pages(page, order); |
| 750 | } | 751 | } |
| 751 | 752 | ||
| 753 | #ifdef CONFIG_CMA | ||
| 754 | /* Free whole pageblock and set it's migration type to MIGRATE_CMA. */ | ||
| 755 | void __init init_cma_reserved_pageblock(struct page *page) | ||
| 756 | { | ||
| 757 | unsigned i = pageblock_nr_pages; | ||
| 758 | struct page *p = page; | ||
| 759 | |||
| 760 | do { | ||
| 761 | __ClearPageReserved(p); | ||
| 762 | set_page_count(p, 0); | ||
| 763 | } while (++p, --i); | ||
| 764 | |||
| 765 | set_page_refcounted(page); | ||
| 766 | set_pageblock_migratetype(page, MIGRATE_CMA); | ||
| 767 | __free_pages(page, pageblock_order); | ||
| 768 | totalram_pages += pageblock_nr_pages; | ||
| 769 | } | ||
| 770 | #endif | ||
| 752 | 771 | ||
| 753 | /* | 772 | /* |
| 754 | * The order of subdivision here is critical for the IO subsystem. | 773 | * The order of subdivision here is critical for the IO subsystem. |
| @@ -874,11 +893,17 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, | |||
| 874 | * This array describes the order lists are fallen back to when | 893 | * This array describes the order lists are fallen back to when |
| 875 | * the free lists for the desirable migrate type are depleted | 894 | * the free lists for the desirable migrate type are depleted |
| 876 | */ | 895 | */ |
| 877 | static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { | 896 | static int fallbacks[MIGRATE_TYPES][4] = { |
| 878 | [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, | 897 | [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, |
| 879 | [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, | 898 | [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, |
| 880 | [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, | 899 | #ifdef CONFIG_CMA |
| 881 | [MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */ | 900 | [MIGRATE_MOVABLE] = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, |
| 901 | [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ | ||
| 902 | #else | ||
| 903 | [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, | ||
| 904 | #endif | ||
| 905 | [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ | ||
| 906 | [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ | ||
| 882 | }; | 907 | }; |
| 883 | 908 | ||
| 884 | /* | 909 | /* |
| @@ -973,12 +998,12 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | |||
| 973 | /* Find the largest possible block of pages in the other list */ | 998 | /* Find the largest possible block of pages in the other list */ |
| 974 | for (current_order = MAX_ORDER-1; current_order >= order; | 999 | for (current_order = MAX_ORDER-1; current_order >= order; |
| 975 | --current_order) { | 1000 | --current_order) { |
| 976 | for (i = 0; i < MIGRATE_TYPES - 1; i++) { | 1001 | for (i = 0;; i++) { |
| 977 | migratetype = fallbacks[start_migratetype][i]; | 1002 | migratetype = fallbacks[start_migratetype][i]; |
| 978 | 1003 | ||
| 979 | /* MIGRATE_RESERVE handled later if necessary */ | 1004 | /* MIGRATE_RESERVE handled later if necessary */ |
| 980 | if (migratetype == MIGRATE_RESERVE) | 1005 | if (migratetype == MIGRATE_RESERVE) |
| 981 | continue; | 1006 | break; |
| 982 | 1007 | ||
| 983 | area = &(zone->free_area[current_order]); | 1008 | area = &(zone->free_area[current_order]); |
| 984 | if (list_empty(&area->free_list[migratetype])) | 1009 | if (list_empty(&area->free_list[migratetype])) |
| @@ -993,11 +1018,18 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | |||
| 993 | * pages to the preferred allocation list. If falling | 1018 | * pages to the preferred allocation list. If falling |
| 994 | * back for a reclaimable kernel allocation, be more | 1019 | * back for a reclaimable kernel allocation, be more |
| 995 | * aggressive about taking ownership of free pages | 1020 | * aggressive about taking ownership of free pages |
| 1021 | * | ||
| 1022 | * On the other hand, never change migration | ||
| 1023 | * type of MIGRATE_CMA pageblocks nor move CMA | ||
| 1024 | * pages on different free lists. We don't | ||
| 1025 | * want unmovable pages to be allocated from | ||
| 1026 | * MIGRATE_CMA areas. | ||
| 996 | */ | 1027 | */ |
| 997 | if (unlikely(current_order >= (pageblock_order >> 1)) || | 1028 | if (!is_migrate_cma(migratetype) && |
| 998 | start_migratetype == MIGRATE_RECLAIMABLE || | 1029 | (unlikely(current_order >= pageblock_order / 2) || |
| 999 | page_group_by_mobility_disabled) { | 1030 | start_migratetype == MIGRATE_RECLAIMABLE || |
| 1000 | unsigned long pages; | 1031 | page_group_by_mobility_disabled)) { |
| 1032 | int pages; | ||
| 1001 | pages = move_freepages_block(zone, page, | 1033 | pages = move_freepages_block(zone, page, |
| 1002 | start_migratetype); | 1034 | start_migratetype); |
| 1003 | 1035 | ||
| @@ -1015,11 +1047,14 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | |||
| 1015 | rmv_page_order(page); | 1047 | rmv_page_order(page); |
| 1016 | 1048 | ||
| 1017 | /* Take ownership for orders >= pageblock_order */ | 1049 | /* Take ownership for orders >= pageblock_order */ |
| 1018 | if (current_order >= pageblock_order) | 1050 | if (current_order >= pageblock_order && |
| 1051 | !is_migrate_cma(migratetype)) | ||
| 1019 | change_pageblock_range(page, current_order, | 1052 | change_pageblock_range(page, current_order, |
| 1020 | start_migratetype); | 1053 | start_migratetype); |
| 1021 | 1054 | ||
| 1022 | expand(zone, page, order, current_order, area, migratetype); | 1055 | expand(zone, page, order, current_order, area, |
| 1056 | is_migrate_cma(migratetype) | ||
| 1057 | ? migratetype : start_migratetype); | ||
| 1023 | 1058 | ||
| 1024 | trace_mm_page_alloc_extfrag(page, order, current_order, | 1059 | trace_mm_page_alloc_extfrag(page, order, current_order, |
| 1025 | start_migratetype, migratetype); | 1060 | start_migratetype, migratetype); |
| @@ -1061,17 +1096,17 @@ retry_reserve: | |||
| 1061 | return page; | 1096 | return page; |
| 1062 | } | 1097 | } |
| 1063 | 1098 | ||
| 1064 | /* | 1099 | /* |
| 1065 | * Obtain a specified number of elements from the buddy allocator, all under | 1100 | * Obtain a specified number of elements from the buddy allocator, all under |
| 1066 | * a single hold of the lock, for efficiency. Add them to the supplied list. | 1101 | * a single hold of the lock, for efficiency. Add them to the supplied list. |
| 1067 | * Returns the number of new pages which were placed at *list. | 1102 | * Returns the number of new pages which were placed at *list. |
| 1068 | */ | 1103 | */ |
| 1069 | static int rmqueue_bulk(struct zone *zone, unsigned int order, | 1104 | static int rmqueue_bulk(struct zone *zone, unsigned int order, |
| 1070 | unsigned long count, struct list_head *list, | 1105 | unsigned long count, struct list_head *list, |
| 1071 | int migratetype, int cold) | 1106 | int migratetype, int cold) |
| 1072 | { | 1107 | { |
| 1073 | int i; | 1108 | int mt = migratetype, i; |
| 1074 | 1109 | ||
| 1075 | spin_lock(&zone->lock); | 1110 | spin_lock(&zone->lock); |
| 1076 | for (i = 0; i < count; ++i) { | 1111 | for (i = 0; i < count; ++i) { |
| 1077 | struct page *page = __rmqueue(zone, order, migratetype); | 1112 | struct page *page = __rmqueue(zone, order, migratetype); |
| @@ -1091,7 +1126,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
| 1091 | list_add(&page->lru, list); | 1126 | list_add(&page->lru, list); |
| 1092 | else | 1127 | else |
| 1093 | list_add_tail(&page->lru, list); | 1128 | list_add_tail(&page->lru, list); |
| 1094 | set_page_private(page, migratetype); | 1129 | if (IS_ENABLED(CONFIG_CMA)) { |
| 1130 | mt = get_pageblock_migratetype(page); | ||
| 1131 | if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE) | ||
| 1132 | mt = migratetype; | ||
| 1133 | } | ||
| 1134 | set_page_private(page, mt); | ||
| 1095 | list = &page->lru; | 1135 | list = &page->lru; |
| 1096 | } | 1136 | } |
| 1097 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); | 1137 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); |
| @@ -1371,8 +1411,12 @@ int split_free_page(struct page *page) | |||
| 1371 | 1411 | ||
| 1372 | if (order >= pageblock_order - 1) { | 1412 | if (order >= pageblock_order - 1) { |
| 1373 | struct page *endpage = page + (1 << order) - 1; | 1413 | struct page *endpage = page + (1 << order) - 1; |
| 1374 | for (; page < endpage; page += pageblock_nr_pages) | 1414 | for (; page < endpage; page += pageblock_nr_pages) { |
| 1375 | set_pageblock_migratetype(page, MIGRATE_MOVABLE); | 1415 | int mt = get_pageblock_migratetype(page); |
| 1416 | if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt)) | ||
| 1417 | set_pageblock_migratetype(page, | ||
| 1418 | MIGRATE_MOVABLE); | ||
| 1419 | } | ||
| 1376 | } | 1420 | } |
| 1377 | 1421 | ||
| 1378 | return 1 << order; | 1422 | return 1 << order; |
| @@ -2086,16 +2130,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | |||
| 2086 | } | 2130 | } |
| 2087 | #endif /* CONFIG_COMPACTION */ | 2131 | #endif /* CONFIG_COMPACTION */ |
| 2088 | 2132 | ||
| 2089 | /* The really slow allocator path where we enter direct reclaim */ | 2133 | /* Perform direct synchronous page reclaim */ |
| 2090 | static inline struct page * | 2134 | static int |
| 2091 | __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, | 2135 | __perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, |
| 2092 | struct zonelist *zonelist, enum zone_type high_zoneidx, | 2136 | nodemask_t *nodemask) |
| 2093 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, | ||
| 2094 | int migratetype, unsigned long *did_some_progress) | ||
| 2095 | { | 2137 | { |
| 2096 | struct page *page = NULL; | ||
| 2097 | struct reclaim_state reclaim_state; | 2138 | struct reclaim_state reclaim_state; |
| 2098 | bool drained = false; | 2139 | int progress; |
| 2099 | 2140 | ||
| 2100 | cond_resched(); | 2141 | cond_resched(); |
| 2101 | 2142 | ||
| @@ -2106,7 +2147,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, | |||
| 2106 | reclaim_state.reclaimed_slab = 0; | 2147 | reclaim_state.reclaimed_slab = 0; |
| 2107 | current->reclaim_state = &reclaim_state; | 2148 | current->reclaim_state = &reclaim_state; |
| 2108 | 2149 | ||
| 2109 | *did_some_progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); | 2150 | progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); |
| 2110 | 2151 | ||
| 2111 | current->reclaim_state = NULL; | 2152 | current->reclaim_state = NULL; |
| 2112 | lockdep_clear_current_reclaim_state(); | 2153 | lockdep_clear_current_reclaim_state(); |
| @@ -2114,6 +2155,21 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, | |||
| 2114 | 2155 | ||
| 2115 | cond_resched(); | 2156 | cond_resched(); |
| 2116 | 2157 | ||
| 2158 | return progress; | ||
| 2159 | } | ||
| 2160 | |||
| 2161 | /* The really slow allocator path where we enter direct reclaim */ | ||
| 2162 | static inline struct page * | ||
| 2163 | __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, | ||
| 2164 | struct zonelist *zonelist, enum zone_type high_zoneidx, | ||
| 2165 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, | ||
| 2166 | int migratetype, unsigned long *did_some_progress) | ||
| 2167 | { | ||
| 2168 | struct page *page = NULL; | ||
| 2169 | bool drained = false; | ||
| 2170 | |||
| 2171 | *did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, | ||
| 2172 | nodemask); | ||
| 2117 | if (unlikely(!(*did_some_progress))) | 2173 | if (unlikely(!(*did_some_progress))) |
| 2118 | return NULL; | 2174 | return NULL; |
| 2119 | 2175 | ||
| @@ -4301,7 +4357,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
| 4301 | init_waitqueue_head(&pgdat->kswapd_wait); | 4357 | init_waitqueue_head(&pgdat->kswapd_wait); |
| 4302 | pgdat->kswapd_max_order = 0; | 4358 | pgdat->kswapd_max_order = 0; |
| 4303 | pgdat_page_cgroup_init(pgdat); | 4359 | pgdat_page_cgroup_init(pgdat); |
| 4304 | 4360 | ||
| 4305 | for (j = 0; j < MAX_NR_ZONES; j++) { | 4361 | for (j = 0; j < MAX_NR_ZONES; j++) { |
| 4306 | struct zone *zone = pgdat->node_zones + j; | 4362 | struct zone *zone = pgdat->node_zones + j; |
| 4307 | unsigned long size, realsize, memmap_pages; | 4363 | unsigned long size, realsize, memmap_pages; |
| @@ -4976,14 +5032,7 @@ static void setup_per_zone_lowmem_reserve(void) | |||
| 4976 | calculate_totalreserve_pages(); | 5032 | calculate_totalreserve_pages(); |
| 4977 | } | 5033 | } |
| 4978 | 5034 | ||
| 4979 | /** | 5035 | static void __setup_per_zone_wmarks(void) |
| 4980 | * setup_per_zone_wmarks - called when min_free_kbytes changes | ||
| 4981 | * or when memory is hot-{added|removed} | ||
| 4982 | * | ||
| 4983 | * Ensures that the watermark[min,low,high] values for each zone are set | ||
| 4984 | * correctly with respect to min_free_kbytes. | ||
| 4985 | */ | ||
| 4986 | void setup_per_zone_wmarks(void) | ||
| 4987 | { | 5036 | { |
| 4988 | unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); | 5037 | unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); |
| 4989 | unsigned long lowmem_pages = 0; | 5038 | unsigned long lowmem_pages = 0; |
| @@ -5030,6 +5079,11 @@ void setup_per_zone_wmarks(void) | |||
| 5030 | 5079 | ||
| 5031 | zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); | 5080 | zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); |
| 5032 | zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); | 5081 | zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); |
| 5082 | |||
| 5083 | zone->watermark[WMARK_MIN] += cma_wmark_pages(zone); | ||
| 5084 | zone->watermark[WMARK_LOW] += cma_wmark_pages(zone); | ||
| 5085 | zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone); | ||
| 5086 | |||
| 5033 | setup_zone_migrate_reserve(zone); | 5087 | setup_zone_migrate_reserve(zone); |
| 5034 | spin_unlock_irqrestore(&zone->lock, flags); | 5088 | spin_unlock_irqrestore(&zone->lock, flags); |
| 5035 | } | 5089 | } |
| @@ -5038,6 +5092,20 @@ void setup_per_zone_wmarks(void) | |||
| 5038 | calculate_totalreserve_pages(); | 5092 | calculate_totalreserve_pages(); |
| 5039 | } | 5093 | } |
| 5040 | 5094 | ||
| 5095 | /** | ||
| 5096 | * setup_per_zone_wmarks - called when min_free_kbytes changes | ||
| 5097 | * or when memory is hot-{added|removed} | ||
| 5098 | * | ||
| 5099 | * Ensures that the watermark[min,low,high] values for each zone are set | ||
| 5100 | * correctly with respect to min_free_kbytes. | ||
| 5101 | */ | ||
| 5102 | void setup_per_zone_wmarks(void) | ||
| 5103 | { | ||
| 5104 | mutex_lock(&zonelists_mutex); | ||
| 5105 | __setup_per_zone_wmarks(); | ||
| 5106 | mutex_unlock(&zonelists_mutex); | ||
| 5107 | } | ||
| 5108 | |||
| 5041 | /* | 5109 | /* |
| 5042 | * The inactive anon list should be small enough that the VM never has to | 5110 | * The inactive anon list should be small enough that the VM never has to |
| 5043 | * do too much work, but large enough that each inactive page has a chance | 5111 | * do too much work, but large enough that each inactive page has a chance |
| @@ -5415,14 +5483,16 @@ static int | |||
| 5415 | __count_immobile_pages(struct zone *zone, struct page *page, int count) | 5483 | __count_immobile_pages(struct zone *zone, struct page *page, int count) |
| 5416 | { | 5484 | { |
| 5417 | unsigned long pfn, iter, found; | 5485 | unsigned long pfn, iter, found; |
| 5486 | int mt; | ||
| 5487 | |||
| 5418 | /* | 5488 | /* |
| 5419 | * For avoiding noise data, lru_add_drain_all() should be called | 5489 | * For avoiding noise data, lru_add_drain_all() should be called |
| 5420 | * If ZONE_MOVABLE, the zone never contains immobile pages | 5490 | * If ZONE_MOVABLE, the zone never contains immobile pages |
| 5421 | */ | 5491 | */ |
| 5422 | if (zone_idx(zone) == ZONE_MOVABLE) | 5492 | if (zone_idx(zone) == ZONE_MOVABLE) |
| 5423 | return true; | 5493 | return true; |
| 5424 | 5494 | mt = get_pageblock_migratetype(page); | |
| 5425 | if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE) | 5495 | if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt)) |
| 5426 | return true; | 5496 | return true; |
| 5427 | 5497 | ||
| 5428 | pfn = page_to_pfn(page); | 5498 | pfn = page_to_pfn(page); |
| @@ -5539,7 +5609,7 @@ out: | |||
| 5539 | return ret; | 5609 | return ret; |
| 5540 | } | 5610 | } |
| 5541 | 5611 | ||
| 5542 | void unset_migratetype_isolate(struct page *page) | 5612 | void unset_migratetype_isolate(struct page *page, unsigned migratetype) |
| 5543 | { | 5613 | { |
| 5544 | struct zone *zone; | 5614 | struct zone *zone; |
| 5545 | unsigned long flags; | 5615 | unsigned long flags; |
| @@ -5547,12 +5617,259 @@ void unset_migratetype_isolate(struct page *page) | |||
| 5547 | spin_lock_irqsave(&zone->lock, flags); | 5617 | spin_lock_irqsave(&zone->lock, flags); |
| 5548 | if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) | 5618 | if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) |
| 5549 | goto out; | 5619 | goto out; |
| 5550 | set_pageblock_migratetype(page, MIGRATE_MOVABLE); | 5620 | set_pageblock_migratetype(page, migratetype); |
| 5551 | move_freepages_block(zone, page, MIGRATE_MOVABLE); | 5621 | move_freepages_block(zone, page, migratetype); |
| 5552 | out: | 5622 | out: |
| 5553 | spin_unlock_irqrestore(&zone->lock, flags); | 5623 | spin_unlock_irqrestore(&zone->lock, flags); |
| 5554 | } | 5624 | } |
| 5555 | 5625 | ||
| 5626 | #ifdef CONFIG_CMA | ||
| 5627 | |||
| 5628 | static unsigned long pfn_max_align_down(unsigned long pfn) | ||
| 5629 | { | ||
| 5630 | return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES, | ||
| 5631 | pageblock_nr_pages) - 1); | ||
| 5632 | } | ||
| 5633 | |||
| 5634 | static unsigned long pfn_max_align_up(unsigned long pfn) | ||
| 5635 | { | ||
| 5636 | return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES, | ||
| 5637 | pageblock_nr_pages)); | ||
| 5638 | } | ||
| 5639 | |||
| 5640 | static struct page * | ||
| 5641 | __alloc_contig_migrate_alloc(struct page *page, unsigned long private, | ||
| 5642 | int **resultp) | ||
| 5643 | { | ||
| 5644 | return alloc_page(GFP_HIGHUSER_MOVABLE); | ||
| 5645 | } | ||
| 5646 | |||
| 5647 | /* [start, end) must belong to a single zone. */ | ||
| 5648 | static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) | ||
| 5649 | { | ||
| 5650 | /* This function is based on compact_zone() from compaction.c. */ | ||
| 5651 | |||
| 5652 | unsigned long pfn = start; | ||
| 5653 | unsigned int tries = 0; | ||
| 5654 | int ret = 0; | ||
| 5655 | |||
| 5656 | struct compact_control cc = { | ||
| 5657 | .nr_migratepages = 0, | ||
| 5658 | .order = -1, | ||
| 5659 | .zone = page_zone(pfn_to_page(start)), | ||
| 5660 | .sync = true, | ||
| 5661 | }; | ||
| 5662 | INIT_LIST_HEAD(&cc.migratepages); | ||
| 5663 | |||
| 5664 | migrate_prep_local(); | ||
| 5665 | |||
| 5666 | while (pfn < end || !list_empty(&cc.migratepages)) { | ||
| 5667 | if (fatal_signal_pending(current)) { | ||
| 5668 | ret = -EINTR; | ||
| 5669 | break; | ||
| 5670 | } | ||
| 5671 | |||
| 5672 | if (list_empty(&cc.migratepages)) { | ||
| 5673 | cc.nr_migratepages = 0; | ||
| 5674 | pfn = isolate_migratepages_range(cc.zone, &cc, | ||
| 5675 | pfn, end); | ||
| 5676 | if (!pfn) { | ||
| 5677 | ret = -EINTR; | ||
| 5678 | break; | ||
| 5679 | } | ||
| 5680 | tries = 0; | ||
| 5681 | } else if (++tries == 5) { | ||
| 5682 | ret = ret < 0 ? ret : -EBUSY; | ||
| 5683 | break; | ||
| 5684 | } | ||
| 5685 | |||
| 5686 | ret = migrate_pages(&cc.migratepages, | ||
| 5687 | __alloc_contig_migrate_alloc, | ||
| 5688 | 0, false, MIGRATE_SYNC); | ||
| 5689 | } | ||
| 5690 | |||
| 5691 | putback_lru_pages(&cc.migratepages); | ||
| 5692 | return ret > 0 ? 0 : ret; | ||
| 5693 | } | ||
| 5694 | |||
| 5695 | /* | ||
| 5696 | * Update zone's cma pages counter used for watermark level calculation. | ||
| 5697 | */ | ||
| 5698 | static inline void __update_cma_watermarks(struct zone *zone, int count) | ||
| 5699 | { | ||
| 5700 | unsigned long flags; | ||
| 5701 | spin_lock_irqsave(&zone->lock, flags); | ||
| 5702 | zone->min_cma_pages += count; | ||
| 5703 | spin_unlock_irqrestore(&zone->lock, flags); | ||
| 5704 | setup_per_zone_wmarks(); | ||
| 5705 | } | ||
| 5706 | |||
| 5707 | /* | ||
| 5708 | * Trigger memory pressure bump to reclaim some pages in order to be able to | ||
| 5709 | * allocate 'count' pages in single page units. Does similar work as | ||
| 5710 | *__alloc_pages_slowpath() function. | ||
| 5711 | */ | ||
| 5712 | static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count) | ||
| 5713 | { | ||
| 5714 | enum zone_type high_zoneidx = gfp_zone(gfp_mask); | ||
| 5715 | struct zonelist *zonelist = node_zonelist(0, gfp_mask); | ||
| 5716 | int did_some_progress = 0; | ||
| 5717 | int order = 1; | ||
| 5718 | |||
| 5719 | /* | ||
| 5720 | * Increase level of watermarks to force kswapd do his job | ||
| 5721 | * to stabilise at new watermark level. | ||
| 5722 | */ | ||
| 5723 | __update_cma_watermarks(zone, count); | ||
| 5724 | |||
| 5725 | /* Obey watermarks as if the page was being allocated */ | ||
| 5726 | while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) { | ||
| 5727 | wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone)); | ||
| 5728 | |||
| 5729 | did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, | ||
| 5730 | NULL); | ||
| 5731 | if (!did_some_progress) { | ||
| 5732 | /* Exhausted what can be done so it's blamo time */ | ||
| 5733 | out_of_memory(zonelist, gfp_mask, order, NULL, false); | ||
| 5734 | } | ||
| 5735 | } | ||
| 5736 | |||
| 5737 | /* Restore original watermark levels. */ | ||
| 5738 | __update_cma_watermarks(zone, -count); | ||
| 5739 | |||
| 5740 | return count; | ||
| 5741 | } | ||
| 5742 | |||
| 5743 | /** | ||
| 5744 | * alloc_contig_range() -- tries to allocate given range of pages | ||
| 5745 | * @start: start PFN to allocate | ||
| 5746 | * @end: one-past-the-last PFN to allocate | ||
| 5747 | * @migratetype: migratetype of the underlaying pageblocks (either | ||
| 5748 | * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks | ||
| 5749 | * in range must have the same migratetype and it must | ||
| 5750 | * be either of the two. | ||
| 5751 | * | ||
| 5752 | * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES | ||
| 5753 | * aligned, however it's the caller's responsibility to guarantee that | ||
| 5754 | * we are the only thread that changes migrate type of pageblocks the | ||
| 5755 | * pages fall in. | ||
| 5756 | * | ||
| 5757 | * The PFN range must belong to a single zone. | ||
| 5758 | * | ||
| 5759 | * Returns zero on success or negative error code. On success all | ||
| 5760 | * pages which PFN is in [start, end) are allocated for the caller and | ||
| 5761 | * need to be freed with free_contig_range(). | ||
| 5762 | */ | ||
| 5763 | int alloc_contig_range(unsigned long start, unsigned long end, | ||
| 5764 | unsigned migratetype) | ||
| 5765 | { | ||
| 5766 | struct zone *zone = page_zone(pfn_to_page(start)); | ||
| 5767 | unsigned long outer_start, outer_end; | ||
| 5768 | int ret = 0, order; | ||
| 5769 | |||
| 5770 | /* | ||
| 5771 | * What we do here is we mark all pageblocks in range as | ||
| 5772 | * MIGRATE_ISOLATE. Because pageblock and max order pages may | ||
| 5773 | * have different sizes, and due to the way page allocator | ||
| 5774 | * work, we align the range to biggest of the two pages so | ||
| 5775 | * that page allocator won't try to merge buddies from | ||
| 5776 | * different pageblocks and change MIGRATE_ISOLATE to some | ||
| 5777 | * other migration type. | ||
| 5778 | * | ||
| 5779 | * Once the pageblocks are marked as MIGRATE_ISOLATE, we | ||
| 5780 | * migrate the pages from an unaligned range (ie. pages that | ||
| 5781 | * we are interested in). This will put all the pages in | ||
| 5782 | * range back to page allocator as MIGRATE_ISOLATE. | ||
| 5783 | * | ||
| 5784 | * When this is done, we take the pages in range from page | ||
| 5785 | * allocator removing them from the buddy system. This way | ||
| 5786 | * page allocator will never consider using them. | ||
| 5787 | * | ||
| 5788 | * This lets us mark the pageblocks back as | ||
| 5789 | * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the | ||
| 5790 | * aligned range but not in the unaligned, original range are | ||
| 5791 | * put back to page allocator so that buddy can use them. | ||
| 5792 | */ | ||
| 5793 | |||
| 5794 | ret = start_isolate_page_range(pfn_max_align_down(start), | ||
| 5795 | pfn_max_align_up(end), migratetype); | ||
| 5796 | if (ret) | ||
| 5797 | goto done; | ||
| 5798 | |||
| 5799 | ret = __alloc_contig_migrate_range(start, end); | ||
| 5800 | if (ret) | ||
| 5801 | goto done; | ||
| 5802 | |||
| 5803 | /* | ||
| 5804 | * Pages from [start, end) are within a MAX_ORDER_NR_PAGES | ||
| 5805 | * aligned blocks that are marked as MIGRATE_ISOLATE. What's | ||
| 5806 | * more, all pages in [start, end) are free in page allocator. | ||
| 5807 | * What we are going to do is to allocate all pages from | ||
| 5808 | * [start, end) (that is remove them from page allocator). | ||
| 5809 | * | ||
| 5810 | * The only problem is that pages at the beginning and at the | ||
| 5811 | * end of interesting range may be not aligned with pages that | ||
| 5812 | * page allocator holds, ie. they can be part of higher order | ||
| 5813 | * pages. Because of this, we reserve the bigger range and | ||
| 5814 | * once this is done free the pages we are not interested in. | ||
| 5815 | * | ||
| 5816 | * We don't have to hold zone->lock here because the pages are | ||
| 5817 | * isolated thus they won't get removed from buddy. | ||
| 5818 | */ | ||
| 5819 | |||
| 5820 | lru_add_drain_all(); | ||
| 5821 | drain_all_pages(); | ||
| 5822 | |||
| 5823 | order = 0; | ||
| 5824 | outer_start = start; | ||
| 5825 | while (!PageBuddy(pfn_to_page(outer_start))) { | ||
| 5826 | if (++order >= MAX_ORDER) { | ||
| 5827 | ret = -EBUSY; | ||
| 5828 | goto done; | ||
| 5829 | } | ||
| 5830 | outer_start &= ~0UL << order; | ||
| 5831 | } | ||
| 5832 | |||
| 5833 | /* Make sure the range is really isolated. */ | ||
| 5834 | if (test_pages_isolated(outer_start, end)) { | ||
| 5835 | pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", | ||
| 5836 | outer_start, end); | ||
| 5837 | ret = -EBUSY; | ||
| 5838 | goto done; | ||
| 5839 | } | ||
| 5840 | |||
| 5841 | /* | ||
| 5842 | * Reclaim enough pages to make sure that contiguous allocation | ||
| 5843 | * will not starve the system. | ||
| 5844 | */ | ||
| 5845 | __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start); | ||
| 5846 | |||
| 5847 | /* Grab isolated pages from freelists. */ | ||
| 5848 | outer_end = isolate_freepages_range(outer_start, end); | ||
| 5849 | if (!outer_end) { | ||
| 5850 | ret = -EBUSY; | ||
| 5851 | goto done; | ||
| 5852 | } | ||
| 5853 | |||
| 5854 | /* Free head and tail (if any) */ | ||
| 5855 | if (start != outer_start) | ||
| 5856 | free_contig_range(outer_start, start - outer_start); | ||
| 5857 | if (end != outer_end) | ||
| 5858 | free_contig_range(end, outer_end - end); | ||
| 5859 | |||
| 5860 | done: | ||
| 5861 | undo_isolate_page_range(pfn_max_align_down(start), | ||
| 5862 | pfn_max_align_up(end), migratetype); | ||
| 5863 | return ret; | ||
| 5864 | } | ||
| 5865 | |||
| 5866 | void free_contig_range(unsigned long pfn, unsigned nr_pages) | ||
| 5867 | { | ||
| 5868 | for (; nr_pages--; ++pfn) | ||
| 5869 | __free_page(pfn_to_page(pfn)); | ||
| 5870 | } | ||
| 5871 | #endif | ||
| 5872 | |||
| 5556 | #ifdef CONFIG_MEMORY_HOTREMOVE | 5873 | #ifdef CONFIG_MEMORY_HOTREMOVE |
| 5557 | /* | 5874 | /* |
| 5558 | * All pages in the range must be isolated before calling this. | 5875 | * All pages in the range must be isolated before calling this. |
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 4ae42bb40892..c9f04774f2b8 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c | |||
| @@ -24,6 +24,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) | |||
| 24 | * to be MIGRATE_ISOLATE. | 24 | * to be MIGRATE_ISOLATE. |
| 25 | * @start_pfn: The lower PFN of the range to be isolated. | 25 | * @start_pfn: The lower PFN of the range to be isolated. |
| 26 | * @end_pfn: The upper PFN of the range to be isolated. | 26 | * @end_pfn: The upper PFN of the range to be isolated. |
| 27 | * @migratetype: migrate type to set in error recovery. | ||
| 27 | * | 28 | * |
| 28 | * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in | 29 | * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in |
| 29 | * the range will never be allocated. Any free pages and pages freed in the | 30 | * the range will never be allocated. Any free pages and pages freed in the |
| @@ -32,8 +33,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) | |||
| 32 | * start_pfn/end_pfn must be aligned to pageblock_order. | 33 | * start_pfn/end_pfn must be aligned to pageblock_order. |
| 33 | * Returns 0 on success and -EBUSY if any part of range cannot be isolated. | 34 | * Returns 0 on success and -EBUSY if any part of range cannot be isolated. |
| 34 | */ | 35 | */ |
| 35 | int | 36 | int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
| 36 | start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) | 37 | unsigned migratetype) |
| 37 | { | 38 | { |
| 38 | unsigned long pfn; | 39 | unsigned long pfn; |
| 39 | unsigned long undo_pfn; | 40 | unsigned long undo_pfn; |
| @@ -56,7 +57,7 @@ undo: | |||
| 56 | for (pfn = start_pfn; | 57 | for (pfn = start_pfn; |
| 57 | pfn < undo_pfn; | 58 | pfn < undo_pfn; |
| 58 | pfn += pageblock_nr_pages) | 59 | pfn += pageblock_nr_pages) |
| 59 | unset_migratetype_isolate(pfn_to_page(pfn)); | 60 | unset_migratetype_isolate(pfn_to_page(pfn), migratetype); |
| 60 | 61 | ||
| 61 | return -EBUSY; | 62 | return -EBUSY; |
| 62 | } | 63 | } |
| @@ -64,8 +65,8 @@ undo: | |||
| 64 | /* | 65 | /* |
| 65 | * Make isolated pages available again. | 66 | * Make isolated pages available again. |
| 66 | */ | 67 | */ |
| 67 | int | 68 | int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
| 68 | undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) | 69 | unsigned migratetype) |
| 69 | { | 70 | { |
| 70 | unsigned long pfn; | 71 | unsigned long pfn; |
| 71 | struct page *page; | 72 | struct page *page; |
| @@ -77,7 +78,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) | |||
| 77 | page = __first_valid_page(pfn, pageblock_nr_pages); | 78 | page = __first_valid_page(pfn, pageblock_nr_pages); |
| 78 | if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE) | 79 | if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE) |
| 79 | continue; | 80 | continue; |
| 80 | unset_migratetype_isolate(page); | 81 | unset_migratetype_isolate(page, migratetype); |
| 81 | } | 82 | } |
| 82 | return 0; | 83 | return 0; |
| 83 | } | 84 | } |
| @@ -86,7 +87,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) | |||
| 86 | * all pages in [start_pfn...end_pfn) must be in the same zone. | 87 | * all pages in [start_pfn...end_pfn) must be in the same zone. |
| 87 | * zone->lock must be held before call this. | 88 | * zone->lock must be held before call this. |
| 88 | * | 89 | * |
| 89 | * Returns 1 if all pages in the range is isolated. | 90 | * Returns 1 if all pages in the range are isolated. |
| 90 | */ | 91 | */ |
| 91 | static int | 92 | static int |
| 92 | __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn) | 93 | __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn) |
diff --git a/mm/vmstat.c b/mm/vmstat.c index 7db1b9bab492..0dad31dc1618 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
| @@ -613,6 +613,9 @@ static char * const migratetype_names[MIGRATE_TYPES] = { | |||
| 613 | "Reclaimable", | 613 | "Reclaimable", |
| 614 | "Movable", | 614 | "Movable", |
| 615 | "Reserve", | 615 | "Reserve", |
| 616 | #ifdef CONFIG_CMA | ||
| 617 | "CMA", | ||
| 618 | #endif | ||
| 616 | "Isolate", | 619 | "Isolate", |
| 617 | }; | 620 | }; |
| 618 | 621 | ||
