-rw-r--r--  Documentation/kdump/kdump.txt          |  20
-rw-r--r--  arch/arm/Kconfig                       |   3
-rw-r--r--  arch/arm/mm/consistent.c               |   8
-rw-r--r--  arch/cris/arch-v32/drivers/Kconfig     |   1
-rw-r--r--  arch/cris/arch-v32/drivers/pci/dma.c   | 106
-rw-r--r--  arch/sh/Kconfig                        |   1
-rw-r--r--  arch/sh/mm/consistent.c                |  98
-rw-r--r--  arch/x86/Kconfig                       |   1
-rw-r--r--  arch/x86/kernel/amd_iommu.c            |  13
-rw-r--r--  arch/x86/kernel/pci-dma.c              | 122
-rw-r--r--  arch/x86/kernel/pci-gart_64.c          |  11
-rw-r--r--  arch/x86/kvm/Kconfig                   |   1
-rw-r--r--  arch/x86/mm/gup.c                      |   9
-rw-r--r--  arch/x86/pci/fixup.c                   |   3
-rw-r--r--  arch/x86/pci/i386.c                    |  26
-rw-r--r--  arch/x86/pci/irq.c                     | 106
-rw-r--r--  arch/x86/pci/numaq_32.c                |   5
-rw-r--r--  drivers/acpi/pci_slot.c                |  18
-rw-r--r--  drivers/mfd/mfd-core.c                 |  32
-rw-r--r--  drivers/mfd/tc6393xb.c                 |   8
-rw-r--r--  drivers/pci/hotplug/pciehp_hpc.c       |   2
-rw-r--r--  drivers/pci/msi.c                      |  15
-rw-r--r--  drivers/pci/pci-acpi.c                 |   7
-rw-r--r--  drivers/pci/pci.c                      |  10
-rw-r--r--  drivers/pci/pcie/aspm.c                |  32
-rw-r--r--  drivers/pci/probe.c                    | 245
-rw-r--r--  drivers/pci/quirks.c                   |  13
-rw-r--r--  drivers/scsi/qla2xxx/qla_attr.c        |   1
-rw-r--r--  drivers/spi/mpc52xx_psc_spi.c          |  22
-rw-r--r--  drivers/spi/spi_s3c24xx.c              |   1
-rw-r--r--  drivers/video/sh7760fb.c               |   1
-rw-r--r--  fs/bio-integrity.c                     |   1
-rw-r--r--  fs/buffer.c                            |  46
-rw-r--r--  fs/ecryptfs/crypto.c                   |  30
-rw-r--r--  fs/exec.c                              |   1
-rw-r--r--  fs/ext2/inode.c                        |   1
-rw-r--r--  fs/ext3/inode.c                        |  67
-rw-r--r--  fs/ext4/inode.c                        |  92
-rw-r--r--  include/acpi/actbl.h                   |   1
-rw-r--r--  include/asm-arm/arch-s3c2410/spi.h     |   1
-rw-r--r--  include/asm-arm/dma-mapping.h          |   2
-rw-r--r--  include/asm-cris/dma-mapping.h         |   2
-rw-r--r--  include/asm-generic/dma-coherent.h     |  32
-rw-r--r--  include/asm-generic/gpio.h             |   1
-rw-r--r--  include/asm-generic/pgtable-nopmd.h    |   6
-rw-r--r--  include/asm-sh/dma-mapping.h           |   1
-rw-r--r--  include/asm-x86/dma-mapping.h          |  22
-rw-r--r--  include/linux/buffer_head.h            |   2
-rw-r--r--  include/linux/fs.h                     |  44
-rw-r--r--  include/linux/iommu-helper.h           |   1
-rw-r--r--  include/linux/mfd/core.h               |  30
-rw-r--r--  include/linux/mm.h                     |   3
-rw-r--r--  include/linux/mm_types.h               |   4
-rw-r--r--  include/linux/mmu_notifier.h           | 279
-rw-r--r--  include/linux/pagemap.h                |   1
-rw-r--r--  include/linux/pci-aspm.h               |   5
-rw-r--r--  include/linux/pci.h                    |   2
-rw-r--r--  include/linux/pci_regs.h               |   1
-rw-r--r--  include/linux/rculist.h                |  28
-rw-r--r--  include/linux/rmap.h                   |   8
-rw-r--r--  init/Kconfig                           |   4
-rw-r--r--  kernel/Makefile                        |   1
-rw-r--r--  kernel/dma-coherent.c                  | 154
-rw-r--r--  kernel/fork.c                          |   3
-rw-r--r--  lib/iommu-helper.c                     |   8
-rw-r--r--  lib/ratelimit.c                        |   3
-rw-r--r--  mm/Kconfig                             |   3
-rw-r--r--  mm/Makefile                            |   1
-rw-r--r--  mm/filemap.c                           |  14
-rw-r--r--  mm/filemap_xip.c                       |   3
-rw-r--r--  mm/fremap.c                            |   3
-rw-r--r--  mm/hugetlb.c                           |   4
-rw-r--r--  mm/memory.c                            |  35
-rw-r--r--  mm/mmap.c                              | 160
-rw-r--r--  mm/mmu_notifier.c                      | 277
-rw-r--r--  mm/mprotect.c                          |   3
-rw-r--r--  mm/mremap.c                            |   6
-rw-r--r--  mm/rmap.c                              |  13
-rw-r--r--  mm/shmem.c                             |   3
79 files changed, 1594 insertions(+), 759 deletions(-)
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 9691c7f5166c..0705040531a5 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -65,26 +65,26 @@ Install kexec-tools
 
 2) Download the kexec-tools user-space package from the following URL:
 
-http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/kexec-tools-testing.tar.gz
+http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/kexec-tools.tar.gz
 
-This is a symlink to the latest version, which at the time of writing is
-20061214, the only release of kexec-tools-testing so far. As other versions
-are released, the older ones will remain available at
-http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/
+This is a symlink to the latest version.
 
-Note: Latest kexec-tools-testing git tree is available at
+The latest kexec-tools git tree is available at:
 
-git://git.kernel.org/pub/scm/linux/kernel/git/horms/kexec-tools-testing.git
+git://git.kernel.org/pub/scm/linux/kernel/git/horms/kexec-tools.git
 or
-http://www.kernel.org/git/?p=linux/kernel/git/horms/kexec-tools-testing.git;a=summary
+http://www.kernel.org/git/?p=linux/kernel/git/horms/kexec-tools.git
+
+More information about kexec-tools can be found at
+http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/README.html
 
 3) Unpack the tarball with the tar command, as follows:
 
-	tar xvpzf kexec-tools-testing.tar.gz
+	tar xvpzf kexec-tools.tar.gz
 
 4) Change to the kexec-tools directory, as follows:
 
-	cd kexec-tools-testing-VERSION
+	cd kexec-tools-VERSION
 
 5) Configure the package, as follows:
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c8f528284a94..257033c691f2 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -17,6 +17,7 @@ config ARM
 	select HAVE_KRETPROBES if (HAVE_KPROBES)
 	select HAVE_FTRACE if (!XIP_KERNEL)
 	select HAVE_DYNAMIC_FTRACE if (HAVE_FTRACE)
+	select HAVE_GENERIC_DMA_COHERENT
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
@@ -234,6 +235,7 @@ config ARCH_VERSATILE
 config ARCH_AT91
 	bool "Atmel AT91"
 	select GENERIC_GPIO
+	select HAVE_CLK
 	help
 	  This enables support for systems based on the Atmel AT91RM9200,
 	  AT91SAM9 and AT91CAP9 processors.
@@ -267,7 +269,6 @@ config ARCH_EP93XX
 	select ARM_VIC
 	select GENERIC_GPIO
 	select HAVE_CLK
-	select HAVE_CLK
 	select ARCH_REQUIRE_GPIOLIB
 	help
 	  This enables support for the Cirrus EP93xx series of CPUs.
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c
index 333a82a3717e..db7b3e38ef1d 100644
--- a/arch/arm/mm/consistent.c
+++ b/arch/arm/mm/consistent.c
@@ -274,6 +274,11 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
 void *
 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 {
+	void *memory;
+
+	if (dma_alloc_from_coherent(dev, size, handle, &memory))
+		return memory;
+
 	if (arch_is_coherent()) {
 		void *virt;
 
@@ -362,6 +367,9 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr
 
 	WARN_ON(irqs_disabled());
 
+	if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
+		return;
+
 	if (arch_is_coherent()) {
 		kfree(cpu_addr);
 		return;
diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index 2a92cb1886ca..7a64fcef9d07 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -641,6 +641,7 @@ config PCI
 	bool
 	depends on ETRAX_CARDBUS
 	default y
+	select HAVE_GENERIC_DMA_COHERENT
 
 config ETRAX_IOP_FW_LOAD
 	tristate "IO-processor hotplug firmware loading support"
diff --git a/arch/cris/arch-v32/drivers/pci/dma.c b/arch/cris/arch-v32/drivers/pci/dma.c
index e0364654fc44..fbe65954ee6c 100644
--- a/arch/cris/arch-v32/drivers/pci/dma.c
+++ b/arch/cris/arch-v32/drivers/pci/dma.c
@@ -15,35 +15,16 @@
 #include <linux/pci.h>
 #include <asm/io.h>
 
-struct dma_coherent_mem {
-	void *virt_base;
-	u32 device_base;
-	int size;
-	int flags;
-	unsigned long *bitmap;
-};
-
 void *dma_alloc_coherent(struct device *dev, size_t size,
 			 dma_addr_t *dma_handle, gfp_t gfp)
 {
 	void *ret;
-	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
 	int order = get_order(size);
 	/* ignore region specifiers */
 	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
 
-	if (mem) {
-		int page = bitmap_find_free_region(mem->bitmap, mem->size,
-						   order);
-		if (page >= 0) {
-			*dma_handle = mem->device_base + (page << PAGE_SHIFT);
-			ret = mem->virt_base + (page << PAGE_SHIFT);
-			memset(ret, 0, size);
-			return ret;
-		}
-		if (mem->flags & DMA_MEMORY_EXCLUSIVE)
-			return NULL;
-	}
+	if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
+		return ret;
 
 	if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
 		gfp |= GFP_DMA;
@@ -60,90 +41,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 void dma_free_coherent(struct device *dev, size_t size,
 		       void *vaddr, dma_addr_t dma_handle)
 {
-	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
 	int order = get_order(size);
 
-	if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
-		int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-
-		bitmap_release_region(mem->bitmap, page, order);
-	} else
+	if (!dma_release_from_coherent(dev, order, vaddr))
 		free_pages((unsigned long)vaddr, order);
 }
 
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
-				dma_addr_t device_addr, size_t size, int flags)
-{
-	void __iomem *mem_base;
-	int pages = size >> PAGE_SHIFT;
-	int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
-
-	if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
-		goto out;
-	if (!size)
-		goto out;
-	if (dev->dma_mem)
-		goto out;
-
-	/* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
-
-	mem_base = ioremap(bus_addr, size);
-	if (!mem_base)
-		goto out;
-
-	dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
-	if (!dev->dma_mem)
-		goto iounmap_out;
-	dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-	if (!dev->dma_mem->bitmap)
-		goto free1_out;
-
-	dev->dma_mem->virt_base = mem_base;
-	dev->dma_mem->device_base = device_addr;
-	dev->dma_mem->size = pages;
-	dev->dma_mem->flags = flags;
-
-	if (flags & DMA_MEMORY_MAP)
-		return DMA_MEMORY_MAP;
-
-	return DMA_MEMORY_IO;
-
- free1_out:
-	kfree(dev->dma_mem);
- iounmap_out:
-	iounmap(mem_base);
- out:
-	return 0;
-}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
-	struct dma_coherent_mem *mem = dev->dma_mem;
-
-	if(!mem)
-		return;
-	dev->dma_mem = NULL;
-	iounmap(mem->virt_base);
-	kfree(mem->bitmap);
-	kfree(mem);
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
-					dma_addr_t device_addr, size_t size)
-{
-	struct dma_coherent_mem *mem = dev->dma_mem;
-	int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	int pos, err;
-
-	if (!mem)
-		return ERR_PTR(-EINVAL);
-
-	pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
-	err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
-	if (err != 0)
-		return ERR_PTR(err);
-	return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 7bfb0d219d67..0b88dc462d73 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -11,6 +11,7 @@ config SUPERH
 	select HAVE_CLK
 	select HAVE_IDE
 	select HAVE_OPROFILE
+	select HAVE_GENERIC_DMA_COHERENT
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 8277982d0938..b2ce014401b5 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -28,21 +28,10 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 			 dma_addr_t *dma_handle, gfp_t gfp)
 {
 	void *ret, *ret_nocache;
-	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
 	int order = get_order(size);
 
-	if (mem) {
-		int page = bitmap_find_free_region(mem->bitmap, mem->size,
-						   order);
-		if (page >= 0) {
-			*dma_handle = mem->device_base + (page << PAGE_SHIFT);
-			ret = mem->virt_base + (page << PAGE_SHIFT);
-			memset(ret, 0, size);
-			return ret;
-		}
-		if (mem->flags & DMA_MEMORY_EXCLUSIVE)
-			return NULL;
-	}
+	if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
+		return ret;
 
 	ret = (void *)__get_free_pages(gfp, order);
 	if (!ret)
@@ -72,11 +61,7 @@ void dma_free_coherent(struct device *dev, size_t size,
 	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
 	int order = get_order(size);
 
-	if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
-		int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-
-		bitmap_release_region(mem->bitmap, page, order);
-	} else {
+	if (!dma_release_from_coherent(dev, order, vaddr)) {
 		WARN_ON(irqs_disabled());	/* for portability */
 		BUG_ON(mem && mem->flags & DMA_MEMORY_EXCLUSIVE);
 		free_pages((unsigned long)phys_to_virt(dma_handle), order);
@@ -85,83 +70,6 @@ void dma_free_coherent(struct device *dev, size_t size,
 }
 EXPORT_SYMBOL(dma_free_coherent);
 
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
-				dma_addr_t device_addr, size_t size, int flags)
-{
-	void __iomem *mem_base = NULL;
-	int pages = size >> PAGE_SHIFT;
-	int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
-
-	if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
-		goto out;
-	if (!size)
-		goto out;
-	if (dev->dma_mem)
-		goto out;
-
-	/* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
-
-	mem_base = ioremap_nocache(bus_addr, size);
-	if (!mem_base)
-		goto out;
-
-	dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
-	if (!dev->dma_mem)
-		goto out;
-	dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-	if (!dev->dma_mem->bitmap)
-		goto free1_out;
-
-	dev->dma_mem->virt_base = mem_base;
-	dev->dma_mem->device_base = device_addr;
-	dev->dma_mem->size = pages;
-	dev->dma_mem->flags = flags;
-
-	if (flags & DMA_MEMORY_MAP)
-		return DMA_MEMORY_MAP;
-
-	return DMA_MEMORY_IO;
-
- free1_out:
-	kfree(dev->dma_mem);
- out:
-	if (mem_base)
-		iounmap(mem_base);
-	return 0;
-}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
-	struct dma_coherent_mem *mem = dev->dma_mem;
-
-	if (!mem)
-		return;
-	dev->dma_mem = NULL;
-	iounmap(mem->virt_base);
-	kfree(mem->bitmap);
-	kfree(mem);
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
-					dma_addr_t device_addr, size_t size)
-{
-	struct dma_coherent_mem *mem = dev->dma_mem;
-	int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	int pos, err;
-
-	if (!mem)
-		return ERR_PTR(-EINVAL);
-
-	pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
-	err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
-	if (err != 0)
-		return ERR_PTR(err);
-	return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-
 void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 		    enum dma_data_direction direction)
 {
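[Note: the arch conversions above and below all funnel into the same pair of helpers, now declared in include/asm-generic/dma-coherent.h and implemented in kernel/dma-coherent.c (both listed in the diffstat; their hunks are not part of this capture). A minimal sketch of the calling convention, inferred only from the call sites in this diff — the example_* names are illustrative, not kernel symbols:

	void *example_dma_alloc(struct device *dev, size_t size,
				dma_addr_t *dma_handle, gfp_t gfp)
	{
		void *ret;

		/*
		 * A nonzero return means the per-device coherent area claimed
		 * the request; ret may still be NULL if the area is marked
		 * DMA_MEMORY_EXCLUSIVE and the allocation failed.
		 */
		if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
			return ret;

		/* Otherwise fall back to the normal page allocator. */
		ret = (void *)__get_free_pages(gfp, get_order(size));
		if (ret)
			*dma_handle = virt_to_phys(ret);
		return ret;
	}

	void example_dma_free(struct device *dev, size_t size, void *vaddr)
	{
		/* Nonzero return means vaddr belonged to the device's area. */
		if (!dma_release_from_coherent(dev, get_order(size), vaddr))
			free_pages((unsigned long)vaddr, get_order(size));
	}
]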
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b6fa2877b173..3d0f2b6a5a16 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -30,6 +30,7 @@ config X86
 	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
+	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 
 config ARCH_DEFCONFIG
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 74697408576f..22d7d050905d 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -29,9 +29,6 @@
 
 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
 
-#define to_pages(addr, size) \
-	(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
 #define EXIT_LOOP_COUNT 10000000
 
 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
@@ -185,7 +182,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
 		u64 address, size_t size)
 {
 	int s = 0;
-	unsigned pages = to_pages(address, size);
+	unsigned pages = iommu_num_pages(address, size);
 
 	address &= PAGE_MASK;
 
@@ -557,8 +554,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	if (iommu->exclusion_start &&
 	    iommu->exclusion_start < dma_dom->aperture_size) {
 		unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
-		int pages = to_pages(iommu->exclusion_start,
-				     iommu->exclusion_length);
+		int pages = iommu_num_pages(iommu->exclusion_start,
+					    iommu->exclusion_length);
 		dma_ops_reserve_addresses(dma_dom, startpage, pages);
 	}
 
@@ -767,7 +764,7 @@ static dma_addr_t __map_single(struct device *dev,
 	unsigned int pages;
 	int i;
 
-	pages = to_pages(paddr, size);
+	pages = iommu_num_pages(paddr, size);
 	paddr &= PAGE_MASK;
 
 	address = dma_ops_alloc_addresses(dev, dma_dom, pages);
@@ -802,7 +799,7 @@ static void __unmap_single(struct amd_iommu *iommu,
 	if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
 		return;
 
-	pages = to_pages(dma_addr, size);
+	pages = iommu_num_pages(dma_addr, size);
 	dma_addr &= PAGE_MASK;
 	start = dma_addr;
 
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 37544123896d..8dbffb846de9 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -192,124 +192,6 @@ static __init int iommu_setup(char *p)
 }
 early_param("iommu", iommu_setup);
 
-#ifdef CONFIG_X86_32
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
-				dma_addr_t device_addr, size_t size, int flags)
-{
-	void __iomem *mem_base = NULL;
-	int pages = size >> PAGE_SHIFT;
-	int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
-
-	if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
-		goto out;
-	if (!size)
-		goto out;
-	if (dev->dma_mem)
-		goto out;
-
-	/* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
-
-	mem_base = ioremap(bus_addr, size);
-	if (!mem_base)
-		goto out;
-
-	dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
-	if (!dev->dma_mem)
-		goto out;
-	dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-	if (!dev->dma_mem->bitmap)
-		goto free1_out;
-
-	dev->dma_mem->virt_base = mem_base;
-	dev->dma_mem->device_base = device_addr;
-	dev->dma_mem->size = pages;
-	dev->dma_mem->flags = flags;
-
-	if (flags & DMA_MEMORY_MAP)
-		return DMA_MEMORY_MAP;
-
-	return DMA_MEMORY_IO;
-
- free1_out:
-	kfree(dev->dma_mem);
- out:
-	if (mem_base)
-		iounmap(mem_base);
-	return 0;
-}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
-	struct dma_coherent_mem *mem = dev->dma_mem;
-
-	if (!mem)
-		return;
-	dev->dma_mem = NULL;
-	iounmap(mem->virt_base);
-	kfree(mem->bitmap);
-	kfree(mem);
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
-					dma_addr_t device_addr, size_t size)
-{
-	struct dma_coherent_mem *mem = dev->dma_mem;
-	int pos, err;
-	int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
-
-	pages >>= PAGE_SHIFT;
-
-	if (!mem)
-		return ERR_PTR(-EINVAL);
-
-	pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
-	err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
-	if (err != 0)
-		return ERR_PTR(err);
-	return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-
-static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
-				       dma_addr_t *dma_handle, void **ret)
-{
-	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-	int order = get_order(size);
-
-	if (mem) {
-		int page = bitmap_find_free_region(mem->bitmap, mem->size,
-						   order);
-		if (page >= 0) {
-			*dma_handle = mem->device_base + (page << PAGE_SHIFT);
-			*ret = mem->virt_base + (page << PAGE_SHIFT);
-			memset(*ret, 0, size);
-		}
-		if (mem->flags & DMA_MEMORY_EXCLUSIVE)
-			*ret = NULL;
-	}
-	return (mem != NULL);
-}
-
-static int dma_release_coherent(struct device *dev, int order, void *vaddr)
-{
-	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-
-	if (mem && vaddr >= mem->virt_base && vaddr <
-		   (mem->virt_base + (mem->size << PAGE_SHIFT))) {
-		int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-
-		bitmap_release_region(mem->bitmap, page, order);
-		return 1;
-	}
-	return 0;
-}
-#else
-#define dma_alloc_from_coherent_mem(dev, size, handle, ret)	(0)
-#define dma_release_coherent(dev, order, vaddr)	(0)
-#endif /* CONFIG_X86_32 */
-
 int dma_supported(struct device *dev, u64 mask)
 {
 	struct dma_mapping_ops *ops = get_dma_ops(dev);
@@ -379,7 +261,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	/* ignore region specifiers */
 	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
 
-	if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
+	if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
 		return memory;
 
 	if (!dev) {
@@ -484,7 +366,7 @@ void dma_free_coherent(struct device *dev, size_t size,
 
 	int order = get_order(size);
 	WARN_ON(irqs_disabled());	/* for portability */
-	if (dma_release_coherent(dev, order, vaddr))
+	if (dma_release_from_coherent(dev, order, vaddr))
 		return;
 	if (ops->unmap_single)
 		ops->unmap_single(dev, bus, size, 0);
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 744126e64950..49285f8fd4d5 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -67,9 +67,6 @@ static u32 gart_unmapped_entry;
 	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
 #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
 
-#define to_pages(addr, size) \
-	(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
 #define EMERGENCY_PAGES 32 /* = 128KB */
 
 #ifdef CONFIG_AGP
@@ -241,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
 static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 				size_t size, int dir)
 {
-	unsigned long npages = to_pages(phys_mem, size);
+	unsigned long npages = iommu_num_pages(phys_mem, size);
 	unsigned long iommu_page = alloc_iommu(dev, npages);
 	int i;
 
@@ -304,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
 		return;
 
 	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
-	npages = to_pages(dma_addr, size);
+	npages = iommu_num_pages(dma_addr, size);
 	for (i = 0; i < npages; i++) {
 		iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
 		CLEAR_LEAK(iommu_page + i);
@@ -387,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
 	}
 
 	addr = phys_addr;
-	pages = to_pages(s->offset, s->length);
+	pages = iommu_num_pages(s->offset, s->length);
 	while (pages--) {
 		iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
 		SET_LEAK(iommu_page);
@@ -470,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
 
 		seg_size += s->length;
 		need = nextneed;
-		pages += to_pages(s->offset, s->length);
+		pages += iommu_num_pages(s->offset, s->length);
 		ps = s;
 	}
 	if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
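[Note: every to_pages() call above now goes through iommu_num_pages() from lib/iommu-helper.c (listed in the diffstat; its hunks are not shown here). Judging from the two identical macros this commit deletes, the shared helper should reduce to the same arithmetic — a sketch under that assumption, with a hypothetical name:

	/* Presumed equivalent of the removed to_pages() macros. */
	static unsigned long example_iommu_num_pages(unsigned long addr,
						     size_t size)
	{
		return round_up((addr & ~PAGE_MASK) + size, PAGE_SIZE)
			>> PAGE_SHIFT;
	}

	/*
	 * e.g. addr == 0x1ff0, size == 0x20 straddles a 4 KiB boundary:
	 * (0xff0 + 0x20) rounds up to 0x2000, so the result is 2 pages.
	 */
]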
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 8d45fabc5f3b..ce3251ce5504 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
 	select PREEMPT_NOTIFIERS
+	select MMU_NOTIFIER
 	select ANON_INODES
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 3085f25b4355..007bb06c7504 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -223,14 +223,17 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages)
 {
 	struct mm_struct *mm = current->mm;
-	unsigned long end = start + (nr_pages << PAGE_SHIFT);
-	unsigned long addr = start;
+	unsigned long addr, len, end;
 	unsigned long next;
 	pgd_t *pgdp;
 	int nr = 0;
 
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
 	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
-					start, nr_pages*PAGE_SIZE)))
+					start, len)))
 		goto slow_irqon;
 
 	/*
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index ff3a6a336342..4bdaa590375d 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -23,7 +23,8 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
 	pci_read_config_byte(d, reg++, &busno);
 	pci_read_config_byte(d, reg++, &suba);
 	pci_read_config_byte(d, reg++, &subb);
-	DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
+	dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno,
+		suba, subb);
 	if (busno)
 		pci_scan_bus_with_sysdata(busno);	/* Bus A */
 	if (suba < subb)
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index a09505806b82..5807d1bc73f7 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -128,10 +128,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
 			pr = pci_find_parent_resource(dev, r);
 			if (!r->start || !pr ||
 			    request_resource(pr, r) < 0) {
-				printk(KERN_ERR "PCI: Cannot allocate "
-					"resource region %d "
-					"of bridge %s\n",
-					idx, pci_name(dev));
+				dev_err(&dev->dev, "BAR %d: can't "
+					"allocate resource\n", idx);
 				/*
 				 * Something is wrong with the region.
 				 * Invalidate the resource to prevent
@@ -166,15 +164,15 @@ static void __init pcibios_allocate_resources(int pass)
 			else
 				disabled = !(command & PCI_COMMAND_MEMORY);
 			if (pass == disabled) {
-				DBG("PCI: Resource %08lx-%08lx "
-				    "(f=%lx, d=%d, p=%d)\n",
-				    r->start, r->end, r->flags, disabled, pass);
+				dev_dbg(&dev->dev, "resource %#08llx-%#08llx "
+					"(f=%lx, d=%d, p=%d)\n",
+					(unsigned long long) r->start,
+					(unsigned long long) r->end,
+					r->flags, disabled, pass);
 				pr = pci_find_parent_resource(dev, r);
 				if (!pr || request_resource(pr, r) < 0) {
-					printk(KERN_ERR "PCI: Cannot allocate "
-						"resource region %d "
-						"of device %s\n",
-						idx, pci_name(dev));
+					dev_err(&dev->dev, "BAR %d: can't "
+						"allocate resource\n", idx);
 					/* We'll assign a new address later */
 					r->end -= r->start;
 					r->start = 0;
@@ -187,8 +185,7 @@ static void __init pcibios_allocate_resources(int pass)
 			/* Turn the ROM off, leave the resource region,
 			 * but keep it unregistered. */
 			u32 reg;
-			DBG("PCI: Switching off ROM of %s\n",
-				pci_name(dev));
+			dev_dbg(&dev->dev, "disabling ROM\n");
 			r->flags &= ~IORESOURCE_ROM_ENABLE;
 			pci_read_config_dword(dev,
 					dev->rom_base_reg, &reg);
@@ -257,8 +254,7 @@ void pcibios_set_master(struct pci_dev *dev)
 		lat = pcibios_max_latency;
 	else
 		return;
-	printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n",
-		pci_name(dev), lat);
+	dev_printk(KERN_DEBUG, &dev->dev, "setting latency timer to %d\n", lat);
 	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
 }
 
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 6a06a2eb0597..fec0123b33a9 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -436,7 +436,7 @@ static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
 {
 	WARN_ON_ONCE(pirq >= 9);
 	if (pirq > 8) {
-		printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+		dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq);
 		return 0;
 	}
 	return read_config_nybble(router, 0x74, pirq-1);
@@ -446,7 +446,7 @@ static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
 {
 	WARN_ON_ONCE(pirq >= 9);
 	if (pirq > 8) {
-		printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+		dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq);
 		return 0;
 	}
 	write_config_nybble(router, 0x74, pirq-1, irq);
@@ -492,15 +492,17 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
 	irq = 0;
 	if (pirq <= 4)
 		irq = read_config_nybble(router, 0x56, pirq - 1);
-	printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
-		dev->vendor, dev->device, pirq, irq);
+	dev_info(&dev->dev,
+		 "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n",
+		 dev->vendor, dev->device, pirq, irq);
 	return irq;
 }
 
 static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
 {
-	printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n",
-		dev->vendor, dev->device, pirq, irq);
+	dev_info(&dev->dev,
+		 "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n",
+		 dev->vendor, dev->device, pirq, irq);
 	if (pirq <= 4)
 		write_config_nybble(router, 0x56, pirq - 1, irq);
 	return 1;
@@ -730,7 +732,6 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router,
 	switch (device) {
 	case PCI_DEVICE_ID_AL_M1533:
 	case PCI_DEVICE_ID_AL_M1563:
-		printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n");
 		r->name = "ALI";
 		r->get = pirq_ali_get;
 		r->set = pirq_ali_set;
@@ -840,11 +841,9 @@ static void __init pirq_find_router(struct irq_router *r)
 		    h->probe(r, pirq_router_dev, pirq_router_dev->device))
 			break;
 	}
-	printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
-		pirq_router.name,
-		pirq_router_dev->vendor,
-		pirq_router_dev->device,
-		pci_name(pirq_router_dev));
+	dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n",
+		 pirq_router.name,
+		 pirq_router_dev->vendor, pirq_router_dev->device);
 
 	/* The device remains referenced for the kernel lifetime */
 }
@@ -877,7 +876,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 	/* Find IRQ pin */
 	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
 	if (!pin) {
-		DBG(KERN_DEBUG " -> no interrupt pin\n");
+		dev_dbg(&dev->dev, "no interrupt pin\n");
 		return 0;
 	}
 	pin = pin - 1;
@@ -887,20 +886,20 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 	if (!pirq_table)
 		return 0;
 
-	DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin);
 	info = pirq_get_info(dev);
 	if (!info) {
-		DBG(" -> not found in routing table\n" KERN_DEBUG);
+		dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n",
+			'A' + pin);
 		return 0;
 	}
 	pirq = info->irq[pin].link;
 	mask = info->irq[pin].bitmap;
 	if (!pirq) {
-		DBG(" -> not routed\n" KERN_DEBUG);
+		dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin);
 		return 0;
 	}
-	DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask,
-		pirq_table->exclusive_irqs);
+	dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x",
+		'A' + pin, pirq, mask, pirq_table->exclusive_irqs);
 	mask &= pcibios_irq_mask;
 
 	/* Work around broken HP Pavilion Notebooks which assign USB to
@@ -930,10 +929,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 		if (pci_probe & PCI_USE_PIRQ_MASK)
 			newirq = 0;
 		else
-			printk("\n" KERN_WARNING
-				"PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n"
-				KERN_DEBUG, newirq,
-				pci_name(dev));
+			dev_warn(&dev->dev, "IRQ %d doesn't match PIRQ mask "
+				 "%#x; try pci=usepirqmask\n", newirq, mask);
 	}
 	if (!newirq && assign) {
 		for (i = 0; i < 16; i++) {
@@ -944,39 +941,35 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 				newirq = i;
 		}
 	}
-	DBG(" -> newirq=%d", newirq);
+	dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq);
 
 	/* Check if it is hardcoded */
 	if ((pirq & 0xf0) == 0xf0) {
 		irq = pirq & 0xf;
-		DBG(" -> hardcoded IRQ %d\n", irq);
-		msg = "Hardcoded";
+		msg = "hardcoded";
 	} else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \
 	((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) {
-		DBG(" -> got IRQ %d\n", irq);
-		msg = "Found";
+		msg = "found";
 		eisa_set_level_irq(irq);
 	} else if (newirq && r->set &&
 	    (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
-		DBG(" -> assigning IRQ %d", newirq);
 		if (r->set(pirq_router_dev, dev, pirq, newirq)) {
 			eisa_set_level_irq(newirq);
-			DBG(" ... OK\n");
-			msg = "Assigned";
+			msg = "assigned";
 			irq = newirq;
 		}
 	}
 
 	if (!irq) {
-		DBG(" ... failed\n");
 		if (newirq && mask == (1 << newirq)) {
-			msg = "Guessed";
+			msg = "guessed";
 			irq = newirq;
-		} else
+		} else {
+			dev_dbg(&dev->dev, "can't route interrupt\n");
 			return 0;
+		}
 	}
-	printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq,
-		pci_name(dev));
+	dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq);
 
 	/* Update IRQ for all devices with the same pirq value */
 	while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
@@ -996,17 +989,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 		    (!(pci_probe & PCI_USE_PIRQ_MASK) || \
 		    ((1 << dev2->irq) & mask))) {
 #ifndef CONFIG_PCI_MSI
-			printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
-				pci_name(dev2), dev2->irq, irq);
+			dev_info(&dev2->dev, "IRQ routing conflict: "
+				 "have IRQ %d, want IRQ %d\n",
+				 dev2->irq, irq);
 #endif
 			continue;
 		}
 		dev2->irq = irq;
 		pirq_penalty[irq]++;
 		if (dev != dev2)
-			printk(KERN_INFO
-				"PCI: Sharing IRQ %d with %s\n",
-				irq, pci_name(dev2));
+			dev_info(&dev->dev, "sharing IRQ %d with %s\n",
+				 irq, pci_name(dev2));
 	}
 	return 1;
@@ -1025,8 +1018,7 @@ static void __init pcibios_fixup_irqs(void)
 		 * already in use.
 		 */
 		if (dev->irq >= 16) {
-			DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n",
-				pci_name(dev), dev->irq);
+			dev_dbg(&dev->dev, "ignoring bogus IRQ %d\n", dev->irq);
 			dev->irq = 0;
 		}
 		/*
@@ -1070,12 +1062,12 @@ static void __init pcibios_fixup_irqs(void)
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
 						PCI_SLOT(bridge->devfn), pin);
 				if (irq >= 0)
-					printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
-						pci_name(bridge), 'A' + pin, irq);
+					dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n",
+						 pci_name(bridge),
+						 'A' + pin, irq);
 			}
 			if (irq >= 0) {
-				printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
-					pci_name(dev), 'A' + pin, irq);
+				dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq);
 				dev->irq = irq;
 			}
 		}
@@ -1231,25 +1223,24 @@ static int pirq_enable_irq(struct pci_dev *dev)
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
 						PCI_SLOT(bridge->devfn), pin);
 				if (irq >= 0)
-					printk(KERN_WARNING
-						"PCI: using PPB %s[%c] to get irq %d\n",
-						pci_name(bridge),
-						'A' + pin, irq);
+					dev_warn(&dev->dev, "using bridge %s "
+						 "INT %c to get IRQ %d\n",
+						 pci_name(bridge), 'A' + pin,
+						 irq);
 				dev = bridge;
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
-				printk(KERN_INFO
-					"PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
-					pci_name(dev), 'A' + pin, irq);
+				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
+					 "INT %c -> IRQ %d\n", 'A' + pin, irq);
 				dev->irq = irq;
 				return 0;
 			} else
-				msg = " Probably buggy MP table.";
+				msg = "; probably buggy MP table";
 		} else if (pci_probe & PCI_BIOS_IRQ_SCAN)
 			msg = "";
 		else
-			msg = " Please try using pci=biosirq.";
+			msg = "; please try using pci=biosirq";
 
 		/*
 		 * With IDE legacy devices the IRQ lookup failure is not
@@ -1259,9 +1250,8 @@ static int pirq_enable_irq(struct pci_dev *dev)
 		    !(dev->class & 0x5))
 			return 0;
 
-		printk(KERN_WARNING
-			"PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
-			'A' + pin, pci_name(dev), msg);
+		dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n",
+			 'A' + pin, msg);
 	}
 	return 0;
 }
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index f4b16dc11dad..1177845d3186 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -131,13 +131,14 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
 	u8 busno, suba, subb;
 	int quad = BUS2QUAD(d->bus->number);
 
-	printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d));
+	dev_info(&d->dev, "searching for i450NX host bridges\n");
 	reg = 0xd0;
 	for(pxb=0; pxb<2; pxb++) {
 		pci_read_config_byte(d, reg++, &busno);
 		pci_read_config_byte(d, reg++, &suba);
 		pci_read_config_byte(d, reg++, &subb);
-		DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
+		dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n",
+			pxb, busno, suba, subb);
 		if (busno) {
 			/* Bus A */
 			pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno));
diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c
index dd376f7ad090..d5b4ef898879 100644
--- a/drivers/acpi/pci_slot.c
+++ b/drivers/acpi/pci_slot.c
@@ -76,9 +76,9 @@ static struct acpi_pci_driver acpi_pci_slot_driver = {
 };
 
 static int
-check_slot(acpi_handle handle, int *device, unsigned long *sun)
+check_slot(acpi_handle handle, unsigned long *sun)
 {
-	int retval = 0;
+	int device = -1;
 	unsigned long adr, sta;
 	acpi_status status;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -89,32 +89,27 @@ check_slot(acpi_handle handle, int *device, unsigned long *sun)
 	if (check_sta_before_sun) {
 		/* If SxFy doesn't have _STA, we just assume it's there */
 		status = acpi_evaluate_integer(handle, "_STA", NULL, &sta);
-		if (ACPI_SUCCESS(status) && !(sta & ACPI_STA_DEVICE_PRESENT)) {
-			retval = -1;
+		if (ACPI_SUCCESS(status) && !(sta & ACPI_STA_DEVICE_PRESENT))
 			goto out;
-		}
 	}
 
 	status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr);
 	if (ACPI_FAILURE(status)) {
 		dbg("_ADR returned %d on %s\n", status, (char *)buffer.pointer);
-		retval = -1;
 		goto out;
 	}
 
-	*device = (adr >> 16) & 0xffff;
-
 	/* No _SUN == not a slot == bail */
 	status = acpi_evaluate_integer(handle, "_SUN", NULL, sun);
 	if (ACPI_FAILURE(status)) {
 		dbg("_SUN returned %d on %s\n", status, (char *)buffer.pointer);
-		retval = -1;
 		goto out;
 	}
 
+	device = (adr >> 16) & 0xffff;
 out:
 	kfree(buffer.pointer);
-	return retval;
+	return device;
 }
 
 struct callback_args {
@@ -144,7 +139,8 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
 	struct callback_args *parent_context = context;
 	struct pci_bus *pci_bus = parent_context->pci_bus;
 
-	if (check_slot(handle, &device, &sun))
+	device = check_slot(handle, &sun);
+	if (device < 0)
 		return AE_OK;
 
 	slot = kmalloc(sizeof(*slot), GFP_KERNEL);
diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index 0454be4266c1..9c9c126ed334 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -15,24 +15,24 @@
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
 
-static int mfd_add_device(struct platform_device *parent,
+static int mfd_add_device(struct device *parent, int id,
 			  const struct mfd_cell *cell,
 			  struct resource *mem_base,
 			  int irq_base)
 {
 	struct resource res[cell->num_resources];
 	struct platform_device *pdev;
 	int ret = -ENOMEM;
 	int r;
 
-	pdev = platform_device_alloc(cell->name, parent->id);
+	pdev = platform_device_alloc(cell->name, id);
 	if (!pdev)
 		goto fail_alloc;
 
-	pdev->dev.parent = &parent->dev;
+	pdev->dev.parent = parent;
 
 	ret = platform_device_add_data(pdev,
-			cell, sizeof(struct mfd_cell));
+			cell->platform_data, cell->data_size);
 	if (ret)
 		goto fail_device;
 
@@ -75,17 +75,16 @@ fail_alloc:
 	return ret;
 }
 
-int mfd_add_devices(
-		struct platform_device *parent,
-		const struct mfd_cell *cells, int n_devs,
-		struct resource *mem_base,
-		int irq_base)
+int mfd_add_devices(struct device *parent, int id,
+		    const struct mfd_cell *cells, int n_devs,
+		    struct resource *mem_base,
+		    int irq_base)
 {
 	int i;
 	int ret = 0;
 
 	for (i = 0; i < n_devs; i++) {
-		ret = mfd_add_device(parent, cells + i, mem_base, irq_base);
+		ret = mfd_add_device(parent, id, cells + i, mem_base, irq_base);
 		if (ret)
 			break;
 	}
@@ -99,14 +98,13 @@ EXPORT_SYMBOL(mfd_add_devices);
 
 static int mfd_remove_devices_fn(struct device *dev, void *unused)
 {
-	platform_device_unregister(
-			container_of(dev, struct platform_device, dev));
+	platform_device_unregister(to_platform_device(dev));
 	return 0;
 }
 
-void mfd_remove_devices(struct platform_device *parent)
+void mfd_remove_devices(struct device *parent)
 {
-	device_for_each_child(&parent->dev, NULL, mfd_remove_devices_fn);
+	device_for_each_child(parent, NULL, mfd_remove_devices_fn);
 }
 EXPORT_SYMBOL(mfd_remove_devices);
 
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 94e55e8e7ce6..f4fd797c1590 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -466,8 +466,12 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 	tc6393xb_attach_irq(dev);
 
 	tc6393xb_cells[TC6393XB_CELL_NAND].driver_data = tcpd->nand_data;
+	tc6393xb_cells[TC6393XB_CELL_NAND].platform_data =
+		&tc6393xb_cells[TC6393XB_CELL_NAND];
+	tc6393xb_cells[TC6393XB_CELL_NAND].data_size =
+		sizeof(tc6393xb_cells[TC6393XB_CELL_NAND]);
 
-	retval = mfd_add_devices(dev,
+	retval = mfd_add_devices(&dev->dev, dev->id,
 			tc6393xb_cells, ARRAY_SIZE(tc6393xb_cells),
 			iomem, tcpd->irq_base);
 
@@ -501,7 +505,7 @@ static int __devexit tc6393xb_remove(struct platform_device *dev)
 	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
 	int ret;
 
-	mfd_remove_devices(dev);
+	mfd_remove_devices(&dev->dev);
 
 	if (tc6393xb->irq)
 		tc6393xb_detach_irq(dev);
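[Note: for reference, a sketch of how a driver might use the reworked mfd-core interface after this change — cells supply their own platform_data/data_size (tc6393xb above points a cell at itself), and callers hand mfd_add_devices() a plain struct device plus an explicit platform id. All example_* names are hypothetical:

	static struct mfd_cell example_cells[] = {
		{
			.name		= "example-subdev",
			.num_resources	= 0,
		},
	};

	static int example_probe(struct platform_device *pdev)
	{
		/* The cell's payload is copied into each child device. */
		example_cells[0].platform_data = &example_cells[0];
		example_cells[0].data_size = sizeof(example_cells[0]);

		return mfd_add_devices(&pdev->dev, pdev->id,
				       example_cells,
				       ARRAY_SIZE(example_cells),
				       NULL, 0);
	}

	static int example_remove(struct platform_device *pdev)
	{
		/* Unregisters every child created by mfd_add_devices(). */
		mfd_remove_devices(&pdev->dev);
		return 0;
	}
]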
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 1323a43285d7..ad27e9e225a6 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -1103,7 +1103,7 @@ static inline void dbg_ctrl(struct controller *ctrl)
 	dbg("  Power Indicator    : %3s\n", PWR_LED(ctrl)    ? "yes" : "no");
 	dbg("  Hot-Plug Surprise  : %3s\n", HP_SUPR_RM(ctrl) ? "yes" : "no");
 	dbg("  EMI Present        : %3s\n", EMI(ctrl)        ? "yes" : "no");
-	dbg("  Comamnd Completed  : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes");
+	dbg("  Command Completed  : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes");
 	pciehp_readw(ctrl, SLOTSTATUS, &reg16);
 	dbg("Slot Status          : 0x%04x\n", reg16);
 	pciehp_readw(ctrl, SLOTCTRL, &reg16);
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 15af618d36e2..18354817173c 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -126,7 +126,16 @@ static void msix_flush_writes(unsigned int irq)
 	}
 }
 
-static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
+/*
+ * PCI 2.3 does not specify mask bits for each MSI interrupt. Attempting to
+ * mask all MSI interrupts by clearing the MSI enable bit does not work
+ * reliably as devices without an INTx disable bit will then generate a
+ * level IRQ which will never be cleared.
+ *
+ * Returns 1 if it succeeded in masking the interrupt and 0 if the device
+ * doesn't support MSI masking.
+ */
+static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
 {
 	struct msi_desc *entry;
 
@@ -144,8 +153,7 @@ static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
 			mask_bits |= flag & mask;
 			pci_write_config_dword(entry->dev, pos, mask_bits);
 		} else {
-			__msi_set_enable(entry->dev, entry->msi_attrib.pos,
-					 !flag);
+			return 0;
 		}
 		break;
 	case PCI_CAP_ID_MSIX:
@@ -161,6 +169,7 @@ static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
 		break;
 	}
 	entry->msi_attrib.masked = !!flag;
+	return 1;
 }
 
 void read_msi_msg(unsigned int irq, struct msi_msg *msg)
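[Note: msi_set_mask_bits() now reports whether the device could be masked at all. A hypothetical caller might use the return value like this — the real callers in drivers/pci/msi.c are not part of the hunks shown:

	static void example_mask_msi(unsigned int irq)
	{
		/* Returns 0 when the device has no MSI per-vector mask bits. */
		if (!msi_set_mask_bits(irq, 1, 1))
			pr_debug("irq %u: device cannot mask MSI\n", irq);
	}
]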
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 7764768b6a0e..89a2f0fa10f9 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/module.h>
+#include <linux/pci-aspm.h>
 #include <acpi/acpi.h>
 #include <acpi/acnamesp.h>
 #include <acpi/acresrc.h>
@@ -372,6 +373,12 @@ static int __init acpi_pci_init(void)
372 printk(KERN_INFO"ACPI FADT declares the system doesn't support MSI, so disable it\n"); 373 printk(KERN_INFO"ACPI FADT declares the system doesn't support MSI, so disable it\n");
373 pci_no_msi(); 374 pci_no_msi();
374 } 375 }
376
377 if (acpi_gbl_FADT.boot_flags & BAF_PCIE_ASPM_CONTROL) {
378 printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
379 pcie_no_aspm();
380 }
381
375 ret = register_acpi_bus_type(&acpi_pci_bus); 382 ret = register_acpi_bus_type(&acpi_pci_bus);
376 if (ret) 383 if (ret)
377 return 0; 384 return 0;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e9c356236d27..0a3d856833fc 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -572,6 +572,10 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
572 if (!ret) 572 if (!ret)
573 pci_update_current_state(dev); 573 pci_update_current_state(dev);
574 } 574 }
 575 /* This device is quirked never to enter D3;
 576 honor the quirk and return early */
577 if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3))
578 return 0;
575 579
576 error = pci_raw_set_power_state(dev, state); 580 error = pci_raw_set_power_state(dev, state);
577 581
@@ -1123,6 +1127,12 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable)
1123} 1127}
1124 1128
1125/** 1129/**
1130 * pci_target_state - find an appropriate low power state for a given PCI dev
1131 * @dev: PCI device
1132 *
1133 * Use underlying platform code to find a supported low power state for @dev.
1134 * If the platform can't manage @dev, return the deepest state from which it
1135 * can generate wake events, based on any available PME info.
1126 */ 1136 */
1127pci_power_t pci_target_state(struct pci_dev *dev) 1137pci_power_t pci_target_state(struct pci_dev *dev)
1128{ 1138{
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index f82495583e63..9a7c9e1408a4 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -55,7 +55,7 @@ struct pcie_link_state {
55 struct endpoint_state endpoints[8]; 55 struct endpoint_state endpoints[8];
56}; 56};
57 57
58static int aspm_disabled; 58static int aspm_disabled, aspm_force;
59static DEFINE_MUTEX(aspm_lock); 59static DEFINE_MUTEX(aspm_lock);
60static LIST_HEAD(link_list); 60static LIST_HEAD(link_list);
61 61
@@ -510,6 +510,7 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
510{ 510{
511 struct pci_dev *child_dev; 511 struct pci_dev *child_dev;
512 int child_pos; 512 int child_pos;
513 u32 reg32;
513 514
514 /* 515 /*
515 * Some functions in a slot might not all be PCIE functions, very 516 * Some functions in a slot might not all be PCIE functions, very
@@ -519,6 +520,19 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
519 child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP); 520 child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
520 if (!child_pos) 521 if (!child_pos)
521 return -EINVAL; 522 return -EINVAL;
523
524 /*
 525 * Disable ASPM for pre-1.1 PCIe devices; following Microsoft, use the
 526 * RBER bit to decide whether a function is a 1.1-level device
527 */
528 pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP,
529 &reg32);
530 if (!(reg32 & PCI_EXP_DEVCAP_RBER && !aspm_force)) {
531 printk("Pre-1.1 PCIe device detected, "
532 "disable ASPM for %s. It can be enabled forcedly"
533 " with 'pcie_aspm=force'\n", pci_name(pdev));
534 return -EINVAL;
535 }
522 } 536 }
523 return 0; 537 return 0;
524} 538}
@@ -802,11 +816,23 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
802 816
803static int __init pcie_aspm_disable(char *str) 817static int __init pcie_aspm_disable(char *str)
804{ 818{
805 aspm_disabled = 1; 819 if (!strcmp(str, "off")) {
820 aspm_disabled = 1;
821 printk(KERN_INFO "PCIe ASPM is disabled\n");
822 } else if (!strcmp(str, "force")) {
823 aspm_force = 1;
824 printk(KERN_INFO "PCIe ASPM is forcedly enabled\n");
825 }
806 return 1; 826 return 1;
807} 827}
808 828
809__setup("pcie_noaspm", pcie_aspm_disable); 829__setup("pcie_aspm=", pcie_aspm_disable);
830
831void pcie_no_aspm(void)
832{
833 if (!aspm_force)
834 aspm_disabled = 1;
835}
810 836
811#ifdef CONFIG_ACPI 837#ifdef CONFIG_ACPI
812#include <acpi/acpi_bus.h> 838#include <acpi/acpi_bus.h>
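With this change one handler serves both keywords, so the kernel command line accepts pcie_aspm=off (disable ASPM outright) and pcie_aspm=force (enable it even on pre-1.1 devices). For reference, a minimal sketch of the same __setup pattern with a hypothetical foo_mode= parameter standing in:

#include <linux/init.h>
#include <linux/string.h>

static int foo_force;	/* hypothetical flag */

static int __init foo_mode_setup(char *str)
{
	if (!strcmp(str, "force"))
		foo_force = 1;
	return 1;	/* nonzero: option was consumed */
}
__setup("foo_mode=", foo_mode_setup);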
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index b1724cf31b66..7098dfb07449 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -163,12 +163,9 @@ static inline unsigned int pci_calc_resource_flags(unsigned int flags)
163 return IORESOURCE_MEM; 163 return IORESOURCE_MEM;
164} 164}
165 165
166/* 166static u64 pci_size(u64 base, u64 maxbase, u64 mask)
167 * Find the extent of a PCI decode..
168 */
169static u32 pci_size(u32 base, u32 maxbase, u32 mask)
170{ 167{
171 u32 size = mask & maxbase; /* Find the significant bits */ 168 u64 size = mask & maxbase; /* Find the significant bits */
172 if (!size) 169 if (!size)
173 return 0; 170 return 0;
174 171
@@ -184,135 +181,142 @@ static u32 pci_size(u32 base, u32 maxbase, u32 mask)
184 return size; 181 return size;
185} 182}
186 183
187static u64 pci_size64(u64 base, u64 maxbase, u64 mask) 184enum pci_bar_type {
188{ 185 pci_bar_unknown, /* Standard PCI BAR probe */
189 u64 size = mask & maxbase; /* Find the significant bits */ 186 pci_bar_io, /* An io port BAR */
190 if (!size) 187 pci_bar_mem32, /* A 32-bit memory BAR */
191 return 0; 188 pci_bar_mem64, /* A 64-bit memory BAR */
189};
192 190
193 /* Get the lowest of them to find the decode size, and 191static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar)
194 from that the extent. */ 192{
195 size = (size & ~(size-1)) - 1; 193 if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
194 res->flags = bar & ~PCI_BASE_ADDRESS_IO_MASK;
195 return pci_bar_io;
196 }
196 197
197 /* base == maxbase can be valid only if the BAR has 198 res->flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK;
198 already been programmed with all 1s. */
199 if (base == maxbase && ((base | size) & mask) != mask)
200 return 0;
201 199
202 return size; 200 if (res->flags == PCI_BASE_ADDRESS_MEM_TYPE_64)
201 return pci_bar_mem64;
202 return pci_bar_mem32;
203} 203}
204 204
205static inline int is_64bit_memory(u32 mask) 205/*
206 * If the type is not unknown, we assume that the lowest bit is 'enable'.
207 * Returns 1 if the BAR was 64-bit and 0 if it was 32-bit.
208 */
209static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
210 struct resource *res, unsigned int pos)
206{ 211{
207 if ((mask & (PCI_BASE_ADDRESS_SPACE|PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == 212 u32 l, sz, mask;
208 (PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64))
209 return 1;
210 return 0;
211}
212 213
213static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) 214 mask = type ? ~PCI_ROM_ADDRESS_ENABLE : ~0;
214{
215 unsigned int pos, reg, next;
216 u32 l, sz;
217 struct resource *res;
218 215
219 for(pos=0; pos<howmany; pos = next) { 216 res->name = pci_name(dev);
220 u64 l64;
221 u64 sz64;
222 u32 raw_sz;
223 217
224 next = pos+1; 218 pci_read_config_dword(dev, pos, &l);
225 res = &dev->resource[pos]; 219 pci_write_config_dword(dev, pos, mask);
226 res->name = pci_name(dev); 220 pci_read_config_dword(dev, pos, &sz);
227 reg = PCI_BASE_ADDRESS_0 + (pos << 2); 221 pci_write_config_dword(dev, pos, l);
228 pci_read_config_dword(dev, reg, &l); 222
229 pci_write_config_dword(dev, reg, ~0); 223 /*
230 pci_read_config_dword(dev, reg, &sz); 224 * All bits set in sz means the device isn't working properly.
231 pci_write_config_dword(dev, reg, l); 225 * If the BAR isn't implemented, all bits must be 0. If it's a
232 if (!sz || sz == 0xffffffff) 226 * memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit
233 continue; 227 * 1 must be clear.
234 if (l == 0xffffffff) 228 */
235 l = 0; 229 if (!sz || sz == 0xffffffff)
236 raw_sz = sz; 230 goto fail;
237 if ((l & PCI_BASE_ADDRESS_SPACE) == 231
238 PCI_BASE_ADDRESS_SPACE_MEMORY) { 232 /*
239 sz = pci_size(l, sz, (u32)PCI_BASE_ADDRESS_MEM_MASK); 233 * I don't know how l can have all bits set. Copied from old code.
240 /* 234 * Maybe it fixes a bug on some ancient platform.
241 * For 64bit prefetchable memory sz could be 0, if the 235 */
242 * real size is bigger than 4G, so we need to check 236 if (l == 0xffffffff)
243 * szhi for that. 237 l = 0;
244 */ 238
245 if (!is_64bit_memory(l) && !sz) 239 if (type == pci_bar_unknown) {
246 continue; 240 type = decode_bar(res, l);
247 res->start = l & PCI_BASE_ADDRESS_MEM_MASK; 241 res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN;
248 res->flags |= l & ~PCI_BASE_ADDRESS_MEM_MASK; 242 if (type == pci_bar_io) {
243 l &= PCI_BASE_ADDRESS_IO_MASK;
244 mask = PCI_BASE_ADDRESS_IO_MASK & 0xffff;
249 } else { 245 } else {
250 sz = pci_size(l, sz, PCI_BASE_ADDRESS_IO_MASK & 0xffff); 246 l &= PCI_BASE_ADDRESS_MEM_MASK;
251 if (!sz) 247 mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;
252 continue;
253 res->start = l & PCI_BASE_ADDRESS_IO_MASK;
254 res->flags |= l & ~PCI_BASE_ADDRESS_IO_MASK;
255 } 248 }
256 res->end = res->start + (unsigned long) sz; 249 } else {
257 res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN; 250 res->flags |= (l & IORESOURCE_ROM_ENABLE);
258 if (is_64bit_memory(l)) { 251 l &= PCI_ROM_ADDRESS_MASK;
259 u32 szhi, lhi; 252 mask = (u32)PCI_ROM_ADDRESS_MASK;
260 253 }
261 pci_read_config_dword(dev, reg+4, &lhi); 254
262 pci_write_config_dword(dev, reg+4, ~0); 255 if (type == pci_bar_mem64) {
263 pci_read_config_dword(dev, reg+4, &szhi); 256 u64 l64 = l;
264 pci_write_config_dword(dev, reg+4, lhi); 257 u64 sz64 = sz;
265 sz64 = ((u64)szhi << 32) | raw_sz; 258 u64 mask64 = mask | (u64)~0 << 32;
266 l64 = ((u64)lhi << 32) | l; 259
267 sz64 = pci_size64(l64, sz64, PCI_BASE_ADDRESS_MEM_MASK); 260 pci_read_config_dword(dev, pos + 4, &l);
268 next++; 261 pci_write_config_dword(dev, pos + 4, ~0);
269#if BITS_PER_LONG == 64 262 pci_read_config_dword(dev, pos + 4, &sz);
270 if (!sz64) { 263 pci_write_config_dword(dev, pos + 4, l);
271 res->start = 0; 264
272 res->end = 0; 265 l64 |= ((u64)l << 32);
273 res->flags = 0; 266 sz64 |= ((u64)sz << 32);
274 continue; 267
275 } 268 sz64 = pci_size(l64, sz64, mask64);
276 res->start = l64 & PCI_BASE_ADDRESS_MEM_MASK; 269
277 res->end = res->start + sz64; 270 if (!sz64)
278#else 271 goto fail;
279 if (sz64 > 0x100000000ULL) { 272
280 dev_err(&dev->dev, "BAR %d: can't handle 64-bit" 273 if ((sizeof(resource_size_t) < 8) && (sz64 > 0x100000000ULL)) {
281 " BAR\n", pos); 274 dev_err(&dev->dev, "can't handle 64-bit BAR\n");
282 res->start = 0; 275 goto fail;
283 res->flags = 0; 276 } else if ((sizeof(resource_size_t) < 8) && l) {
284 } else if (lhi) { 277 /* Address above 32-bit boundary; disable the BAR */
285 /* 64-bit wide address, treat as disabled */ 278 pci_write_config_dword(dev, pos, 0);
286 pci_write_config_dword(dev, reg, 279 pci_write_config_dword(dev, pos + 4, 0);
287 l & ~(u32)PCI_BASE_ADDRESS_MEM_MASK); 280 res->start = 0;
288 pci_write_config_dword(dev, reg+4, 0); 281 res->end = sz64;
289 res->start = 0; 282 } else {
290 res->end = sz; 283 res->start = l64;
291 } 284 res->end = l64 + sz64;
292#endif
293 } 285 }
286 } else {
287 sz = pci_size(l, sz, mask);
288
289 if (!sz)
290 goto fail;
291
292 res->start = l;
293 res->end = l + sz;
294 } 294 }
295
296 out:
297 return (type == pci_bar_mem64) ? 1 : 0;
298 fail:
299 res->flags = 0;
300 goto out;
301}
302
303static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
304{
305 unsigned int pos, reg;
306
307 for (pos = 0; pos < howmany; pos++) {
308 struct resource *res = &dev->resource[pos];
309 reg = PCI_BASE_ADDRESS_0 + (pos << 2);
310 pos += __pci_read_base(dev, pci_bar_unknown, res, reg);
311 }
312
295 if (rom) { 313 if (rom) {
314 struct resource *res = &dev->resource[PCI_ROM_RESOURCE];
296 dev->rom_base_reg = rom; 315 dev->rom_base_reg = rom;
297 res = &dev->resource[PCI_ROM_RESOURCE]; 316 res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH |
298 res->name = pci_name(dev); 317 IORESOURCE_READONLY | IORESOURCE_CACHEABLE |
299 pci_read_config_dword(dev, rom, &l); 318 IORESOURCE_SIZEALIGN;
300 pci_write_config_dword(dev, rom, ~PCI_ROM_ADDRESS_ENABLE); 319 __pci_read_base(dev, pci_bar_mem32, res, rom);
301 pci_read_config_dword(dev, rom, &sz);
302 pci_write_config_dword(dev, rom, l);
303 if (l == 0xffffffff)
304 l = 0;
305 if (sz && sz != 0xffffffff) {
306 sz = pci_size(l, sz, (u32)PCI_ROM_ADDRESS_MASK);
307 if (sz) {
308 res->flags = (l & IORESOURCE_ROM_ENABLE) |
309 IORESOURCE_MEM | IORESOURCE_PREFETCH |
310 IORESOURCE_READONLY | IORESOURCE_CACHEABLE |
311 IORESOURCE_SIZEALIGN;
312 res->start = l & PCI_ROM_ADDRESS_MASK;
313 res->end = res->start + (unsigned long) sz;
314 }
315 }
316 } 320 }
317} 321}
318 322
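The consolidated __pci_read_base() keeps the classic sizing trick: save the BAR, write all ones, read back the size mask, restore. The arithmetic in pci_size() can be checked in isolation; below is a self-contained user-space rendition with made-up probe values (a 64 KiB 32-bit memory BAR):

#include <stdio.h>
#include <stdint.h>

static uint64_t bar_extent(uint64_t base, uint64_t maxbase, uint64_t mask)
{
	uint64_t size = mask & maxbase;		/* significant bits */
	if (!size)
		return 0;
	size = (size & ~(size - 1)) - 1;	/* lowest set bit, minus 1 */
	if (base == maxbase && ((base | size) & mask) != mask)
		return 0;	/* BAR was left programmed with all 1s */
	return size;
}

int main(void)
{
	/* sizing read returned 0xffff0000; the memory mask is ~0xf */
	uint64_t ext = bar_extent(0, 0xffff0000ULL, ~0xfULL);

	/* prints 0x10000, i.e. 64 KiB */
	printf("decode size = 0x%llx bytes\n", (unsigned long long)(ext + 1));
	return 0;
}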
@@ -1053,7 +1057,8 @@ int pci_scan_slot(struct pci_bus *bus, int devfn)
1053 } 1057 }
1054 } 1058 }
1055 1059
1056 if (bus->self) 1060 /* only one slot has pcie device */
1061 if (bus->self && nr)
1057 pcie_aspm_init_link_state(bus->self); 1062 pcie_aspm_init_link_state(bus->self);
1058 1063
1059 return nr; 1064 return nr;
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 12d489395fad..0fb365074288 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -923,6 +923,19 @@ static void __init quirk_ide_samemode(struct pci_dev *pdev)
923} 923}
924DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10, quirk_ide_samemode); 924DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10, quirk_ide_samemode);
925 925
926/*
927 * Some ATA devices break if put into D3
928 */
929
930static void __devinit quirk_no_ata_d3(struct pci_dev *pdev)
931{
932 /* Quirk the legacy ATA devices only. The AHCI ones are ok */
933 if ((pdev->class >> 8) == PCI_CLASS_STORAGE_IDE)
934 pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
935}
936DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_ANY_ID, quirk_no_ata_d3);
937DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, PCI_ANY_ID, quirk_no_ata_d3);
938
926/* This was originally an Alpha specific thing, but it really fits here. 939/* This was originally an Alpha specific thing, but it really fits here.
927 * The i82375 PCI/EISA bridge appears as non-classified. Fix that. 940 * The i82375 PCI/EISA bridge appears as non-classified. Fix that.
928 */ 941 */
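Declaring such a flag quirk is mechanical; a hypothetical sketch following the same pattern (the vendor and device IDs are invented):

#include <linux/pci.h>

static void __devinit quirk_example_no_d3(struct pci_dev *pdev)
{
	/* keep this (made-up) device out of D3 */
	pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
}
DECLARE_PCI_FIXUP_EARLY(0x1234, 0x5678, quirk_example_no_d3);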
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 7a4409ab30ea..a319a20ed440 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/kthread.h> 9#include <linux/kthread.h>
10#include <linux/vmalloc.h> 10#include <linux/vmalloc.h>
11#include <linux/delay.h>
11 12
12static int qla24xx_vport_disable(struct fc_vport *, bool); 13static int qla24xx_vport_disable(struct fc_vport *, bool);
13 14
diff --git a/drivers/spi/mpc52xx_psc_spi.c b/drivers/spi/mpc52xx_psc_spi.c
index 604e5f0a2d95..25eda71f4bf4 100644
--- a/drivers/spi/mpc52xx_psc_spi.c
+++ b/drivers/spi/mpc52xx_psc_spi.c
@@ -148,7 +148,6 @@ static int mpc52xx_psc_spi_transfer_rxtx(struct spi_device *spi,
148 unsigned rfalarm; 148 unsigned rfalarm;
149 unsigned send_at_once = MPC52xx_PSC_BUFSIZE; 149 unsigned send_at_once = MPC52xx_PSC_BUFSIZE;
150 unsigned recv_at_once; 150 unsigned recv_at_once;
151 unsigned bpw = mps->bits_per_word / 8;
152 151
153 if (!t->tx_buf && !t->rx_buf && t->len) 152 if (!t->tx_buf && !t->rx_buf && t->len)
154 return -EINVAL; 153 return -EINVAL;
@@ -164,22 +163,15 @@ static int mpc52xx_psc_spi_transfer_rxtx(struct spi_device *spi,
164 } 163 }
165 164
166 dev_dbg(&spi->dev, "send %d bytes...\n", send_at_once); 165 dev_dbg(&spi->dev, "send %d bytes...\n", send_at_once);
167 if (tx_buf) { 166 for (; send_at_once; sb++, send_at_once--) {
168 for (; send_at_once; sb++, send_at_once--) { 167 /* set EOF flag before the last word is sent */
169 /* set EOF flag */ 168 if (send_at_once == 1)
170 if (mps->bits_per_word 169 out_8(&psc->ircr2, 0x01);
171 && (sb + 1) % bpw == 0) 170
172 out_8(&psc->ircr2, 0x01); 171 if (tx_buf)
173 out_8(&psc->mpc52xx_psc_buffer_8, tx_buf[sb]); 172 out_8(&psc->mpc52xx_psc_buffer_8, tx_buf[sb]);
174 } 173 else
175 } else {
176 for (; send_at_once; sb++, send_at_once--) {
177 /* set EOF flag */
178 if (mps->bits_per_word
179 && ((sb + 1) % bpw) == 0)
180 out_8(&psc->ircr2, 0x01);
181 out_8(&psc->mpc52xx_psc_buffer_8, 0); 174 out_8(&psc->mpc52xx_psc_buffer_8, 0);
182 }
183 } 175 }
184 176
185 177
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 0885cc357a37..1c643c9e1f15 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -270,6 +270,7 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev)
270 /* setup the master state. */ 270 /* setup the master state. */
271 271
272 master->num_chipselect = hw->pdata->num_cs; 272 master->num_chipselect = hw->pdata->num_cs;
273 master->bus_num = pdata->bus_num;
273 274
274 /* setup the state for the bitbang driver */ 275 /* setup the state for the bitbang driver */
275 276
diff --git a/drivers/video/sh7760fb.c b/drivers/video/sh7760fb.c
index 4d0e28c5790b..8d0212da4514 100644
--- a/drivers/video/sh7760fb.c
+++ b/drivers/video/sh7760fb.c
@@ -152,6 +152,7 @@ static int sh7760fb_setcmap(struct fb_cmap *cmap, struct fb_info *info)
152 col |= ((*g) & 0xff) << 8; 152 col |= ((*g) & 0xff) << 8;
153 col |= ((*b) & 0xff); 153 col |= ((*b) & 0xff);
154 col &= SH7760FB_PALETTE_MASK; 154 col &= SH7760FB_PALETTE_MASK;
155 iowrite32(col, par->base + LDPR(s));
155 156
156 if (s < 16) 157 if (s < 16)
157 ((u32 *) (info->pseudo_palette))[s] = s; 158 ((u32 *) (info->pseudo_palette))[s] = s;
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 63e2ee63058d..c3e174b35fe6 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -705,7 +705,6 @@ void __init bio_integrity_init_slab(void)
705 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, 705 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
706 SLAB_HWCACHE_ALIGN|SLAB_PANIC); 706 SLAB_HWCACHE_ALIGN|SLAB_PANIC);
707} 707}
708EXPORT_SYMBOL(bio_integrity_init_slab);
709 708
710static int __init integrity_init(void) 709static int __init integrity_init(void)
711{ 710{
diff --git a/fs/buffer.c b/fs/buffer.c
index f95805019639..ca12a6bb82b1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2096,6 +2096,52 @@ int generic_write_end(struct file *file, struct address_space *mapping,
2096EXPORT_SYMBOL(generic_write_end); 2096EXPORT_SYMBOL(generic_write_end);
2097 2097
2098/* 2098/*
2099 * block_is_partially_uptodate checks whether buffers within a page are
2100 * uptodate or not.
2101 *
2102 * Returns true if all buffers which correspond to a file portion
2103 * we want to read are uptodate.
2104 */
2105int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
2106 unsigned long from)
2107{
2108 struct inode *inode = page->mapping->host;
2109 unsigned block_start, block_end, blocksize;
2110 unsigned to;
2111 struct buffer_head *bh, *head;
2112 int ret = 1;
2113
2114 if (!page_has_buffers(page))
2115 return 0;
2116
2117 blocksize = 1 << inode->i_blkbits;
2118 to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
2119 to = from + to;
2120 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2121 return 0;
2122
2123 head = page_buffers(page);
2124 bh = head;
2125 block_start = 0;
2126 do {
2127 block_end = block_start + blocksize;
2128 if (block_end > from && block_start < to) {
2129 if (!buffer_uptodate(bh)) {
2130 ret = 0;
2131 break;
2132 }
2133 if (block_end >= to)
2134 break;
2135 }
2136 block_start = block_end;
2137 bh = bh->b_this_page;
2138 } while (bh != head);
2139
2140 return ret;
2141}
2142EXPORT_SYMBOL(block_is_partially_uptodate);
2143
2144/*
2099 * Generic "read page" function for block devices that have the normal 2145 * Generic "read page" function for block devices that have the normal
2100 * get_block functionality. This is most of the block device filesystems. 2146 * get_block functionality. This is most of the block device filesystems.
2101 * Reads the page asynchronously --- the unlock_buffer() and 2147 * Reads the page asynchronously --- the unlock_buffer() and
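Any filesystem whose pages carry buffer heads can adopt the new helper simply by pointing its address_space_operations at it, as the ext2/ext3/ext4 hunks below do. A hypothetical sketch (the other methods are elided for brevity):

#include <linux/fs.h>
#include <linux/buffer_head.h>

static const struct address_space_operations examplefs_aops = {
	/* ...readpage, writepage, etc. elided... */
	.is_partially_uptodate	= block_is_partially_uptodate,
};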
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 7b99917ffadc..06db79d05c12 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -475,8 +475,8 @@ int ecryptfs_encrypt_page(struct page *page)
475{ 475{
476 struct inode *ecryptfs_inode; 476 struct inode *ecryptfs_inode;
477 struct ecryptfs_crypt_stat *crypt_stat; 477 struct ecryptfs_crypt_stat *crypt_stat;
478 char *enc_extent_virt = NULL; 478 char *enc_extent_virt;
479 struct page *enc_extent_page; 479 struct page *enc_extent_page = NULL;
480 loff_t extent_offset; 480 loff_t extent_offset;
481 int rc = 0; 481 int rc = 0;
482 482
@@ -492,14 +492,14 @@ int ecryptfs_encrypt_page(struct page *page)
492 page->index); 492 page->index);
493 goto out; 493 goto out;
494 } 494 }
495 enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); 495 enc_extent_page = alloc_page(GFP_USER);
496 if (!enc_extent_virt) { 496 if (!enc_extent_page) {
497 rc = -ENOMEM; 497 rc = -ENOMEM;
498 ecryptfs_printk(KERN_ERR, "Error allocating memory for " 498 ecryptfs_printk(KERN_ERR, "Error allocating memory for "
499 "encrypted extent\n"); 499 "encrypted extent\n");
500 goto out; 500 goto out;
501 } 501 }
502 enc_extent_page = virt_to_page(enc_extent_virt); 502 enc_extent_virt = kmap(enc_extent_page);
503 for (extent_offset = 0; 503 for (extent_offset = 0;
504 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); 504 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
505 extent_offset++) { 505 extent_offset++) {
@@ -527,7 +527,10 @@ int ecryptfs_encrypt_page(struct page *page)
527 } 527 }
528 } 528 }
529out: 529out:
530 kfree(enc_extent_virt); 530 if (enc_extent_page) {
531 kunmap(enc_extent_page);
532 __free_page(enc_extent_page);
533 }
531 return rc; 534 return rc;
532} 535}
533 536
@@ -609,8 +612,8 @@ int ecryptfs_decrypt_page(struct page *page)
609{ 612{
610 struct inode *ecryptfs_inode; 613 struct inode *ecryptfs_inode;
611 struct ecryptfs_crypt_stat *crypt_stat; 614 struct ecryptfs_crypt_stat *crypt_stat;
612 char *enc_extent_virt = NULL; 615 char *enc_extent_virt;
613 struct page *enc_extent_page; 616 struct page *enc_extent_page = NULL;
614 unsigned long extent_offset; 617 unsigned long extent_offset;
615 int rc = 0; 618 int rc = 0;
616 619
@@ -627,14 +630,14 @@ int ecryptfs_decrypt_page(struct page *page)
627 page->index); 630 page->index);
628 goto out; 631 goto out;
629 } 632 }
630 enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); 633 enc_extent_page = alloc_page(GFP_USER);
631 if (!enc_extent_virt) { 634 if (!enc_extent_page) {
632 rc = -ENOMEM; 635 rc = -ENOMEM;
633 ecryptfs_printk(KERN_ERR, "Error allocating memory for " 636 ecryptfs_printk(KERN_ERR, "Error allocating memory for "
634 "encrypted extent\n"); 637 "encrypted extent\n");
635 goto out; 638 goto out;
636 } 639 }
637 enc_extent_page = virt_to_page(enc_extent_virt); 640 enc_extent_virt = kmap(enc_extent_page);
638 for (extent_offset = 0; 641 for (extent_offset = 0;
639 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); 642 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
640 extent_offset++) { 643 extent_offset++) {
@@ -662,7 +665,10 @@ int ecryptfs_decrypt_page(struct page *page)
662 } 665 }
663 } 666 }
664out: 667out:
665 kfree(enc_extent_virt); 668 if (enc_extent_page) {
669 kunmap(enc_extent_page);
670 __free_page(enc_extent_page);
671 }
666 return rc; 672 return rc;
667} 673}
668 674
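The rewrite swaps kmalloc()/virt_to_page(), which is invalid for highmem pages, for the canonical allocate-then-map pattern. A minimal self-contained sketch of that pattern (the function name is invented):

#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/string.h>
#include <linux/errno.h>

static int scratch_page_example(void)
{
	struct page *page = alloc_page(GFP_USER);
	char *virt;

	if (!page)
		return -ENOMEM;
	virt = kmap(page);	/* valid until kunmap() */
	memset(virt, 0, PAGE_SIZE);
	kunmap(page);
	__free_page(page);
	return 0;
}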
diff --git a/fs/exec.c b/fs/exec.c
index 9696bbf0f0b1..32993beecbe9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -32,6 +32,7 @@
32#include <linux/swap.h> 32#include <linux/swap.h>
33#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/init.h> 34#include <linux/init.h>
35#include <linux/pagemap.h>
35#include <linux/highmem.h> 36#include <linux/highmem.h>
36#include <linux/spinlock.h> 37#include <linux/spinlock.h>
37#include <linux/key.h> 38#include <linux/key.h>
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 384fc0d1dd74..991d6dfeb51f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -791,6 +791,7 @@ const struct address_space_operations ext2_aops = {
791 .direct_IO = ext2_direct_IO, 791 .direct_IO = ext2_direct_IO,
792 .writepages = ext2_writepages, 792 .writepages = ext2_writepages,
793 .migratepage = buffer_migrate_page, 793 .migratepage = buffer_migrate_page,
794 .is_partially_uptodate = block_is_partially_uptodate,
794}; 795};
795 796
796const struct address_space_operations ext2_aops_xip = { 797const struct address_space_operations ext2_aops_xip = {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3bf07d70b914..507d8689b111 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1767,44 +1767,47 @@ static int ext3_journalled_set_page_dirty(struct page *page)
1767} 1767}
1768 1768
1769static const struct address_space_operations ext3_ordered_aops = { 1769static const struct address_space_operations ext3_ordered_aops = {
1770 .readpage = ext3_readpage, 1770 .readpage = ext3_readpage,
1771 .readpages = ext3_readpages, 1771 .readpages = ext3_readpages,
1772 .writepage = ext3_ordered_writepage, 1772 .writepage = ext3_ordered_writepage,
1773 .sync_page = block_sync_page, 1773 .sync_page = block_sync_page,
1774 .write_begin = ext3_write_begin, 1774 .write_begin = ext3_write_begin,
1775 .write_end = ext3_ordered_write_end, 1775 .write_end = ext3_ordered_write_end,
1776 .bmap = ext3_bmap, 1776 .bmap = ext3_bmap,
1777 .invalidatepage = ext3_invalidatepage, 1777 .invalidatepage = ext3_invalidatepage,
1778 .releasepage = ext3_releasepage, 1778 .releasepage = ext3_releasepage,
1779 .direct_IO = ext3_direct_IO, 1779 .direct_IO = ext3_direct_IO,
1780 .migratepage = buffer_migrate_page, 1780 .migratepage = buffer_migrate_page,
1781 .is_partially_uptodate = block_is_partially_uptodate,
1781}; 1782};
1782 1783
1783static const struct address_space_operations ext3_writeback_aops = { 1784static const struct address_space_operations ext3_writeback_aops = {
1784 .readpage = ext3_readpage, 1785 .readpage = ext3_readpage,
1785 .readpages = ext3_readpages, 1786 .readpages = ext3_readpages,
1786 .writepage = ext3_writeback_writepage, 1787 .writepage = ext3_writeback_writepage,
1787 .sync_page = block_sync_page, 1788 .sync_page = block_sync_page,
1788 .write_begin = ext3_write_begin, 1789 .write_begin = ext3_write_begin,
1789 .write_end = ext3_writeback_write_end, 1790 .write_end = ext3_writeback_write_end,
1790 .bmap = ext3_bmap, 1791 .bmap = ext3_bmap,
1791 .invalidatepage = ext3_invalidatepage, 1792 .invalidatepage = ext3_invalidatepage,
1792 .releasepage = ext3_releasepage, 1793 .releasepage = ext3_releasepage,
1793 .direct_IO = ext3_direct_IO, 1794 .direct_IO = ext3_direct_IO,
1794 .migratepage = buffer_migrate_page, 1795 .migratepage = buffer_migrate_page,
1796 .is_partially_uptodate = block_is_partially_uptodate,
1795}; 1797};
1796 1798
1797static const struct address_space_operations ext3_journalled_aops = { 1799static const struct address_space_operations ext3_journalled_aops = {
1798 .readpage = ext3_readpage, 1800 .readpage = ext3_readpage,
1799 .readpages = ext3_readpages, 1801 .readpages = ext3_readpages,
1800 .writepage = ext3_journalled_writepage, 1802 .writepage = ext3_journalled_writepage,
1801 .sync_page = block_sync_page, 1803 .sync_page = block_sync_page,
1802 .write_begin = ext3_write_begin, 1804 .write_begin = ext3_write_begin,
1803 .write_end = ext3_journalled_write_end, 1805 .write_end = ext3_journalled_write_end,
1804 .set_page_dirty = ext3_journalled_set_page_dirty, 1806 .set_page_dirty = ext3_journalled_set_page_dirty,
1805 .bmap = ext3_bmap, 1807 .bmap = ext3_bmap,
1806 .invalidatepage = ext3_invalidatepage, 1808 .invalidatepage = ext3_invalidatepage,
1807 .releasepage = ext3_releasepage, 1809 .releasepage = ext3_releasepage,
1810 .is_partially_uptodate = block_is_partially_uptodate,
1808}; 1811};
1809 1812
1810void ext3_set_aops(struct inode *inode) 1813void ext3_set_aops(struct inode *inode)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8ca2763df091..9843b046c235 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2806,59 +2806,63 @@ static int ext4_journalled_set_page_dirty(struct page *page)
2806} 2806}
2807 2807
2808static const struct address_space_operations ext4_ordered_aops = { 2808static const struct address_space_operations ext4_ordered_aops = {
2809 .readpage = ext4_readpage, 2809 .readpage = ext4_readpage,
2810 .readpages = ext4_readpages, 2810 .readpages = ext4_readpages,
2811 .writepage = ext4_normal_writepage, 2811 .writepage = ext4_normal_writepage,
2812 .sync_page = block_sync_page, 2812 .sync_page = block_sync_page,
2813 .write_begin = ext4_write_begin, 2813 .write_begin = ext4_write_begin,
2814 .write_end = ext4_ordered_write_end, 2814 .write_end = ext4_ordered_write_end,
2815 .bmap = ext4_bmap, 2815 .bmap = ext4_bmap,
2816 .invalidatepage = ext4_invalidatepage, 2816 .invalidatepage = ext4_invalidatepage,
2817 .releasepage = ext4_releasepage, 2817 .releasepage = ext4_releasepage,
2818 .direct_IO = ext4_direct_IO, 2818 .direct_IO = ext4_direct_IO,
2819 .migratepage = buffer_migrate_page, 2819 .migratepage = buffer_migrate_page,
2820 .is_partially_uptodate = block_is_partially_uptodate,
2820}; 2821};
2821 2822
2822static const struct address_space_operations ext4_writeback_aops = { 2823static const struct address_space_operations ext4_writeback_aops = {
2823 .readpage = ext4_readpage, 2824 .readpage = ext4_readpage,
2824 .readpages = ext4_readpages, 2825 .readpages = ext4_readpages,
2825 .writepage = ext4_normal_writepage, 2826 .writepage = ext4_normal_writepage,
2826 .sync_page = block_sync_page, 2827 .sync_page = block_sync_page,
2827 .write_begin = ext4_write_begin, 2828 .write_begin = ext4_write_begin,
2828 .write_end = ext4_writeback_write_end, 2829 .write_end = ext4_writeback_write_end,
2829 .bmap = ext4_bmap, 2830 .bmap = ext4_bmap,
2830 .invalidatepage = ext4_invalidatepage, 2831 .invalidatepage = ext4_invalidatepage,
2831 .releasepage = ext4_releasepage, 2832 .releasepage = ext4_releasepage,
2832 .direct_IO = ext4_direct_IO, 2833 .direct_IO = ext4_direct_IO,
2833 .migratepage = buffer_migrate_page, 2834 .migratepage = buffer_migrate_page,
2835 .is_partially_uptodate = block_is_partially_uptodate,
2834}; 2836};
2835 2837
2836static const struct address_space_operations ext4_journalled_aops = { 2838static const struct address_space_operations ext4_journalled_aops = {
2837 .readpage = ext4_readpage, 2839 .readpage = ext4_readpage,
2838 .readpages = ext4_readpages, 2840 .readpages = ext4_readpages,
2839 .writepage = ext4_journalled_writepage, 2841 .writepage = ext4_journalled_writepage,
2840 .sync_page = block_sync_page, 2842 .sync_page = block_sync_page,
2841 .write_begin = ext4_write_begin, 2843 .write_begin = ext4_write_begin,
2842 .write_end = ext4_journalled_write_end, 2844 .write_end = ext4_journalled_write_end,
2843 .set_page_dirty = ext4_journalled_set_page_dirty, 2845 .set_page_dirty = ext4_journalled_set_page_dirty,
2844 .bmap = ext4_bmap, 2846 .bmap = ext4_bmap,
2845 .invalidatepage = ext4_invalidatepage, 2847 .invalidatepage = ext4_invalidatepage,
2846 .releasepage = ext4_releasepage, 2848 .releasepage = ext4_releasepage,
2849 .is_partially_uptodate = block_is_partially_uptodate,
2847}; 2850};
2848 2851
2849static const struct address_space_operations ext4_da_aops = { 2852static const struct address_space_operations ext4_da_aops = {
2850 .readpage = ext4_readpage, 2853 .readpage = ext4_readpage,
2851 .readpages = ext4_readpages, 2854 .readpages = ext4_readpages,
2852 .writepage = ext4_da_writepage, 2855 .writepage = ext4_da_writepage,
2853 .writepages = ext4_da_writepages, 2856 .writepages = ext4_da_writepages,
2854 .sync_page = block_sync_page, 2857 .sync_page = block_sync_page,
2855 .write_begin = ext4_da_write_begin, 2858 .write_begin = ext4_da_write_begin,
2856 .write_end = ext4_da_write_end, 2859 .write_end = ext4_da_write_end,
2857 .bmap = ext4_bmap, 2860 .bmap = ext4_bmap,
2858 .invalidatepage = ext4_da_invalidatepage, 2861 .invalidatepage = ext4_da_invalidatepage,
2859 .releasepage = ext4_releasepage, 2862 .releasepage = ext4_releasepage,
2860 .direct_IO = ext4_direct_IO, 2863 .direct_IO = ext4_direct_IO,
2861 .migratepage = buffer_migrate_page, 2864 .migratepage = buffer_migrate_page,
2865 .is_partially_uptodate = block_is_partially_uptodate,
2862}; 2866};
2863 2867
2864void ext4_set_aops(struct inode *inode) 2868void ext4_set_aops(struct inode *inode)
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index 1ebbe883f786..13a3d9ad92db 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -277,6 +277,7 @@ enum acpi_prefered_pm_profiles {
277#define BAF_LEGACY_DEVICES 0x0001 277#define BAF_LEGACY_DEVICES 0x0001
278#define BAF_8042_KEYBOARD_CONTROLLER 0x0002 278#define BAF_8042_KEYBOARD_CONTROLLER 0x0002
279#define BAF_MSI_NOT_SUPPORTED 0x0008 279#define BAF_MSI_NOT_SUPPORTED 0x0008
280#define BAF_PCIE_ASPM_CONTROL 0x0010
280 281
281#define FADT2_REVISION_ID 3 282#define FADT2_REVISION_ID 3
282#define FADT2_MINUS_REVISION_ID 2 283#define FADT2_MINUS_REVISION_ID 2
diff --git a/include/asm-arm/arch-s3c2410/spi.h b/include/asm-arm/arch-s3c2410/spi.h
index 352d33860b63..442169887d3b 100644
--- a/include/asm-arm/arch-s3c2410/spi.h
+++ b/include/asm-arm/arch-s3c2410/spi.h
@@ -16,6 +16,7 @@
16struct s3c2410_spi_info { 16struct s3c2410_spi_info {
17 unsigned long pin_cs; /* simple gpio cs */ 17 unsigned long pin_cs; /* simple gpio cs */
18 unsigned int num_cs; /* total chipselects */ 18 unsigned int num_cs; /* total chipselects */
19 int bus_num; /* bus number to use. */
19 20
20 void (*set_cs)(struct s3c2410_spi_info *spi, int cs, int pol); 21 void (*set_cs)(struct s3c2410_spi_info *spi, int cs, int pol);
21}; 22};
diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h
index f41335ba6337..45329fca1b64 100644
--- a/include/asm-arm/dma-mapping.h
+++ b/include/asm-arm/dma-mapping.h
@@ -7,6 +7,8 @@
7 7
8#include <linux/scatterlist.h> 8#include <linux/scatterlist.h>
9 9
10#include <asm-generic/dma-coherent.h>
11
10/* 12/*
11 * DMA-consistent mapping functions. These allocate/free a region of 13 * DMA-consistent mapping functions. These allocate/free a region of
12 * uncached, unwrite-buffered mapped memory space for use with DMA 14 * uncached, unwrite-buffered mapped memory space for use with DMA
diff --git a/include/asm-cris/dma-mapping.h b/include/asm-cris/dma-mapping.h
index cb2fb25ff8d9..da8ef8e8f842 100644
--- a/include/asm-cris/dma-mapping.h
+++ b/include/asm-cris/dma-mapping.h
@@ -14,6 +14,8 @@
14#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) 14#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
15 15
16#ifdef CONFIG_PCI 16#ifdef CONFIG_PCI
17#include <asm-generic/dma-coherent.h>
18
17void *dma_alloc_coherent(struct device *dev, size_t size, 19void *dma_alloc_coherent(struct device *dev, size_t size,
18 dma_addr_t *dma_handle, gfp_t flag); 20 dma_addr_t *dma_handle, gfp_t flag);
19 21
diff --git a/include/asm-generic/dma-coherent.h b/include/asm-generic/dma-coherent.h
new file mode 100644
index 000000000000..85a3ffaa0242
--- /dev/null
+++ b/include/asm-generic/dma-coherent.h
@@ -0,0 +1,32 @@
1#ifndef DMA_COHERENT_H
2#define DMA_COHERENT_H
3
4#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
5/*
6 * These two functions are only for dma allocator.
7 * Don't use them in device drivers.
8 */
9int dma_alloc_from_coherent(struct device *dev, ssize_t size,
10 dma_addr_t *dma_handle, void **ret);
11int dma_release_from_coherent(struct device *dev, int order, void *vaddr);
12
13/*
14 * Standard interface
15 */
16#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
17extern int
18dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
19 dma_addr_t device_addr, size_t size, int flags);
20
21extern void
22dma_release_declared_memory(struct device *dev);
23
24extern void *
25dma_mark_declared_memory_occupied(struct device *dev,
26 dma_addr_t device_addr, size_t size);
27#else
28#define dma_alloc_from_coherent(dev, size, handle, ret) (0)
29#define dma_release_from_coherent(dev, order, vaddr) (0)
30#endif
31
32#endif
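A driver would typically call the declare interface at probe time so that later dma_alloc_coherent() calls are satisfied from the device-local window. A hedged sketch with invented addresses and size:

#include <linux/dma-mapping.h>
#include <linux/errno.h>

static int example_probe(struct device *dev)
{
	/* returns a DMA_MEMORY_* value on success, 0 on failure */
	if (!dma_declare_coherent_memory(dev,
					 0x80000000,	/* bus addr (made up) */
					 0x80000000,	/* device addr */
					 1 << 20,	/* 1 MiB window */
					 DMA_MEMORY_MAP))
		return -ENXIO;
	return 0;
}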
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index c764a8fcb058..0f99ad38b012 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -2,6 +2,7 @@
2#define _ASM_GENERIC_GPIO_H 2#define _ASM_GENERIC_GPIO_H
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <linux/errno.h>
5 6
6#ifdef CONFIG_GPIOLIB 7#ifdef CONFIG_GPIOLIB
7 8
diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h
index 087325ede76c..a7cdc48e8b78 100644
--- a/include/asm-generic/pgtable-nopmd.h
+++ b/include/asm-generic/pgtable-nopmd.h
@@ -5,6 +5,8 @@
5 5
6#include <asm-generic/pgtable-nopud.h> 6#include <asm-generic/pgtable-nopud.h>
7 7
8struct mm_struct;
9
8#define __PAGETABLE_PMD_FOLDED 10#define __PAGETABLE_PMD_FOLDED
9 11
10/* 12/*
@@ -54,7 +56,9 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address)
54 * inside the pud, so has no extra memory associated with it. 56 * inside the pud, so has no extra memory associated with it.
55 */ 57 */
56#define pmd_alloc_one(mm, address) NULL 58#define pmd_alloc_one(mm, address) NULL
57#define pmd_free(mm, x) do { } while (0) 59static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
60{
61}
58#define __pmd_free_tlb(tlb, x) do { } while (0) 62#define __pmd_free_tlb(tlb, x) do { } while (0)
59 63
60#undef pmd_addr_end 64#undef pmd_addr_end
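Turning the empty macro into an empty static inline costs nothing at run time but restores type-checking of both arguments. The pattern in miniature, with hypothetical names:

struct widget;

/* old style: arguments never evaluated or type-checked */
#define widget_put_old(w) do { } while (0)

/* new style: still empty, but the compiler checks that 'w'
 * really is a struct widget pointer */
static inline void widget_put(struct widget *w)
{
}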
diff --git a/include/asm-sh/dma-mapping.h b/include/asm-sh/dma-mapping.h
index 6c0b8a2de143..627315ecdb52 100644
--- a/include/asm-sh/dma-mapping.h
+++ b/include/asm-sh/dma-mapping.h
@@ -5,6 +5,7 @@
5#include <linux/scatterlist.h> 5#include <linux/scatterlist.h>
6#include <asm/cacheflush.h> 6#include <asm/cacheflush.h>
7#include <asm/io.h> 7#include <asm/io.h>
8#include <asm-generic/dma-coherent.h>
8 9
9extern struct bus_type pci_bus_type; 10extern struct bus_type pci_bus_type;
10 11
diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h
index 0eaa9bf6011f..ad9cd6d49bfc 100644
--- a/include/asm-x86/dma-mapping.h
+++ b/include/asm-x86/dma-mapping.h
@@ -249,25 +249,5 @@ static inline int dma_get_cache_alignment(void)
249 249
250#define dma_is_consistent(d, h) (1) 250#define dma_is_consistent(d, h) (1)
251 251
252#ifdef CONFIG_X86_32 252#include <asm-generic/dma-coherent.h>
253# define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
254struct dma_coherent_mem {
255 void *virt_base;
256 u32 device_base;
257 int size;
258 int flags;
259 unsigned long *bitmap;
260};
261
262extern int
263dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
264 dma_addr_t device_addr, size_t size, int flags);
265
266extern void
267dma_release_declared_memory(struct device *dev);
268
269extern void *
270dma_mark_declared_memory_occupied(struct device *dev,
271 dma_addr_t device_addr, size_t size);
272#endif /* CONFIG_X86_32 */
273#endif 253#endif
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 82aa36c53ea7..50cfe8ceb478 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -205,6 +205,8 @@ void block_invalidatepage(struct page *page, unsigned long offset);
205int block_write_full_page(struct page *page, get_block_t *get_block, 205int block_write_full_page(struct page *page, get_block_t *get_block,
206 struct writeback_control *wbc); 206 struct writeback_control *wbc);
207int block_read_full_page(struct page*, get_block_t*); 207int block_read_full_page(struct page*, get_block_t*);
208int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
209 unsigned long from);
208int block_write_begin(struct file *, struct address_space *, 210int block_write_begin(struct file *, struct address_space *,
209 loff_t, unsigned, unsigned, 211 loff_t, unsigned, unsigned,
210 struct page **, void **, get_block_t*); 212 struct page **, void **, get_block_t*);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8252b045e624..580b513668fe 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -443,6 +443,27 @@ static inline size_t iov_iter_count(struct iov_iter *i)
443 return i->count; 443 return i->count;
444} 444}
445 445
446/*
447 * "descriptor" for what we're up to with a read.
448 * This allows us to use the same read code yet
449 * have multiple different users of the data that
450 * we read from a file.
451 *
452 * The simplest case just copies the data to user
453 * mode.
454 */
455typedef struct {
456 size_t written;
457 size_t count;
458 union {
459 char __user *buf;
460 void *data;
461 } arg;
462 int error;
463} read_descriptor_t;
464
465typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
466 unsigned long, unsigned long);
446 467
447struct address_space_operations { 468struct address_space_operations {
448 int (*writepage)(struct page *page, struct writeback_control *wbc); 469 int (*writepage)(struct page *page, struct writeback_control *wbc);
@@ -484,6 +505,8 @@ struct address_space_operations {
484 int (*migratepage) (struct address_space *, 505 int (*migratepage) (struct address_space *,
485 struct page *, struct page *); 506 struct page *, struct page *);
486 int (*launder_page) (struct page *); 507 int (*launder_page) (struct page *);
508 int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
509 unsigned long);
487}; 510};
488 511
489/* 512/*
@@ -1198,27 +1221,6 @@ struct block_device_operations {
1198 struct module *owner; 1221 struct module *owner;
1199}; 1222};
1200 1223
1201/*
1202 * "descriptor" for what we're up to with a read.
1203 * This allows us to use the same read code yet
1204 * have multiple different users of the data that
1205 * we read from a file.
1206 *
1207 * The simplest case just copies the data to user
1208 * mode.
1209 */
1210typedef struct {
1211 size_t written;
1212 size_t count;
1213 union {
1214 char __user * buf;
1215 void *data;
1216 } arg;
1217 int error;
1218} read_descriptor_t;
1219
1220typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long);
1221
1222/* These macros are for out of kernel modules to test that 1224/* These macros are for out of kernel modules to test that
1223 * the kernel supports the unlocked_ioctl and compat_ioctl 1225 * the kernel supports the unlocked_ioctl and compat_ioctl
1224 * fields in struct file_operations. */ 1226 * fields in struct file_operations. */
diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
index c975caf75385..f8598f583944 100644
--- a/include/linux/iommu-helper.h
+++ b/include/linux/iommu-helper.h
@@ -8,3 +8,4 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
8 unsigned long align_mask); 8 unsigned long align_mask);
9extern void iommu_area_free(unsigned long *map, unsigned long start, 9extern void iommu_area_free(unsigned long *map, unsigned long start,
10 unsigned int nr); 10 unsigned int nr);
11extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len);
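The helper's body is not shown in this excerpt, but its likely semantics (how many pages does the byte range [addr, addr+len) touch?) can be sketched stand-alone. This is an assumption for illustration, not a quote of lib/iommu-helper.c:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

static unsigned long iommu_num_pages_sketch(unsigned long addr,
					    unsigned long len)
{
	unsigned long size = (addr & ~PAGE_MASK) + len + PAGE_SIZE - 1;
	return size >> PAGE_SHIFT;
}

int main(void)
{
	/* 8 KiB starting 16 bytes into a page spans 3 pages */
	printf("%lu\n", iommu_num_pages_sketch(0x1010, 0x2000));
	return 0;
}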
diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index bb3dd0545928..49ef857cdb2d 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -1,5 +1,3 @@
1#ifndef MFD_CORE_H
2#define MFD_CORE_H
3/* 1/*
4 * drivers/mfd/mfd-core.h 2 * drivers/mfd/mfd-core.h
5 * 3 *
@@ -13,6 +11,9 @@
13 * 11 *
14 */ 12 */
15 13
14#ifndef MFD_CORE_H
15#define MFD_CORE_H
16
16#include <linux/platform_device.h> 17#include <linux/platform_device.h>
17 18
18/* 19/*
@@ -28,7 +29,13 @@ struct mfd_cell {
28 int (*suspend)(struct platform_device *dev); 29 int (*suspend)(struct platform_device *dev);
29 int (*resume)(struct platform_device *dev); 30 int (*resume)(struct platform_device *dev);
30 31
31 void *driver_data; /* driver-specific data */ 32 /* driver-specific data for MFD-aware "cell" drivers */
33 void *driver_data;
34
 35 /* platform_data can either pass data to a "generic"
 36 driver or serve as a hook back to the mfd_cell for "cell" drivers */
37 void *platform_data;
38 size_t data_size;
32 39
33 /* 40 /*
 34 * These resources can be specified relative to the parent device. 41
@@ -38,18 +45,11 @@ struct mfd_cell {
38 const struct resource *resources; 45 const struct resource *resources;
39}; 46};
40 47
41static inline struct mfd_cell * 48extern int mfd_add_devices(struct device *parent, int id,
42mfd_get_cell(struct platform_device *pdev) 49 const struct mfd_cell *cells, int n_devs,
43{ 50 struct resource *mem_base,
44 return (struct mfd_cell *)pdev->dev.platform_data; 51 int irq_base);
45}
46
47extern int mfd_add_devices(
48 struct platform_device *parent,
49 const struct mfd_cell *cells, int n_devs,
50 struct resource *mem_base,
51 int irq_base);
52 52
53extern void mfd_remove_devices(struct platform_device *parent); 53extern void mfd_remove_devices(struct device *parent);
54 54
55#endif 55#endif
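With the reworked signature a driver passes a plain struct device plus an explicit id. A hypothetical two-cell chip for illustration (all names invented):

#include <linux/kernel.h>
#include <linux/mfd/core.h>
#include <linux/platform_device.h>

static struct mfd_cell example_cells[] = {
	{ .name = "example-gpio" },
	{ .name = "example-rtc"  },
};

static int example_probe(struct platform_device *pdev)
{
	return mfd_add_devices(&pdev->dev, pdev->id,
			       example_cells, ARRAY_SIZE(example_cells),
			       NULL, 0);
}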
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6e695eaab4ce..866a3dbe5c75 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1104,6 +1104,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
1104 unsigned long addr, unsigned long len, pgoff_t pgoff); 1104 unsigned long addr, unsigned long len, pgoff_t pgoff);
1105extern void exit_mmap(struct mm_struct *); 1105extern void exit_mmap(struct mm_struct *);
1106 1106
1107extern int mm_take_all_locks(struct mm_struct *mm);
1108extern void mm_drop_all_locks(struct mm_struct *mm);
1109
1107#ifdef CONFIG_PROC_FS 1110#ifdef CONFIG_PROC_FS
1108/* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */ 1111/* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */
1109extern void added_exe_file_vma(struct mm_struct *mm); 1112extern void added_exe_file_vma(struct mm_struct *mm);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 746f975b58ef..386edbe2cb4e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -10,6 +10,7 @@
10#include <linux/rbtree.h> 10#include <linux/rbtree.h>
11#include <linux/rwsem.h> 11#include <linux/rwsem.h>
12#include <linux/completion.h> 12#include <linux/completion.h>
13#include <linux/cpumask.h>
13#include <asm/page.h> 14#include <asm/page.h>
14#include <asm/mmu.h> 15#include <asm/mmu.h>
15 16
@@ -253,6 +254,9 @@ struct mm_struct {
253 struct file *exe_file; 254 struct file *exe_file;
254 unsigned long num_exe_file_vmas; 255 unsigned long num_exe_file_vmas;
255#endif 256#endif
257#ifdef CONFIG_MMU_NOTIFIER
258 struct mmu_notifier_mm *mmu_notifier_mm;
259#endif
256}; 260};
257 261
258#endif /* _LINUX_MM_TYPES_H */ 262#endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
new file mode 100644
index 000000000000..b77486d152cd
--- /dev/null
+++ b/include/linux/mmu_notifier.h
@@ -0,0 +1,279 @@
1#ifndef _LINUX_MMU_NOTIFIER_H
2#define _LINUX_MMU_NOTIFIER_H
3
4#include <linux/list.h>
5#include <linux/spinlock.h>
6#include <linux/mm_types.h>
7
8struct mmu_notifier;
9struct mmu_notifier_ops;
10
11#ifdef CONFIG_MMU_NOTIFIER
12
13/*
14 * The mmu notifier_mm structure is allocated and installed in
15 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
16 * critical section and it's released only when mm_count reaches zero
17 * in mmdrop().
18 */
19struct mmu_notifier_mm {
 20 /* all mmu notifiers registered in this mm are queued in this list */
21 struct hlist_head list;
22 /* to serialize the list modifications and hlist_unhashed */
23 spinlock_t lock;
24};
25
26struct mmu_notifier_ops {
27 /*
28 * Called either by mmu_notifier_unregister or when the mm is
29 * being destroyed by exit_mmap, always before all pages are
30 * freed. This can run concurrently with other mmu notifier
31 * methods (the ones invoked outside the mm context) and it
32 * should tear down all secondary mmu mappings and freeze the
 33 * secondary mmu. If this method isn't implemented you have to
34 * be sure that nothing could possibly write to the pages
35 * through the secondary mmu by the time the last thread with
36 * tsk->mm == mm exits.
37 *
 38 * As a side note: the pages freed after ->release returns could
39 * be immediately reallocated by the gart at an alias physical
40 * address with a different cache model, so if ->release isn't
41 * implemented because all _software_ driven memory accesses
42 * through the secondary mmu are terminated by the time the
 43 * last thread of this mm quits, you also have to be sure that
44 * speculative _hardware_ operations can't allocate dirty
45 * cachelines in the cpu that could not be snooped and made
46 * coherent with the other read and write operations happening
47 * through the gart alias address, so leading to memory
48 * corruption.
49 */
50 void (*release)(struct mmu_notifier *mn,
51 struct mm_struct *mm);
52
53 /*
54 * clear_flush_young is called after the VM is
55 * test-and-clearing the young/accessed bitflag in the
56 * pte. This way the VM will provide proper aging to the
57 * accesses to the page through the secondary MMUs and not
58 * only to the ones through the Linux pte.
59 */
60 int (*clear_flush_young)(struct mmu_notifier *mn,
61 struct mm_struct *mm,
62 unsigned long address);
63
64 /*
65 * Before this is invoked any secondary MMU is still ok to
66 * read/write to the page previously pointed to by the Linux
67 * pte because the page hasn't been freed yet and it won't be
68 * freed until this returns. If required set_page_dirty has to
69 * be called internally to this method.
70 */
71 void (*invalidate_page)(struct mmu_notifier *mn,
72 struct mm_struct *mm,
73 unsigned long address);
74
75 /*
76 * invalidate_range_start() and invalidate_range_end() must be
77 * paired and are called only when the mmap_sem and/or the
78 * locks protecting the reverse maps are held. The subsystem
79 * must guarantee that no additional references are taken to
80 * the pages in the range established between the call to
81 * invalidate_range_start() and the matching call to
82 * invalidate_range_end().
83 *
84 * Invalidation of multiple concurrent ranges may be
85 * optionally permitted by the driver. Either way the
86 * establishment of sptes is forbidden in the range passed to
 87 * invalidate_range_start/end for the whole duration of the
 88 * invalidate_range_start/end critical section.
89 *
90 * invalidate_range_start() is called when all pages in the
91 * range are still mapped and have at least a refcount of one.
92 *
93 * invalidate_range_end() is called when all pages in the
94 * range have been unmapped and the pages have been freed by
95 * the VM.
96 *
97 * The VM will remove the page table entries and potentially
98 * the page between invalidate_range_start() and
99 * invalidate_range_end(). If the page must not be freed
100 * because of pending I/O or other circumstances then the
101 * invalidate_range_start() callback (or the initial mapping
102 * by the driver) must make sure that the refcount is kept
103 * elevated.
104 *
105 * If the driver increases the refcount when the pages are
106 * initially mapped into an address space then either
107 * invalidate_range_start() or invalidate_range_end() may
108 * decrease the refcount. If the refcount is decreased on
109 * invalidate_range_start() then the VM can free pages as page
110 * table entries are removed. If the refcount is only
 111 * dropped on invalidate_range_end() then the driver itself
112 * will drop the last refcount but it must take care to flush
113 * any secondary tlb before doing the final free on the
114 * page. Pages will no longer be referenced by the linux
115 * address space but may still be referenced by sptes until
116 * the last refcount is dropped.
117 */
118 void (*invalidate_range_start)(struct mmu_notifier *mn,
119 struct mm_struct *mm,
120 unsigned long start, unsigned long end);
121 void (*invalidate_range_end)(struct mmu_notifier *mn,
122 struct mm_struct *mm,
123 unsigned long start, unsigned long end);
124};
125
126/*
127 * The notifier chains are protected by mmap_sem and/or the reverse map
128 * semaphores. Notifier chains are only changed when all reverse maps and
129 * the mmap_sem locks are taken.
130 *
131 * Therefore notifier chains can only be traversed when either
132 *
133 * 1. mmap_sem is held.
134 * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock).
135 * 3. No other concurrent thread can access the list (release)
136 */
137struct mmu_notifier {
138 struct hlist_node hlist;
139 const struct mmu_notifier_ops *ops;
140};
141
142static inline int mm_has_notifiers(struct mm_struct *mm)
143{
144 return unlikely(mm->mmu_notifier_mm);
145}
146
147extern int mmu_notifier_register(struct mmu_notifier *mn,
148 struct mm_struct *mm);
149extern int __mmu_notifier_register(struct mmu_notifier *mn,
150 struct mm_struct *mm);
151extern void mmu_notifier_unregister(struct mmu_notifier *mn,
152 struct mm_struct *mm);
153extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
154extern void __mmu_notifier_release(struct mm_struct *mm);
155extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
156 unsigned long address);
157extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
158 unsigned long address);
159extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
160 unsigned long start, unsigned long end);
161extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
162 unsigned long start, unsigned long end);
163
164static inline void mmu_notifier_release(struct mm_struct *mm)
165{
166 if (mm_has_notifiers(mm))
167 __mmu_notifier_release(mm);
168}
169
170static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
171 unsigned long address)
172{
173 if (mm_has_notifiers(mm))
174 return __mmu_notifier_clear_flush_young(mm, address);
175 return 0;
176}
177
178static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
179 unsigned long address)
180{
181 if (mm_has_notifiers(mm))
182 __mmu_notifier_invalidate_page(mm, address);
183}
184
185static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
186 unsigned long start, unsigned long end)
187{
188 if (mm_has_notifiers(mm))
189 __mmu_notifier_invalidate_range_start(mm, start, end);
190}
191
192static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
193 unsigned long start, unsigned long end)
194{
195 if (mm_has_notifiers(mm))
196 __mmu_notifier_invalidate_range_end(mm, start, end);
197}
198
199static inline void mmu_notifier_mm_init(struct mm_struct *mm)
200{
201 mm->mmu_notifier_mm = NULL;
202}
203
204static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
205{
206 if (mm_has_notifiers(mm))
207 __mmu_notifier_mm_destroy(mm);
208}
209
210/*
 211 * These two macros will eventually replace ptep_clear_flush.
 212 * ptep_clear_flush is implemented as a macro itself, so this also is
 213 * implemented as a macro until ptep_clear_flush is converted to an
 214 * inline function, to reduce the risk of compilation failure. The
215 * invalidate_page method over time can be moved outside the PT lock
216 * and these two macros can be later removed.
217 */
218#define ptep_clear_flush_notify(__vma, __address, __ptep) \
219({ \
220 pte_t __pte; \
221 struct vm_area_struct *___vma = __vma; \
222 unsigned long ___address = __address; \
223 __pte = ptep_clear_flush(___vma, ___address, __ptep); \
224 mmu_notifier_invalidate_page(___vma->vm_mm, ___address); \
225 __pte; \
226})
227
228#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \
229({ \
230 int __young; \
231 struct vm_area_struct *___vma = __vma; \
232 unsigned long ___address = __address; \
233 __young = ptep_clear_flush_young(___vma, ___address, __ptep); \
234 __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \
235 ___address); \
236 __young; \
237})
238
239#else /* CONFIG_MMU_NOTIFIER */
240
241static inline void mmu_notifier_release(struct mm_struct *mm)
242{
243}
244
245static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
246 unsigned long address)
247{
248 return 0;
249}
250
251static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
252 unsigned long address)
253{
254}
255
256static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
257 unsigned long start, unsigned long end)
258{
259}
260
261static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
262 unsigned long start, unsigned long end)
263{
264}
265
266static inline void mmu_notifier_mm_init(struct mm_struct *mm)
267{
268}
269
270static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
271{
272}
273
274#define ptep_clear_flush_young_notify ptep_clear_flush_young
275#define ptep_clear_flush_notify ptep_clear_flush
276
277#endif /* CONFIG_MMU_NOTIFIER */
278
279#endif /* _LINUX_MMU_NOTIFIER_H */
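
A minimal sketch of a driver-side consumer of the interface above, assuming the mmu_notifier API this patch introduces; the struct, callback, and function names are illustrative, not part of the patch:

struct my_shadow_mmu {
	struct mmu_notifier mn;
	/* driver-private secondary-MMU state would live here */
};

static void my_invalidate_range_start(struct mmu_notifier *mn,
				      struct mm_struct *mm,
				      unsigned long start, unsigned long end)
{
	/* tear down secondary-MMU mappings covering [start, end) before
	 * the primary ptes are changed; secondary faults should block
	 * until the matching invalidate_range_end arrives */
}

static const struct mmu_notifier_ops my_ops = {
	.invalidate_range_start = my_invalidate_range_start,
};

static int my_attach(struct my_shadow_mmu *s)
{
	s->mn.ops = &my_ops;
	/* the caller must hold an mm_users pin, e.g. via current->mm */
	return mmu_notifier_register(&s->mn, current->mm);
}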
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a81d81890422..a39b38ccdc97 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -20,6 +20,7 @@
20 */ 20 */
21#define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */ 21#define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */
22#define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */ 22#define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */
23#define AS_MM_ALL_LOCKS (__GFP_BITS_SHIFT + 2) /* under mm_take_all_locks() */
23 24
24static inline void mapping_set_error(struct address_space *mapping, int error) 25static inline void mapping_set_error(struct address_space *mapping, int error)
25{ 26{
diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
index a1a1e618e996..91ba0b338b47 100644
--- a/include/linux/pci-aspm.h
+++ b/include/linux/pci-aspm.h
@@ -27,6 +27,7 @@ extern void pcie_aspm_init_link_state(struct pci_dev *pdev);
27extern void pcie_aspm_exit_link_state(struct pci_dev *pdev); 27extern void pcie_aspm_exit_link_state(struct pci_dev *pdev);
28extern void pcie_aspm_pm_state_change(struct pci_dev *pdev); 28extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
29extern void pci_disable_link_state(struct pci_dev *pdev, int state); 29extern void pci_disable_link_state(struct pci_dev *pdev, int state);
30extern void pcie_no_aspm(void);
30#else 31#else
31static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) 32static inline void pcie_aspm_init_link_state(struct pci_dev *pdev)
32{ 33{
@@ -40,6 +41,10 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev)
40static inline void pci_disable_link_state(struct pci_dev *pdev, int state) 41static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
41{ 42{
42} 43}
44
45static inline void pcie_no_aspm(void)
46{
47}
43#endif 48#endif
44 49
45#ifdef CONFIG_PCIEASPM_DEBUG /* this depends on CONFIG_PCIEASPM */ 50#ifdef CONFIG_PCIEASPM_DEBUG /* this depends on CONFIG_PCIEASPM */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1d296d31abe0..825be3878f68 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -124,6 +124,8 @@ enum pci_dev_flags {
124 * generation too. 124 * generation too.
125 */ 125 */
126 PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1, 126 PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1,
127 /* Device configuration is irrevocably lost if disabled into D3 */
128 PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2,
127}; 129};
128 130
129typedef unsigned short __bitwise pci_bus_flags_t; 131typedef unsigned short __bitwise pci_bus_flags_t;
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 19958b929905..450684f7eaac 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -374,6 +374,7 @@
374#define PCI_EXP_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */ 374#define PCI_EXP_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */
375#define PCI_EXP_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */ 375#define PCI_EXP_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */
376#define PCI_EXP_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */ 376#define PCI_EXP_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */
377#define PCI_EXP_DEVCAP_RBER 0x8000 /* Role-Based Error Reporting */
377#define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */ 378#define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */
378#define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */ 379#define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */
379#define PCI_EXP_DEVCTL 8 /* Device Control */ 380#define PCI_EXP_DEVCTL 8 /* Device Control */
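
A hedged sketch of how the new Role-Based Error Reporting capability bit might be queried; pci_find_capability() and pci_read_config_dword() are existing PCI core helpers, while the wrapper function itself is invented for illustration:

/* Illustrative only: does this device advertise Role-Based Error
 * Reporting in its PCI Express Device Capabilities register? */
static int dev_has_rber(struct pci_dev *dev)
{
	int pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
	u32 devcap;

	if (!pos)
		return 0;
	pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP, &devcap);
	return !!(devcap & PCI_EXP_DEVCAP_RBER);
}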
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index b0f39be08b6c..eb4443c7e05b 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -98,6 +98,34 @@ static inline void list_del_rcu(struct list_head *entry)
98} 98}
99 99
100/** 100/**
101 * hlist_del_init_rcu - deletes entry from hash list with re-initialization
102 * @n: the element to delete from the hash list.
103 *
104 * Note: hlist_unhashed() on the node returns true after this. It is
105 * useful for RCU-based lock-free read traversal when the writer side
106 * must know whether the list entry is still hashed or already unhashed.
107 *
108 * In particular, it means that we cannot poison the forward pointers
109 * that may still be used for walking the hash list; we can only
110 * zero the pprev pointer so that hlist_unhashed() will return true
111 * after this.
112 *
113 * The caller must take whatever precautions are necessary (such as
114 * holding appropriate locks) to avoid racing with another
115 * list-mutation primitive, such as hlist_add_head_rcu() or
116 * hlist_del_rcu(), running on this same list. However, it is
117 * perfectly legal to run concurrently with the _rcu list-traversal
118 * primitives, such as hlist_for_each_entry_rcu().
119 */
120static inline void hlist_del_init_rcu(struct hlist_node *n)
121{
122 if (!hlist_unhashed(n)) {
123 __hlist_del(n);
124 n->pprev = NULL;
125 }
126}
127
128/**
101 * list_replace_rcu - replace old entry by new one 129 * list_replace_rcu - replace old entry by new one
102 * @old : the element to be replaced 130 * @old : the element to be replaced
103 * @new : the new element to insert 131 * @new : the new element to insert
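
A short sketch of the usage pattern hlist_del_init_rcu() enables: a writer can unhash and later rehash an entry under its lock while readers keep traversing under rcu_read_lock(). The object and lock names here are illustrative:

struct my_obj {
	struct hlist_node node;
	/* payload */
};

static void my_rehash(struct my_obj *obj, struct hlist_head *new_bucket,
		      spinlock_t *table_lock)
{
	spin_lock(table_lock);
	if (!hlist_unhashed(&obj->node))
		hlist_del_init_rcu(&obj->node);
	/* pprev is now NULL but the forward pointer is not poisoned,
	 * so concurrent hlist_for_each_entry_rcu() walkers stay safe */
	hlist_add_head_rcu(&obj->node, new_bucket);
	spin_unlock(table_lock);
}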
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1383692ac5bd..69407f85e10b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -26,6 +26,14 @@
26 */ 26 */
27struct anon_vma { 27struct anon_vma {
28 spinlock_t lock; /* Serialize access to vma list */ 28 spinlock_t lock; /* Serialize access to vma list */
29 /*
30 * NOTE: the LSB of the head.next is set by
31 * mm_take_all_locks() _after_ taking the above lock. So the
32 * head must only be read/written after taking the above lock
33 * to be sure to see a valid next pointer. The LSB itself
34 * is serialized by a system-wide lock only visible to
35 * mm_take_all_locks() (mm_all_locks_mutex).
36 */
29 struct list_head head; /* List of private "related" vmas */ 37 struct list_head head; /* List of private "related" vmas */
30}; 38};
31 39
diff --git a/init/Kconfig b/init/Kconfig
index 43d6989c275f..250e02c8f8f9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -802,6 +802,10 @@ config PROC_PAGE_MONITOR
802 802
803endmenu # General setup 803endmenu # General setup
804 804
805config HAVE_GENERIC_DMA_COHERENT
806 bool
807 default n
808
805config SLABINFO 809config SLABINFO
806 bool 810 bool
807 depends on PROC_FS 811 depends on PROC_FS
diff --git a/kernel/Makefile b/kernel/Makefile
index 54f69837d35a..4e1d7df7c3e2 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -84,6 +84,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
84obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o 84obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
85obj-$(CONFIG_MARKERS) += marker.o 85obj-$(CONFIG_MARKERS) += marker.o
86obj-$(CONFIG_LATENCYTOP) += latencytop.o 86obj-$(CONFIG_LATENCYTOP) += latencytop.o
87obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
87obj-$(CONFIG_FTRACE) += trace/ 88obj-$(CONFIG_FTRACE) += trace/
88obj-$(CONFIG_TRACING) += trace/ 89obj-$(CONFIG_TRACING) += trace/
89obj-$(CONFIG_SMP) += sched_cpupri.o 90obj-$(CONFIG_SMP) += sched_cpupri.o
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
new file mode 100644
index 000000000000..7517115a8cce
--- /dev/null
+++ b/kernel/dma-coherent.c
@@ -0,0 +1,154 @@
1/*
2 * Coherent per-device memory handling.
3 * Borrowed from i386
4 */
5#include <linux/kernel.h>
6#include <linux/dma-mapping.h>
7
8struct dma_coherent_mem {
9 void *virt_base;
10 u32 device_base;
11 int size;
12 int flags;
13 unsigned long *bitmap;
14};
15
16int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
17 dma_addr_t device_addr, size_t size, int flags)
18{
19 void __iomem *mem_base = NULL;
20 int pages = size >> PAGE_SHIFT;
21 int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
22
23 if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
24 goto out;
25 if (!size)
26 goto out;
27 if (dev->dma_mem)
28 goto out;
29
30 /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
31
32 mem_base = ioremap(bus_addr, size);
33 if (!mem_base)
34 goto out;
35
36 dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
37 if (!dev->dma_mem)
38 goto out;
39 dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
40 if (!dev->dma_mem->bitmap)
41 goto free1_out;
42
43 dev->dma_mem->virt_base = mem_base;
44 dev->dma_mem->device_base = device_addr;
45 dev->dma_mem->size = pages;
46 dev->dma_mem->flags = flags;
47
48 if (flags & DMA_MEMORY_MAP)
49 return DMA_MEMORY_MAP;
50
51 return DMA_MEMORY_IO;
52
53 free1_out:
54 kfree(dev->dma_mem);
55 out:
56 if (mem_base)
57 iounmap(mem_base);
58 return 0;
59}
60EXPORT_SYMBOL(dma_declare_coherent_memory);
61
62void dma_release_declared_memory(struct device *dev)
63{
64 struct dma_coherent_mem *mem = dev->dma_mem;
65
66 if (!mem)
67 return;
68 dev->dma_mem = NULL;
69 iounmap(mem->virt_base);
70 kfree(mem->bitmap);
71 kfree(mem);
72}
73EXPORT_SYMBOL(dma_release_declared_memory);
74
75void *dma_mark_declared_memory_occupied(struct device *dev,
76 dma_addr_t device_addr, size_t size)
77{
78 struct dma_coherent_mem *mem = dev->dma_mem;
79 int pos, err;
80 int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
81
82 pages >>= PAGE_SHIFT;
83
84 if (!mem)
85 return ERR_PTR(-EINVAL);
86
87 pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
88 err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
89 if (err != 0)
90 return ERR_PTR(err);
91 return mem->virt_base + (pos << PAGE_SHIFT);
92}
93EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
94
95/**
96 * dma_alloc_from_coherent() - try to allocate memory from the per-device coherent area
97 *
98 * @dev: device from which we allocate memory
99 * @size: size of requested memory area
100 * @dma_handle: This will be filled with the correct dma handle
101 * @ret: This pointer will be filled with the virtual address
102 * of the allocated area.
103 *
104 * This function should only be called from per-arch dma_alloc_coherent()
105 * to support allocation from per-device coherent memory pools.
106 *
107 * Returns 0 if dma_alloc_coherent should continue with allocating from
108 * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
109 */
110int dma_alloc_from_coherent(struct device *dev, ssize_t size,
111 dma_addr_t *dma_handle, void **ret)
112{
113 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
114 int order = get_order(size);
115
116 if (mem) {
117 int page = bitmap_find_free_region(mem->bitmap, mem->size,
118 order);
119 if (page >= 0) {
120 *dma_handle = mem->device_base + (page << PAGE_SHIFT);
121 *ret = mem->virt_base + (page << PAGE_SHIFT);
122 memset(*ret, 0, size);
123 } else if (mem->flags & DMA_MEMORY_EXCLUSIVE)
124 *ret = NULL;
125 }
126 return (mem != NULL);
127}
128
129/**
130 * dma_release_from_coherent() - try to free memory allocated from a per-device coherent memory pool
131 * @dev: device from which the memory was allocated
132 * @order: the order of pages allocated
133 * @vaddr: virtual address of allocated pages
134 *
135 * This checks whether the memory was allocated from the per-device
136 * coherent memory pool and if so, releases that memory.
137 *
138 * Returns 1 if we correctly released the memory, or 0 if
139 * dma_free_coherent() should proceed with releasing memory from
140 * generic pools.
141 */
142int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
143{
144 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
145
146 if (mem && vaddr >= mem->virt_base && vaddr <
147 (mem->virt_base + (mem->size << PAGE_SHIFT))) {
148 int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
149
150 bitmap_release_region(mem->bitmap, page, order);
151 return 1;
152 }
153 return 0;
154}
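
A sketch of how a platform driver might dedicate an SRAM window to one device through the API above; the probe function, addresses, and size are invented for illustration:

static int my_probe(struct device *dev)
{
	/* claim a 64 KiB window at a made-up bus address as this
	 * device's private coherent pool */
	int rc = dma_declare_coherent_memory(dev, 0x10000000, 0x10000000,
					     0x10000,
					     DMA_MEMORY_MAP |
					     DMA_MEMORY_EXCLUSIVE);
	if (!(rc & DMA_MEMORY_MAP))
		return -ENODEV;
	/* dma_alloc_coherent() on this device is now expected to carve
	 * from the pool via dma_alloc_from_coherent() first */
	return 0;
}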
diff --git a/kernel/fork.c b/kernel/fork.c
index 8214ba7c8bb1..7ce2ebe84796 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -27,6 +27,7 @@
27#include <linux/key.h> 27#include <linux/key.h>
28#include <linux/binfmts.h> 28#include <linux/binfmts.h>
29#include <linux/mman.h> 29#include <linux/mman.h>
30#include <linux/mmu_notifier.h>
30#include <linux/fs.h> 31#include <linux/fs.h>
31#include <linux/nsproxy.h> 32#include <linux/nsproxy.h>
32#include <linux/capability.h> 33#include <linux/capability.h>
@@ -414,6 +415,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
414 415
415 if (likely(!mm_alloc_pgd(mm))) { 416 if (likely(!mm_alloc_pgd(mm))) {
416 mm->def_flags = 0; 417 mm->def_flags = 0;
418 mmu_notifier_mm_init(mm);
417 return mm; 419 return mm;
418 } 420 }
419 421
@@ -446,6 +448,7 @@ void __mmdrop(struct mm_struct *mm)
446 BUG_ON(mm == &init_mm); 448 BUG_ON(mm == &init_mm);
447 mm_free_pgd(mm); 449 mm_free_pgd(mm);
448 destroy_context(mm); 450 destroy_context(mm);
451 mmu_notifier_mm_destroy(mm);
449 free_mm(mm); 452 free_mm(mm);
450} 453}
451EXPORT_SYMBOL_GPL(__mmdrop); 454EXPORT_SYMBOL_GPL(__mmdrop);
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index a3b8d4c3f77a..889ddce2021e 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -80,3 +80,11 @@ void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr)
80 } 80 }
81} 81}
82EXPORT_SYMBOL(iommu_area_free); 82EXPORT_SYMBOL(iommu_area_free);
83
84unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
85{
86 unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
87
88 return size >> PAGE_SHIFT;
89}
90EXPORT_SYMBOL(iommu_num_pages);
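
The new helper counts the pages a mapping spans after folding the sub-page offset of the start address into the length. A quick worked example with assumed values and 4 KiB pages:

/* addr 0x1001 starts one byte into a page, so a 0x2000-byte buffer
 * spans three pages, not two: (0x001 + 0x2000) rounds up to 0x3000,
 * and 0x3000 >> PAGE_SHIFT == 3 */
unsigned long n = iommu_num_pages(0x1001, 0x2000);	/* n == 3 */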
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 35136671b215..26187edcc7ea 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -15,7 +15,6 @@
15#include <linux/module.h> 15#include <linux/module.h>
16 16
17static DEFINE_SPINLOCK(ratelimit_lock); 17static DEFINE_SPINLOCK(ratelimit_lock);
18static unsigned long flags;
19 18
20/* 19/*
21 * __ratelimit - rate limiting 20 * __ratelimit - rate limiting
@@ -26,6 +25,8 @@ static unsigned long flags;
26 */ 25 */
27int __ratelimit(struct ratelimit_state *rs) 26int __ratelimit(struct ratelimit_state *rs)
28{ 27{
28 unsigned long flags;
29
29 if (!rs->interval) 30 if (!rs->interval)
30 return 1; 31 return 1;
31 32
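
The point of this fix is that flags holds the caller's saved IRQ state for spin_lock_irqsave(), so it must live on each caller's stack; a single static copy could be clobbered when two CPUs rate-limit at once. The unshown body presumably follows the usual pattern, sketched here under that assumption:

int __ratelimit_sketch(struct ratelimit_state *rs)	/* not the real body */
{
	unsigned long flags;	/* per-caller saved IRQ state */
	int ret = 0;

	spin_lock_irqsave(&ratelimit_lock, flags);
	/* ... check the rs->interval window, update counters ... */
	spin_unlock_irqrestore(&ratelimit_lock, flags);
	return ret;
}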
diff --git a/mm/Kconfig b/mm/Kconfig
index efee5d379df4..446c6588c753 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -208,3 +208,6 @@ config NR_QUICK
208config VIRT_TO_BUS 208config VIRT_TO_BUS
209 def_bool y 209 def_bool y
210 depends on !ARCH_NO_VIRT_TO_BUS 210 depends on !ARCH_NO_VIRT_TO_BUS
211
212config MMU_NOTIFIER
213 bool
diff --git a/mm/Makefile b/mm/Makefile
index 06ca2381fef1..da4ccf015aea 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_SHMEM) += shmem.o
25obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o 25obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
26obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o 26obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
27obj-$(CONFIG_SLOB) += slob.o 27obj-$(CONFIG_SLOB) += slob.o
28obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
28obj-$(CONFIG_SLAB) += slab.o 29obj-$(CONFIG_SLAB) += slab.o
29obj-$(CONFIG_SLUB) += slub.o 30obj-$(CONFIG_SLUB) += slub.o
30obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o 31obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
diff --git a/mm/filemap.c b/mm/filemap.c
index 5de7633e1dbe..42bbc6909ba4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1023,8 +1023,17 @@ find_page:
1023 ra, filp, page, 1023 ra, filp, page,
1024 index, last_index - index); 1024 index, last_index - index);
1025 } 1025 }
1026 if (!PageUptodate(page)) 1026 if (!PageUptodate(page)) {
1027 goto page_not_up_to_date; 1027 if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
1028 !mapping->a_ops->is_partially_uptodate)
1029 goto page_not_up_to_date;
1030 if (TestSetPageLocked(page))
1031 goto page_not_up_to_date;
1032 if (!mapping->a_ops->is_partially_uptodate(page,
1033 desc, offset))
1034 goto page_not_up_to_date_locked;
1035 unlock_page(page);
1036 }
1028page_ok: 1037page_ok:
1029 /* 1038 /*
1030 * i_size must be checked after we know the page is Uptodate. 1039 * i_size must be checked after we know the page is Uptodate.
@@ -1094,6 +1103,7 @@ page_not_up_to_date:
1094 if (lock_page_killable(page)) 1103 if (lock_page_killable(page))
1095 goto readpage_eio; 1104 goto readpage_eio;
1096 1105
1106page_not_up_to_date_locked:
1097 /* Did it get truncated before we got the lock? */ 1107 /* Did it get truncated before we got the lock? */
1098 if (!page->mapping) { 1108 if (!page->mapping) {
1099 unlock_page(page); 1109 unlock_page(page);
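
The new fast path lets a read avoid a full-page read when only the bytes it needs are already valid. A hedged sketch of the shape such an address_space operation would take (the concrete buffer-layer implementation is not part of this hunk):

/* Illustrative signature: return non-zero if bytes
 * [from, from + desc->count) of a locked, not-uptodate page are
 * nevertheless valid to copy out. */
static int myfs_is_partially_uptodate(struct page *page,
				      read_descriptor_t *desc,
				      unsigned long from)
{
	/* e.g. walk the page's buffer_heads and test uptodate state
	 * over just the requested byte range */
	return 0;
}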
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 98a3f31ccd6a..380ab402d711 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/uio.h> 14#include <linux/uio.h>
15#include <linux/rmap.h> 15#include <linux/rmap.h>
16#include <linux/mmu_notifier.h>
16#include <linux/sched.h> 17#include <linux/sched.h>
17#include <asm/tlbflush.h> 18#include <asm/tlbflush.h>
18#include <asm/io.h> 19#include <asm/io.h>
@@ -188,7 +189,7 @@ __xip_unmap (struct address_space * mapping,
188 if (pte) { 189 if (pte) {
189 /* Nuke the page table entry. */ 190 /* Nuke the page table entry. */
190 flush_cache_page(vma, address, pte_pfn(*pte)); 191 flush_cache_page(vma, address, pte_pfn(*pte));
191 pteval = ptep_clear_flush(vma, address, pte); 192 pteval = ptep_clear_flush_notify(vma, address, pte);
192 page_remove_rmap(page, vma); 193 page_remove_rmap(page, vma);
193 dec_mm_counter(mm, file_rss); 194 dec_mm_counter(mm, file_rss);
194 BUG_ON(pte_dirty(pteval)); 195 BUG_ON(pte_dirty(pteval));
diff --git a/mm/fremap.c b/mm/fremap.c
index 07a9c82ce1a3..7881638e4a12 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -15,6 +15,7 @@
15#include <linux/rmap.h> 15#include <linux/rmap.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/syscalls.h> 17#include <linux/syscalls.h>
18#include <linux/mmu_notifier.h>
18 19
19#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
20#include <asm/cacheflush.h> 21#include <asm/cacheflush.h>
@@ -214,7 +215,9 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
214 spin_unlock(&mapping->i_mmap_lock); 215 spin_unlock(&mapping->i_mmap_lock);
215 } 216 }
216 217
218 mmu_notifier_invalidate_range_start(mm, start, start + size);
217 err = populate_range(mm, vma, start, size, pgoff); 219 err = populate_range(mm, vma, start, size, pgoff);
220 mmu_notifier_invalidate_range_end(mm, start, start + size);
218 if (!err && !(flags & MAP_NONBLOCK)) { 221 if (!err && !(flags & MAP_NONBLOCK)) {
219 if (unlikely(has_write_lock)) { 222 if (unlikely(has_write_lock)) {
220 downgrade_write(&mm->mmap_sem); 223 downgrade_write(&mm->mmap_sem);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3be79dc18c5c..254ce2b90158 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -9,6 +9,7 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/sysctl.h> 10#include <linux/sysctl.h>
11#include <linux/highmem.h> 11#include <linux/highmem.h>
12#include <linux/mmu_notifier.h>
12#include <linux/nodemask.h> 13#include <linux/nodemask.h>
13#include <linux/pagemap.h> 14#include <linux/pagemap.h>
14#include <linux/mempolicy.h> 15#include <linux/mempolicy.h>
@@ -19,6 +20,7 @@
19 20
20#include <asm/page.h> 21#include <asm/page.h>
21#include <asm/pgtable.h> 22#include <asm/pgtable.h>
23#include <asm/io.h>
22 24
23#include <linux/hugetlb.h> 25#include <linux/hugetlb.h>
24#include "internal.h" 26#include "internal.h"
@@ -1672,6 +1674,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
1672 BUG_ON(start & ~huge_page_mask(h)); 1674 BUG_ON(start & ~huge_page_mask(h));
1673 BUG_ON(end & ~huge_page_mask(h)); 1675 BUG_ON(end & ~huge_page_mask(h));
1674 1676
1677 mmu_notifier_invalidate_range_start(mm, start, end);
1675 spin_lock(&mm->page_table_lock); 1678 spin_lock(&mm->page_table_lock);
1676 for (address = start; address < end; address += sz) { 1679 for (address = start; address < end; address += sz) {
1677 ptep = huge_pte_offset(mm, address); 1680 ptep = huge_pte_offset(mm, address);
@@ -1713,6 +1716,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
1713 } 1716 }
1714 spin_unlock(&mm->page_table_lock); 1717 spin_unlock(&mm->page_table_lock);
1715 flush_tlb_range(vma, start, end); 1718 flush_tlb_range(vma, start, end);
1719 mmu_notifier_invalidate_range_end(mm, start, end);
1716 list_for_each_entry_safe(page, tmp, &page_list, lru) { 1720 list_for_each_entry_safe(page, tmp, &page_list, lru) {
1717 list_del(&page->lru); 1721 list_del(&page->lru);
1718 put_page(page); 1722 put_page(page);
diff --git a/mm/memory.c b/mm/memory.c
index a8ca04faaea6..67f0ab9077d9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -51,6 +51,7 @@
51#include <linux/init.h> 51#include <linux/init.h>
52#include <linux/writeback.h> 52#include <linux/writeback.h>
53#include <linux/memcontrol.h> 53#include <linux/memcontrol.h>
54#include <linux/mmu_notifier.h>
54 55
55#include <asm/pgalloc.h> 56#include <asm/pgalloc.h>
56#include <asm/uaccess.h> 57#include <asm/uaccess.h>
@@ -652,6 +653,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
652 unsigned long next; 653 unsigned long next;
653 unsigned long addr = vma->vm_start; 654 unsigned long addr = vma->vm_start;
654 unsigned long end = vma->vm_end; 655 unsigned long end = vma->vm_end;
656 int ret;
655 657
656 /* 658 /*
657 * Don't copy ptes where a page fault will fill them correctly. 659 * Don't copy ptes where a page fault will fill them correctly.
@@ -667,17 +669,33 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
667 if (is_vm_hugetlb_page(vma)) 669 if (is_vm_hugetlb_page(vma))
668 return copy_hugetlb_page_range(dst_mm, src_mm, vma); 670 return copy_hugetlb_page_range(dst_mm, src_mm, vma);
669 671
672 /*
673 * We need to invalidate the secondary MMU mappings only when
674 * there could be a permission downgrade on the ptes of the
675 * parent mm. And a permission downgrade will only happen if
676 * is_cow_mapping() returns true.
677 */
678 if (is_cow_mapping(vma->vm_flags))
679 mmu_notifier_invalidate_range_start(src_mm, addr, end);
680
681 ret = 0;
670 dst_pgd = pgd_offset(dst_mm, addr); 682 dst_pgd = pgd_offset(dst_mm, addr);
671 src_pgd = pgd_offset(src_mm, addr); 683 src_pgd = pgd_offset(src_mm, addr);
672 do { 684 do {
673 next = pgd_addr_end(addr, end); 685 next = pgd_addr_end(addr, end);
674 if (pgd_none_or_clear_bad(src_pgd)) 686 if (pgd_none_or_clear_bad(src_pgd))
675 continue; 687 continue;
676 if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, 688 if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
677 vma, addr, next)) 689 vma, addr, next))) {
678 return -ENOMEM; 690 ret = -ENOMEM;
691 break;
692 }
679 } while (dst_pgd++, src_pgd++, addr = next, addr != end); 693 } while (dst_pgd++, src_pgd++, addr = next, addr != end);
680 return 0; 694
695 if (is_cow_mapping(vma->vm_flags))
696 mmu_notifier_invalidate_range_end(src_mm,
697 vma->vm_start, end);
698 return ret;
681} 699}
682 700
683static unsigned long zap_pte_range(struct mmu_gather *tlb, 701static unsigned long zap_pte_range(struct mmu_gather *tlb,
@@ -881,7 +899,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
881 unsigned long start = start_addr; 899 unsigned long start = start_addr;
882 spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; 900 spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
883 int fullmm = (*tlbp)->fullmm; 901 int fullmm = (*tlbp)->fullmm;
902 struct mm_struct *mm = vma->vm_mm;
884 903
904 mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
885 for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) { 905 for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
886 unsigned long end; 906 unsigned long end;
887 907
@@ -946,6 +966,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
946 } 966 }
947 } 967 }
948out: 968out:
969 mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
949 return start; /* which is now the end (or restart) address */ 970 return start; /* which is now the end (or restart) address */
950} 971}
951 972
@@ -1616,10 +1637,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
1616{ 1637{
1617 pgd_t *pgd; 1638 pgd_t *pgd;
1618 unsigned long next; 1639 unsigned long next;
1619 unsigned long end = addr + size; 1640 unsigned long start = addr, end = addr + size;
1620 int err; 1641 int err;
1621 1642
1622 BUG_ON(addr >= end); 1643 BUG_ON(addr >= end);
1644 mmu_notifier_invalidate_range_start(mm, start, end);
1623 pgd = pgd_offset(mm, addr); 1645 pgd = pgd_offset(mm, addr);
1624 do { 1646 do {
1625 next = pgd_addr_end(addr, end); 1647 next = pgd_addr_end(addr, end);
@@ -1627,6 +1649,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
1627 if (err) 1649 if (err)
1628 break; 1650 break;
1629 } while (pgd++, addr = next, addr != end); 1651 } while (pgd++, addr = next, addr != end);
1652 mmu_notifier_invalidate_range_end(mm, start, end);
1630 return err; 1653 return err;
1631} 1654}
1632EXPORT_SYMBOL_GPL(apply_to_page_range); 1655EXPORT_SYMBOL_GPL(apply_to_page_range);
@@ -1839,7 +1862,7 @@ gotten:
1839 * seen in the presence of one thread doing SMC and another 1862 * seen in the presence of one thread doing SMC and another
1840 * thread doing COW. 1863 * thread doing COW.
1841 */ 1864 */
1842 ptep_clear_flush(vma, address, page_table); 1865 ptep_clear_flush_notify(vma, address, page_table);
1843 set_pte_at(mm, address, page_table, entry); 1866 set_pte_at(mm, address, page_table, entry);
1844 update_mmu_cache(vma, address, entry); 1867 update_mmu_cache(vma, address, entry);
1845 lru_cache_add_active(new_page); 1868 lru_cache_add_active(new_page);
diff --git a/mm/mmap.c b/mm/mmap.c
index 5e0cc99e9cd5..245c3d69067b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -26,6 +26,7 @@
26#include <linux/mount.h> 26#include <linux/mount.h>
27#include <linux/mempolicy.h> 27#include <linux/mempolicy.h>
28#include <linux/rmap.h> 28#include <linux/rmap.h>
29#include <linux/mmu_notifier.h>
29 30
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
31#include <asm/cacheflush.h> 32#include <asm/cacheflush.h>
@@ -2061,6 +2062,7 @@ void exit_mmap(struct mm_struct *mm)
2061 2062
2062 /* mm's last user has gone, and its about to be pulled down */ 2063 /* mm's last user has gone, and its about to be pulled down */
2063 arch_exit_mmap(mm); 2064 arch_exit_mmap(mm);
2065 mmu_notifier_release(mm);
2064 2066
2065 lru_add_drain(); 2067 lru_add_drain();
2066 flush_cache_mm(mm); 2068 flush_cache_mm(mm);
@@ -2268,3 +2270,161 @@ int install_special_mapping(struct mm_struct *mm,
2268 2270
2269 return 0; 2271 return 0;
2270} 2272}
2273
2274static DEFINE_MUTEX(mm_all_locks_mutex);
2275
2276static void vm_lock_anon_vma(struct anon_vma *anon_vma)
2277{
2278 if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2279 /*
2280 * The LSB of head.next can't change from under us
2281 * because we hold the mm_all_locks_mutex.
2282 */
2283 spin_lock(&anon_vma->lock);
2284 /*
2285 * We can safely modify head.next after taking the
2286 * anon_vma->lock. If some other vma in this mm shares
2287 * the same anon_vma we won't take it again.
2288 *
2289 * No need of atomic instructions here, head.next
2290 * can't change from under us thanks to the
2291 * anon_vma->lock.
2292 */
2293 if (__test_and_set_bit(0, (unsigned long *)
2294 &anon_vma->head.next))
2295 BUG();
2296 }
2297}
2298
2299static void vm_lock_mapping(struct address_space *mapping)
2300{
2301 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2302 /*
2303 * AS_MM_ALL_LOCKS can't change from under us because
2304 * we hold the mm_all_locks_mutex.
2305 *
2306 * Operations on ->flags have to be atomic because
2307 * even if AS_MM_ALL_LOCKS is stable thanks to the
2308 * mm_all_locks_mutex, there may be other cpus
2309 * changing other bitflags in parallel to us.
2310 */
2311 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
2312 BUG();
2313 spin_lock(&mapping->i_mmap_lock);
2314 }
2315}
2316
2317/*
2318 * This operation locks against the VM for all pte/vma/mm related
2319 * operations that could ever happen on a certain mm. This includes
2320 * vmtruncate, try_to_unmap, and all page faults.
2321 *
2322 * The caller must take the mmap_sem in write mode before calling
2323 * mm_take_all_locks(). The caller isn't allowed to release the
2324 * mmap_sem until mm_drop_all_locks() returns.
2325 *
2326 * mmap_sem in write mode is required in order to block all operations
2327 * that could modify pagetables and free pages without needing to
2328 * alter the vma layout (for example populate_range() with
2329 * nonlinear vmas). It's also needed in write mode to prevent new
2330 * anon_vmas from being associated with existing vmas.
2331 *
2332 * A single task can't take more than one mm_take_all_locks() in a row
2333 * or it would deadlock.
2334 *
2335 * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
2336 * mapping->flags ensure the same lock is not taken twice when more
2337 * than one vma in this mm is backed by the same anon_vma or address_space.
2338 *
2339 * We can take all the locks in random order because the VM code
2340 * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
2341 * takes more than one of them in a row. Secondly we're protected
2342 * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
2343 *
2344 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
2345 * that may have to take thousands of locks.
2346 *
2347 * mm_take_all_locks() can fail if it's interrupted by signals.
2348 */
2349int mm_take_all_locks(struct mm_struct *mm)
2350{
2351 struct vm_area_struct *vma;
2352 int ret = -EINTR;
2353
2354 BUG_ON(down_read_trylock(&mm->mmap_sem));
2355
2356 mutex_lock(&mm_all_locks_mutex);
2357
2358 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2359 if (signal_pending(current))
2360 goto out_unlock;
2361 if (vma->anon_vma)
2362 vm_lock_anon_vma(vma->anon_vma);
2363 if (vma->vm_file && vma->vm_file->f_mapping)
2364 vm_lock_mapping(vma->vm_file->f_mapping);
2365 }
2366 ret = 0;
2367
2368out_unlock:
2369 if (ret)
2370 mm_drop_all_locks(mm);
2371
2372 return ret;
2373}
2374
2375static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
2376{
2377 if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2378 /*
2379 * The LSB of head.next can't change to 0 from under
2380 * us because we hold the mm_all_locks_mutex.
2381 *
2382 * We must however clear the bitflag before unlocking
2383 * the vma so that users of anon_vma->head will
2384 * never see our bitflag.
2385 *
2386 * No need of atomic instructions here, head.next
2387 * can't change from under us until we release the
2388 * anon_vma->lock.
2389 */
2390 if (!__test_and_clear_bit(0, (unsigned long *)
2391 &anon_vma->head.next))
2392 BUG();
2393 spin_unlock(&anon_vma->lock);
2394 }
2395}
2396
2397static void vm_unlock_mapping(struct address_space *mapping)
2398{
2399 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2400 /*
2401 * AS_MM_ALL_LOCKS can't change to 0 from under us
2402 * because we hold the mm_all_locks_mutex.
2403 */
2404 spin_unlock(&mapping->i_mmap_lock);
2405 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
2406 &mapping->flags))
2407 BUG();
2408 }
2409}
2410
2411/*
2412 * The mmap_sem cannot be released by the caller until
2413 * mm_drop_all_locks() returns.
2414 */
2415void mm_drop_all_locks(struct mm_struct *mm)
2416{
2417 struct vm_area_struct *vma;
2418
2419 BUG_ON(down_read_trylock(&mm->mmap_sem));
2420 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
2421
2422 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2423 if (vma->anon_vma)
2424 vm_unlock_anon_vma(vma->anon_vma);
2425 if (vma->vm_file && vma->vm_file->f_mapping)
2426 vm_unlock_mapping(vma->vm_file->f_mapping);
2427 }
2428
2429 mutex_unlock(&mm_all_locks_mutex);
2430}
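
The intended calling pattern for the new pair, compressed into a sketch (the notifier registration code later in this series uses exactly this shape):

static int with_all_mm_locks(struct mm_struct *mm)
{
	int err;

	down_write(&mm->mmap_sem);
	err = mm_take_all_locks(mm);	/* -EINTR if a signal arrived */
	if (!err) {
		/* every anon_vma->lock and i_mmap_lock in this mm is
		 * now held: no pte/vma modification can run against us */
		/* ... perform the critical update ... */
		mm_drop_all_locks(mm);
	}
	up_write(&mm->mmap_sem);
	return err;
}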
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
new file mode 100644
index 000000000000..5f4ef0250bee
--- /dev/null
+++ b/mm/mmu_notifier.c
@@ -0,0 +1,277 @@
1/*
2 * linux/mm/mmu_notifier.c
3 *
4 * Copyright (C) 2008 Qumranet, Inc.
5 * Copyright (C) 2008 SGI
6 * Christoph Lameter <clameter@sgi.com>
7 *
8 * This work is licensed under the terms of the GNU GPL, version 2. See
9 * the COPYING file in the top-level directory.
10 */
11
12#include <linux/rculist.h>
13#include <linux/mmu_notifier.h>
14#include <linux/module.h>
15#include <linux/mm.h>
16#include <linux/err.h>
17#include <linux/rcupdate.h>
18#include <linux/sched.h>
19
20/*
21 * This function can't run concurrently with mmu_notifier_register
22 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
23 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
24 * in parallel despite there being no task using this mm any more,
25 * through the vmas outside of the exit_mmap context, such as with
26 * vmtruncate. This serializes against mmu_notifier_unregister with
27 * the mmu_notifier_mm->lock in addition to RCU and it serializes
28 * against the other mmu notifiers with RCU. struct mmu_notifier_mm
29 * can't go away from under us as exit_mmap holds an mm_count pin
30 * itself.
31 */
32void __mmu_notifier_release(struct mm_struct *mm)
33{
34 struct mmu_notifier *mn;
35
36 spin_lock(&mm->mmu_notifier_mm->lock);
37 while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
38 mn = hlist_entry(mm->mmu_notifier_mm->list.first,
39 struct mmu_notifier,
40 hlist);
41 /*
42 * We arrived before mmu_notifier_unregister, so
43 * mmu_notifier_unregister will do nothing other
44 * than wait for ->release to finish and then
45 * return.
46 */
47 hlist_del_init_rcu(&mn->hlist);
48 /*
49 * RCU here will block mmu_notifier_unregister until
50 * ->release returns.
51 */
52 rcu_read_lock();
53 spin_unlock(&mm->mmu_notifier_mm->lock);
54 /*
55 * If ->release runs before mmu_notifier_unregister, it
56 * must be handled, as it's the only way for the driver
57 * to flush all existing sptes and stop the driver
58 * from establishing any more sptes before all the
59 * pages in the mm are freed.
60 */
61 if (mn->ops->release)
62 mn->ops->release(mn, mm);
63 rcu_read_unlock();
64 spin_lock(&mm->mmu_notifier_mm->lock);
65 }
66 spin_unlock(&mm->mmu_notifier_mm->lock);
67
68 /*
69 * synchronize_rcu here prevents mmu_notifier_release from
70 * returning to exit_mmap (which would proceed to free all pages
71 * in the mm) until the ->release method returns, if it was
72 * invoked by mmu_notifier_unregister.
73 *
74 * The mmu_notifier_mm can't go away from under us because one
75 * mm_count is held by exit_mmap.
76 */
77 synchronize_rcu();
78}
79
80/*
81 * If no young bitflag is supported by the hardware, ->clear_flush_young can
82 * unmap the address and return 1 or 0 depending on whether the mapping
83 * previously existed.
84 */
85int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
86 unsigned long address)
87{
88 struct mmu_notifier *mn;
89 struct hlist_node *n;
90 int young = 0;
91
92 rcu_read_lock();
93 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
94 if (mn->ops->clear_flush_young)
95 young |= mn->ops->clear_flush_young(mn, mm, address);
96 }
97 rcu_read_unlock();
98
99 return young;
100}
101
102void __mmu_notifier_invalidate_page(struct mm_struct *mm,
103 unsigned long address)
104{
105 struct mmu_notifier *mn;
106 struct hlist_node *n;
107
108 rcu_read_lock();
109 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
110 if (mn->ops->invalidate_page)
111 mn->ops->invalidate_page(mn, mm, address);
112 }
113 rcu_read_unlock();
114}
115
116void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
117 unsigned long start, unsigned long end)
118{
119 struct mmu_notifier *mn;
120 struct hlist_node *n;
121
122 rcu_read_lock();
123 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
124 if (mn->ops->invalidate_range_start)
125 mn->ops->invalidate_range_start(mn, mm, start, end);
126 }
127 rcu_read_unlock();
128}
129
130void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
131 unsigned long start, unsigned long end)
132{
133 struct mmu_notifier *mn;
134 struct hlist_node *n;
135
136 rcu_read_lock();
137 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
138 if (mn->ops->invalidate_range_end)
139 mn->ops->invalidate_range_end(mn, mm, start, end);
140 }
141 rcu_read_unlock();
142}
143
144static int do_mmu_notifier_register(struct mmu_notifier *mn,
145 struct mm_struct *mm,
146 int take_mmap_sem)
147{
148 struct mmu_notifier_mm *mmu_notifier_mm;
149 int ret;
150
151 BUG_ON(atomic_read(&mm->mm_users) <= 0);
152
153 ret = -ENOMEM;
154 mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
155 if (unlikely(!mmu_notifier_mm))
156 goto out;
157
158 if (take_mmap_sem)
159 down_write(&mm->mmap_sem);
160 ret = mm_take_all_locks(mm);
161 if (unlikely(ret))
162 goto out_cleanup;
163
164 if (!mm_has_notifiers(mm)) {
165 INIT_HLIST_HEAD(&mmu_notifier_mm->list);
166 spin_lock_init(&mmu_notifier_mm->lock);
167 mm->mmu_notifier_mm = mmu_notifier_mm;
168 mmu_notifier_mm = NULL;
169 }
170 atomic_inc(&mm->mm_count);
171
172 /*
173 * Serialize the update against mmu_notifier_unregister. A
174 * side note: mmu_notifier_release can't run concurrently with
175 * us because we hold the mm_users pin (either implicitly as
176 * current->mm or explicitly with get_task_mm() or similar).
177 * We can't race against any other mmu notifier method either
178 * thanks to mm_take_all_locks().
179 */
180 spin_lock(&mm->mmu_notifier_mm->lock);
181 hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list);
182 spin_unlock(&mm->mmu_notifier_mm->lock);
183
184 mm_drop_all_locks(mm);
185out_cleanup:
186 if (take_mmap_sem)
187 up_write(&mm->mmap_sem);
188 /* kfree() does nothing if mmu_notifier_mm is NULL */
189 kfree(mmu_notifier_mm);
190out:
191 BUG_ON(atomic_read(&mm->mm_users) <= 0);
192 return ret;
193}
194
195/*
196 * Must not hold mmap_sem nor any other VM-related lock when calling
197 * this registration function. Must also ensure mm_users can't go down
198 * to zero while this runs to avoid races with mmu_notifier_release,
199 * so mm has to be current->mm or the mm should be pinned safely such
200 * as with get_task_mm(). If the mm is not current->mm, the mm_users
201 * pin should be released by calling mmput after mmu_notifier_register
202 * returns. mmu_notifier_unregister must always be called to
203 * unregister the notifier. mm_count is automatically pinned to allow
204 * mmu_notifier_unregister to safely run at any time later, before or
205 * after exit_mmap. ->release will always be called before exit_mmap
206 * frees the pages.
207 */
208int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
209{
210 return do_mmu_notifier_register(mn, mm, 1);
211}
212EXPORT_SYMBOL_GPL(mmu_notifier_register);
213
214/*
215 * Same as mmu_notifier_register but here the caller must hold the
216 * mmap_sem in write mode.
217 */
218int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
219{
220 return do_mmu_notifier_register(mn, mm, 0);
221}
222EXPORT_SYMBOL_GPL(__mmu_notifier_register);
223
224/* this is called after the last mmu_notifier_unregister() returned */
225void __mmu_notifier_mm_destroy(struct mm_struct *mm)
226{
227 BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list));
228 kfree(mm->mmu_notifier_mm);
229 mm->mmu_notifier_mm = LIST_POISON1; /* debug */
230}
231
232/*
233 * This releases the mm_count pin automatically and frees the mm
234 * structure if it was the last user of it. It serializes against
235 * running mmu notifiers with RCU and against mmu_notifier_unregister
236 * with the unregister lock + RCU. All sptes must be dropped before
237 * calling mmu_notifier_unregister. ->release or any other notifier
238 * method may be invoked concurrently with mmu_notifier_unregister,
239 * and only after mmu_notifier_unregister has returned are we
240 * guaranteed that ->release or any other method can't run anymore.
241 */
242void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
243{
244 BUG_ON(atomic_read(&mm->mm_count) <= 0);
245
246 spin_lock(&mm->mmu_notifier_mm->lock);
247 if (!hlist_unhashed(&mn->hlist)) {
248 hlist_del_rcu(&mn->hlist);
249
250 /*
251 * RCU here will force exit_mmap to wait for ->release to finish
252 * before freeing the pages.
253 */
254 rcu_read_lock();
255 spin_unlock(&mm->mmu_notifier_mm->lock);
256 /*
257 * exit_mmap will block in mmu_notifier_release to
258 * guarantee ->release is called before freeing the
259 * pages.
260 */
261 if (mn->ops->release)
262 mn->ops->release(mn, mm);
263 rcu_read_unlock();
264 } else
265 spin_unlock(&mm->mmu_notifier_mm->lock);
266
267 /*
268 * Wait for any running method to finish, of course including
269 * ->release if it was run by mmu_notifier_release instead of us.
270 */
271 synchronize_rcu();
272
273 BUG_ON(atomic_read(&mm->mm_count) <= 0);
274
275 mmdrop(mm);
276}
277EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
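
Completing the registration sketch given earlier for mmu_notifier.h, teardown on this API would presumably look like the following; as the comments above require, all sptes must already have been dropped:

static void my_detach(struct my_shadow_mmu *s, struct mm_struct *mm)
{
	mmu_notifier_unregister(&s->mn, mm);
	/* the mm_count pin taken at registration time is dropped
	 * inside mmu_notifier_unregister() via mmdrop() */
}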
diff --git a/mm/mprotect.c b/mm/mprotect.c
index abd645a3b0a0..fded06f923f4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -21,6 +21,7 @@
21#include <linux/syscalls.h> 21#include <linux/syscalls.h>
22#include <linux/swap.h> 22#include <linux/swap.h>
23#include <linux/swapops.h> 23#include <linux/swapops.h>
24#include <linux/mmu_notifier.h>
24#include <asm/uaccess.h> 25#include <asm/uaccess.h>
25#include <asm/pgtable.h> 26#include <asm/pgtable.h>
26#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
@@ -203,10 +204,12 @@ success:
203 dirty_accountable = 1; 204 dirty_accountable = 1;
204 } 205 }
205 206
207 mmu_notifier_invalidate_range_start(mm, start, end);
206 if (is_vm_hugetlb_page(vma)) 208 if (is_vm_hugetlb_page(vma))
207 hugetlb_change_protection(vma, start, end, vma->vm_page_prot); 209 hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
208 else 210 else
209 change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); 211 change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
212 mmu_notifier_invalidate_range_end(mm, start, end);
210 vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); 213 vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
211 vm_stat_account(mm, newflags, vma->vm_file, nrpages); 214 vm_stat_account(mm, newflags, vma->vm_file, nrpages);
212 return 0; 215 return 0;
diff --git a/mm/mremap.c b/mm/mremap.c
index 08e3c7f2bd15..1a7743923c8c 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -18,6 +18,7 @@
18#include <linux/highmem.h> 18#include <linux/highmem.h>
19#include <linux/security.h> 19#include <linux/security.h>
20#include <linux/syscalls.h> 20#include <linux/syscalls.h>
21#include <linux/mmu_notifier.h>
21 22
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
23#include <asm/cacheflush.h> 24#include <asm/cacheflush.h>
@@ -74,7 +75,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
74 struct mm_struct *mm = vma->vm_mm; 75 struct mm_struct *mm = vma->vm_mm;
75 pte_t *old_pte, *new_pte, pte; 76 pte_t *old_pte, *new_pte, pte;
76 spinlock_t *old_ptl, *new_ptl; 77 spinlock_t *old_ptl, *new_ptl;
78 unsigned long old_start;
77 79
80 old_start = old_addr;
81 mmu_notifier_invalidate_range_start(vma->vm_mm,
82 old_start, old_end);
78 if (vma->vm_file) { 83 if (vma->vm_file) {
79 /* 84 /*
80 * Subtle point from Rajesh Venkatasubramanian: before 85 * Subtle point from Rajesh Venkatasubramanian: before
@@ -116,6 +121,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
116 pte_unmap_unlock(old_pte - 1, old_ptl); 121 pte_unmap_unlock(old_pte - 1, old_ptl);
117 if (mapping) 122 if (mapping)
118 spin_unlock(&mapping->i_mmap_lock); 123 spin_unlock(&mapping->i_mmap_lock);
124 mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end);
119} 125}
120 126
121#define LATENCY_LIMIT (64 * PAGE_SIZE) 127#define LATENCY_LIMIT (64 * PAGE_SIZE)
diff --git a/mm/rmap.c b/mm/rmap.c
index 39ae5a9bf382..99bc3f9cd796 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -49,6 +49,7 @@
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kallsyms.h> 50#include <linux/kallsyms.h>
51#include <linux/memcontrol.h> 51#include <linux/memcontrol.h>
52#include <linux/mmu_notifier.h>
52 53
53#include <asm/tlbflush.h> 54#include <asm/tlbflush.h>
54 55
@@ -287,7 +288,7 @@ static int page_referenced_one(struct page *page,
287 if (vma->vm_flags & VM_LOCKED) { 288 if (vma->vm_flags & VM_LOCKED) {
288 referenced++; 289 referenced++;
289 *mapcount = 1; /* break early from loop */ 290 *mapcount = 1; /* break early from loop */
290 } else if (ptep_clear_flush_young(vma, address, pte)) 291 } else if (ptep_clear_flush_young_notify(vma, address, pte))
291 referenced++; 292 referenced++;
292 293
293 /* Pretend the page is referenced if the task has the 294 /* Pretend the page is referenced if the task has the
@@ -457,7 +458,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
457 pte_t entry; 458 pte_t entry;
458 459
459 flush_cache_page(vma, address, pte_pfn(*pte)); 460 flush_cache_page(vma, address, pte_pfn(*pte));
460 entry = ptep_clear_flush(vma, address, pte); 461 entry = ptep_clear_flush_notify(vma, address, pte);
461 entry = pte_wrprotect(entry); 462 entry = pte_wrprotect(entry);
462 entry = pte_mkclean(entry); 463 entry = pte_mkclean(entry);
463 set_pte_at(mm, address, pte, entry); 464 set_pte_at(mm, address, pte, entry);
@@ -705,14 +706,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
705 * skipped over this mm) then we should reactivate it. 706 * skipped over this mm) then we should reactivate it.
706 */ 707 */
707 if (!migration && ((vma->vm_flags & VM_LOCKED) || 708 if (!migration && ((vma->vm_flags & VM_LOCKED) ||
708 (ptep_clear_flush_young(vma, address, pte)))) { 709 (ptep_clear_flush_young_notify(vma, address, pte)))) {
709 ret = SWAP_FAIL; 710 ret = SWAP_FAIL;
710 goto out_unmap; 711 goto out_unmap;
711 } 712 }
712 713
713 /* Nuke the page table entry. */ 714 /* Nuke the page table entry. */
714 flush_cache_page(vma, address, page_to_pfn(page)); 715 flush_cache_page(vma, address, page_to_pfn(page));
715 pteval = ptep_clear_flush(vma, address, pte); 716 pteval = ptep_clear_flush_notify(vma, address, pte);
716 717
717 /* Move the dirty bit to the physical page now the pte is gone. */ 718 /* Move the dirty bit to the physical page now the pte is gone. */
718 if (pte_dirty(pteval)) 719 if (pte_dirty(pteval))
@@ -837,12 +838,12 @@ static void try_to_unmap_cluster(unsigned long cursor,
837 page = vm_normal_page(vma, address, *pte); 838 page = vm_normal_page(vma, address, *pte);
838 BUG_ON(!page || PageAnon(page)); 839 BUG_ON(!page || PageAnon(page));
839 840
840 if (ptep_clear_flush_young(vma, address, pte)) 841 if (ptep_clear_flush_young_notify(vma, address, pte))
841 continue; 842 continue;
842 843
843 /* Nuke the page table entry. */ 844 /* Nuke the page table entry. */
844 flush_cache_page(vma, address, pte_pfn(*pte)); 845 flush_cache_page(vma, address, pte_pfn(*pte));
845 pteval = ptep_clear_flush(vma, address, pte); 846 pteval = ptep_clear_flush_notify(vma, address, pte);
846 847
847 /* If nonlinear, store the file page offset in the pte. */ 848 /* If nonlinear, store the file page offset in the pte. */
848 if (page->index != linear_page_index(vma, address)) 849 if (page->index != linear_page_index(vma, address))
diff --git a/mm/shmem.c b/mm/shmem.c
index 952d361774bb..c1e5a3b4f758 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1513,7 +1513,6 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1513 inode->i_uid = current->fsuid; 1513 inode->i_uid = current->fsuid;
1514 inode->i_gid = current->fsgid; 1514 inode->i_gid = current->fsgid;
1515 inode->i_blocks = 0; 1515 inode->i_blocks = 0;
1516 inode->i_mapping->a_ops = &shmem_aops;
1517 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; 1516 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
1518 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 1517 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1519 inode->i_generation = get_seconds(); 1518 inode->i_generation = get_seconds();
@@ -1528,6 +1527,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1528 init_special_inode(inode, mode, dev); 1527 init_special_inode(inode, mode, dev);
1529 break; 1528 break;
1530 case S_IFREG: 1529 case S_IFREG:
1530 inode->i_mapping->a_ops = &shmem_aops;
1531 inode->i_op = &shmem_inode_operations; 1531 inode->i_op = &shmem_inode_operations;
1532 inode->i_fop = &shmem_file_operations; 1532 inode->i_fop = &shmem_file_operations;
1533 mpol_shared_policy_init(&info->policy, 1533 mpol_shared_policy_init(&info->policy,
@@ -1929,6 +1929,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
1929 return error; 1929 return error;
1930 } 1930 }
1931 unlock_page(page); 1931 unlock_page(page);
1932 inode->i_mapping->a_ops = &shmem_aops;
1932 inode->i_op = &shmem_symlink_inode_operations; 1933 inode->i_op = &shmem_symlink_inode_operations;
1933 kaddr = kmap_atomic(page, KM_USER0); 1934 kaddr = kmap_atomic(page, KM_USER0);
1934 memcpy(kaddr, symname, len); 1935 memcpy(kaddr, symname, len);