author     Chris Metcalf <cmetcalf@tilera.com>   2012-06-13 14:46:40 -0400
committer  Chris Metcalf <cmetcalf@tilera.com>   2012-07-18 16:40:05 -0400
commit     bbaa22c3a0d0be4406d26e5a73d1e8e504787986
tree       4d00f1bda85d9735c60d7db1cdbdd215d5317ae4  /arch/tile
parent     3e219b91533058e242b78ac08aaa91024dd6f369
tilegx pci: support I/O to arbitrarily-cached pages
The tilegx PCI root complex support (currently only in linux-next)
is limited to pages that are homed and cached in the default manner,
i.e. "hash-for-home".  This change supports delivery of I/O data to
pages that are cached in other ways (locally on a particular core,
uncached, user-managed incoherent, etc.).
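
For illustration only (not part of this patch; the device and transfer
setup are hypothetical, and the usual <linux/dma-mapping.h> and
<asm/homecache.h> headers are assumed), the kind of driver-side pattern
this enables looks roughly like:

    /* Sketch: stream DMA into a page that is NOT hash-for-home. */
    static int example_rx_one_page(struct device *dev)
    {
            /* Home the page on cpu 0 instead of hash-for-home. */
            struct page *pg = homecache_alloc_pages(GFP_KERNEL, 0, 0);
            dma_addr_t handle;

            if (!pg)
                    return -ENOMEM;

            /* With this change dma_map_single() flushes/invalidates the
             * page from its actual home before the device writes it. */
            handle = dma_map_single(dev, page_address(pg), PAGE_SIZE,
                                    DMA_FROM_DEVICE);
            /* ... program the device and wait for the transfer ... */
            dma_unmap_single(dev, handle, PAGE_SIZE, DMA_FROM_DEVICE);

            __homecache_free_pages(pg, 0);
            return 0;
    }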
A large part of the change adds support for flushing pages from the
cache at a particular home so that we can transition the data we are
delivering to or from the device appropriately.  The new homecache_finv*
routines handle this.
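
For reference, the declarations added in the homecache.h hunk below are:

    /* Flush and invalidate a page from whatever cache(s) it is in. */
    extern void homecache_finv_page(struct page *);

    /* Flush and invalidate a page from the *specified* home, which need
     * not be the page's actual home (e.g. the hash-for-home cache that
     * an I/O device delivered data into). */
    extern void homecache_finv_map_page(struct page *, int home);

homecache_finv_page() replaces the old homecache_flush_cache(), and
operates on a single page rather than a power-of-two block of pages.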
Some changes to page_table_range_init() were also required to make
the fixmap code work correctly on tilegx; it hadn't been used there
before.
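
For illustration, a hypothetical helper (the name is made up; the real
logic lives in homecache_finv_map_page() in the homecache.c hunk below)
showing how the new per-cpu tilegx fixmap slots are meant to be used:

    /* Hypothetical sketch: map @page at this cpu's reserved fixmap slot
     * with the requested home so it can be finv'ed from that home.  The
     * caller must have interrupts disabled, as homecache_finv_map_page()
     * does around this sequence. */
    static unsigned long map_page_at_home(struct page *page, int home)
    {
            unsigned long va = __fix_to_virt(FIX_HOMECACHE_BEGIN +
                                             smp_processor_id());
            pte_t *ptep = virt_to_pte(NULL, va);

            __set_pte(ptep, pte_set_home(pfn_pte(page_to_pfn(page),
                                                 PAGE_KERNEL), home));
            return va;
    }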
We also remove some stub mark_caches_evicted_*() routines that
were just no-ops anyway.
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile')
-rw-r--r--  arch/tile/include/asm/cache.h      |  12
-rw-r--r--  arch/tile/include/asm/fixmap.h     |  14
-rw-r--r--  arch/tile/include/asm/homecache.h  |  19
-rw-r--r--  arch/tile/include/asm/page.h       |   7
-rw-r--r--  arch/tile/kernel/pci-dma.c         | 182
-rw-r--r--  arch/tile/mm/homecache.c           | 156
-rw-r--r--  arch/tile/mm/init.c                |  59
7 files changed, 278 insertions(+), 171 deletions(-)
diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
index 392e5333dd8b..a9a529964e07 100644
--- a/arch/tile/include/asm/cache.h
+++ b/arch/tile/include/asm/cache.h
@@ -27,11 +27,17 @@
 #define L2_CACHE_ALIGN(x) (((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES)
 
 /*
- * TILE-Gx is fully coherent so we don't need to define ARCH_DMA_MINALIGN.
+ * TILEPro I/O is not always coherent (networking typically uses coherent
+ * I/O, but PCI traffic does not) and setting ARCH_DMA_MINALIGN to the
+ * L2 cacheline size helps ensure that kernel heap allocations are aligned.
+ * TILE-Gx I/O is always coherent when used on hash-for-home pages.
+ *
+ * However, it's possible at runtime to request not to use hash-for-home
+ * for the kernel heap, in which case the kernel will use flush-and-inval
+ * to manage coherence.  As a result, we use L2_CACHE_BYTES for the
+ * DMA minimum alignment to avoid false sharing in the kernel heap.
  */
-#ifndef __tilegx__
 #define ARCH_DMA_MINALIGN L2_CACHE_BYTES
-#endif
 
 /* use the cache line size for the L2, which is where it counts */
 #define SMP_CACHE_BYTES_SHIFT L2_CACHE_SHIFT
diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h
index c66f7933beaa..e16dbf929cb5 100644
--- a/arch/tile/include/asm/fixmap.h
+++ b/arch/tile/include/asm/fixmap.h
@@ -45,15 +45,23 @@
  *
  * TLB entries of such buffers will not be flushed across
  * task switches.
- *
- * We don't bother with a FIX_HOLE since above the fixmaps
- * is unmapped memory in any case.
  */
 enum fixed_addresses {
+#ifdef __tilegx__
+        /*
+         * TILEPro has unmapped memory above so the hole isn't needed,
+         * and in any case the hole pushes us over a single 16MB pmd.
+         */
+        FIX_HOLE,
+#endif
 #ifdef CONFIG_HIGHMEM
         FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
         FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
 #endif
+#ifdef __tilegx__  /* see homecache.c */
+        FIX_HOMECACHE_BEGIN,
+        FIX_HOMECACHE_END = FIX_HOMECACHE_BEGIN+(NR_CPUS)-1,
+#endif
         __end_of_permanent_fixed_addresses,
 
         /*
diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h
index a8243865d49e..7b7771328642 100644
--- a/arch/tile/include/asm/homecache.h
+++ b/arch/tile/include/asm/homecache.h
@@ -79,10 +79,17 @@ extern void homecache_change_page_home(struct page *, int order, int home);
 /*
  * Flush a page out of whatever cache(s) it is in.
  * This is more than just finv, since it properly handles waiting
- * for the data to reach memory on tilepro, but it can be quite
- * heavyweight, particularly on hash-for-home memory.
+ * for the data to reach memory, but it can be quite
+ * heavyweight, particularly on incoherent or immutable memory.
  */
-extern void homecache_flush_cache(struct page *, int order);
+extern void homecache_finv_page(struct page *);
+
+/*
+ * Flush a page out of the specified home cache.
+ * Note that the specified home need not be the actual home of the page,
+ * as for example might be the case when coordinating with I/O devices.
+ */
+extern void homecache_finv_map_page(struct page *, int home);
 
 /*
  * Allocate a page with the given GFP flags, home, and optionally
@@ -104,10 +111,10 @@ extern struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
  * routines use homecache_change_page_home() to reset the home
  * back to the default before returning the page to the allocator.
  */
+void __homecache_free_pages(struct page *, unsigned int order);
 void homecache_free_pages(unsigned long addr, unsigned int order);
-#define homecache_free_page(page) \
-        homecache_free_pages((page), 0)
-
+#define __homecache_free_page(page) __homecache_free_pages((page), 0)
+#define homecache_free_page(page) homecache_free_pages((page), 0)
 
 
 /*
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index 9d9131e5c552..dd033a4fd627 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -174,7 +174,9 @@ static inline __attribute_const__ int get_order(unsigned long size)
 #define MEM_LOW_END    (HALF_VA_SPACE - 1)            /* low half */
 #define MEM_HIGH_START (-HALF_VA_SPACE)               /* high half */
 #define PAGE_OFFSET    MEM_HIGH_START
-#define _VMALLOC_START _AC(0xfffffff500000000, UL)    /* 4 GB */
+#define FIXADDR_BASE   _AC(0xfffffff400000000, UL)    /* 4 GB */
+#define FIXADDR_TOP    _AC(0xfffffff500000000, UL)    /* 4 GB */
+#define _VMALLOC_START FIXADDR_TOP
 #define HUGE_VMAP_BASE _AC(0xfffffff600000000, UL)    /* 4 GB */
 #define MEM_SV_START   _AC(0xfffffff700000000, UL)    /* 256 MB */
 #define MEM_SV_INTRPT  MEM_SV_START
@@ -185,9 +187,6 @@ static inline __attribute_const__ int get_order(unsigned long size)
 /* Highest DTLB address we will use */
 #define KERNEL_HIGH_VADDR MEM_SV_START
 
-/* Since we don't currently provide any fixmaps, we use an impossible VA. */
-#define FIXADDR_TOP MEM_HV_START
-
 #else /* !__tilegx__ */
 
 /*
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
index b3ed19f8779c..9814d7082f24 100644
--- a/arch/tile/kernel/pci-dma.c
+++ b/arch/tile/kernel/pci-dma.c
@@ -22,9 +22,15 @@
 /* Generic DMA mapping functions: */
 
 /*
- * Allocate what Linux calls "coherent" memory, which for us just
- * means uncached.
+ * Allocate what Linux calls "coherent" memory.  On TILEPro this is
+ * uncached memory; on TILE-Gx it is hash-for-home memory.
  */
+#ifdef __tilepro__
+#define PAGE_HOME_DMA PAGE_HOME_UNCACHED
+#else
+#define PAGE_HOME_DMA PAGE_HOME_HASH
+#endif
+
 void *dma_alloc_coherent(struct device *dev,
                         size_t size,
                         dma_addr_t *dma_handle,
@@ -48,13 +54,13 @@ void *dma_alloc_coherent(struct device *dev,
        if (dma_mask <= DMA_BIT_MASK(32))
                node = 0;
 
-       pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_UNCACHED);
+       pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
        if (pg == NULL)
                return NULL;
 
        addr = page_to_phys(pg);
        if (addr + size > dma_mask) {
-               homecache_free_pages(addr, order);
+               __homecache_free_pages(pg, order);
                return NULL;
        }
 
@@ -87,22 +93,110 @@ EXPORT_SYMBOL(dma_free_coherent);
  * can count on nothing having been touched.
  */
 
-/* Flush a PA range from cache page by page. */
-static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size)
+/* Set up a single page for DMA access. */
+static void __dma_prep_page(struct page *page, unsigned long offset,
+                           size_t size, enum dma_data_direction direction)
 {
-       struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
-       size_t bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1));
+       /*
+        * Flush the page from cache if necessary.
+        * On tilegx, data is delivered to hash-for-home L3; on tilepro,
+        * data is delivered direct to memory.
+        *
+        * NOTE: If we were just doing DMA_TO_DEVICE we could optimize
+        * this to be a "flush" not a "finv" and keep some of the
+        * state in cache across the DMA operation, but it doesn't seem
+        * worth creating the necessary flush_buffer_xxx() infrastructure.
+        */
+       int home = page_home(page);
+       switch (home) {
+       case PAGE_HOME_HASH:
+#ifdef __tilegx__
+               return;
+#endif
+               break;
+       case PAGE_HOME_UNCACHED:
+#ifdef __tilepro__
+               return;
+#endif
+               break;
+       case PAGE_HOME_IMMUTABLE:
+               /* Should be going to the device only. */
+               BUG_ON(direction == DMA_FROM_DEVICE ||
+                      direction == DMA_BIDIRECTIONAL);
+               return;
+       case PAGE_HOME_INCOHERENT:
+               /* Incoherent anyway, so no need to work hard here. */
+               return;
+       default:
+               BUG_ON(home < 0 || home >= NR_CPUS);
+               break;
+       }
+       homecache_finv_page(page);
+
+#ifdef DEBUG_ALIGNMENT
+       /* Warn if the region isn't cacheline aligned. */
+       if (offset & (L2_CACHE_BYTES - 1) || (size & (L2_CACHE_BYTES - 1)))
+               pr_warn("Unaligned DMA to non-hfh memory: PA %#llx/%#lx\n",
+                       PFN_PHYS(page_to_pfn(page)) + offset, size);
+#endif
+}
 
-       while ((ssize_t)size > 0) {
-               /* Flush the page. */
-               homecache_flush_cache(page++, 0);
+/* Make the page ready to be read by the core. */
+static void __dma_complete_page(struct page *page, unsigned long offset,
+                               size_t size, enum dma_data_direction direction)
+{
+#ifdef __tilegx__
+       switch (page_home(page)) {
+       case PAGE_HOME_HASH:
+               /* I/O device delivered data the way the cpu wanted it. */
+               break;
+       case PAGE_HOME_INCOHERENT:
+               /* Incoherent anyway, so no need to work hard here. */
+               break;
+       case PAGE_HOME_IMMUTABLE:
+               /* Extra read-only copies are not a problem. */
+               break;
+       default:
+               /* Flush the bogus hash-for-home I/O entries to memory. */
+               homecache_finv_map_page(page, PAGE_HOME_HASH);
+               break;
+       }
+#endif
+}
 
-               /* Figure out if we need to continue on the next page. */
-               size -= bytesleft;
-               bytesleft = PAGE_SIZE;
+static void __dma_prep_pa_range(dma_addr_t dma_addr, size_t size,
+                               enum dma_data_direction direction)
+{
+       struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
+       unsigned long offset = dma_addr & (PAGE_SIZE - 1);
+       size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));
+
+       while (size != 0) {
+               __dma_prep_page(page, offset, bytes, direction);
+               size -= bytes;
+               ++page;
+               offset = 0;
+               bytes = min((size_t)PAGE_SIZE, size);
+       }
+}
+
+static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
+                                   enum dma_data_direction direction)
+{
+       struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
+       unsigned long offset = dma_addr & (PAGE_SIZE - 1);
+       size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));
+
+       while (size != 0) {
+               __dma_complete_page(page, offset, bytes, direction);
+               size -= bytes;
+               ++page;
+               offset = 0;
+               bytes = min((size_t)PAGE_SIZE, size);
        }
 }
 
+
 /*
  * dma_map_single can be passed any memory address, and there appear
  * to be no alignment constraints.
@@ -111,28 +205,29 @@ static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size)
  * line with some other data that has been touched in the meantime.
  */
 dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
                          enum dma_data_direction direction)
 {
        dma_addr_t dma_addr = __pa(ptr);
 
        BUG_ON(!valid_dma_direction(direction));
        WARN_ON(size == 0);
 
-       __dma_map_pa_range(dma_addr, size);
+       __dma_prep_pa_range(dma_addr, size, direction);
 
        return dma_addr;
 }
 EXPORT_SYMBOL(dma_map_single);
 
 void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
                      enum dma_data_direction direction)
 {
        BUG_ON(!valid_dma_direction(direction));
+       __dma_complete_pa_range(dma_addr, size, direction);
 }
 EXPORT_SYMBOL(dma_unmap_single);
 
 int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
               enum dma_data_direction direction)
 {
        struct scatterlist *sg;
        int i;
@@ -143,17 +238,25 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
 
        for_each_sg(sglist, sg, nents, i) {
                sg->dma_address = sg_phys(sg);
-               __dma_map_pa_range(sg->dma_address, sg->length);
+               __dma_prep_pa_range(sg->dma_address, sg->length, direction);
        }
 
        return nents;
 }
 EXPORT_SYMBOL(dma_map_sg);
 
-void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+void dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
                  enum dma_data_direction direction)
 {
+       struct scatterlist *sg;
+       int i;
+
        BUG_ON(!valid_dma_direction(direction));
+       for_each_sg(sglist, sg, nents, i) {
+               sg->dma_address = sg_phys(sg);
+               __dma_complete_pa_range(sg->dma_address, sg->length,
+                                       direction);
+       }
 }
 EXPORT_SYMBOL(dma_unmap_sg);
 
@@ -164,16 +267,17 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page,
        BUG_ON(!valid_dma_direction(direction));
 
        BUG_ON(offset + size > PAGE_SIZE);
-       homecache_flush_cache(page, 0);
-
+       __dma_prep_page(page, offset, size, direction);
        return page_to_pa(page) + offset;
 }
 EXPORT_SYMBOL(dma_map_page);
 
 void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
                    enum dma_data_direction direction)
 {
        BUG_ON(!valid_dma_direction(direction));
+       __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
+                           dma_address & PAGE_OFFSET, size, direction);
 }
 EXPORT_SYMBOL(dma_unmap_page);
 
@@ -181,33 +285,33 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
                             size_t size, enum dma_data_direction direction)
 {
        BUG_ON(!valid_dma_direction(direction));
+       __dma_complete_pa_range(dma_handle, size, direction);
 }
 EXPORT_SYMBOL(dma_sync_single_for_cpu);
 
 void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
                                size_t size, enum dma_data_direction direction)
 {
-       unsigned long start = PFN_DOWN(dma_handle);
-       unsigned long end = PFN_DOWN(dma_handle + size - 1);
-       unsigned long i;
-
-       BUG_ON(!valid_dma_direction(direction));
-       for (i = start; i <= end; ++i)
-               homecache_flush_cache(pfn_to_page(i), 0);
+       __dma_prep_pa_range(dma_handle, size, direction);
 }
 EXPORT_SYMBOL(dma_sync_single_for_device);
 
-void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
-                        enum dma_data_direction direction)
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
+                        int nelems, enum dma_data_direction direction)
 {
+       struct scatterlist *sg;
+       int i;
+
        BUG_ON(!valid_dma_direction(direction));
-       WARN_ON(nelems == 0 || sg[0].length == 0);
+       WARN_ON(nelems == 0 || sglist->length == 0);
+
+       for_each_sg(sglist, sg, nelems, i) {
+               dma_sync_single_for_cpu(dev, sg->dma_address,
+                                       sg_dma_len(sg), direction);
+       }
 }
 EXPORT_SYMBOL(dma_sync_sg_for_cpu);
 
-/*
- * Flush and invalidate cache for scatterlist.
- */
 void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
                            int nelems, enum dma_data_direction direction)
 {
@@ -242,8 +346,8 @@ void dma_sync_single_range_for_device(struct device *dev,
 EXPORT_SYMBOL(dma_sync_single_range_for_device);
 
 /*
- * dma_alloc_noncoherent() returns non-cacheable memory, so there's no
- * need to do any flushing here.
+ * dma_alloc_noncoherent() is #defined to return coherent memory,
+ * so there's no need to do any flushing here.
  */
 void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
                    enum dma_data_direction direction)
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index dbcbdf7b8aa8..5f7868dcd6d4 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -64,10 +64,6 @@ early_param("noallocl2", set_noallocl2);
 
 #endif
 
-/* Provide no-op versions of these routines to keep flush_remote() cleaner. */
-#define mark_caches_evicted_start() 0
-#define mark_caches_evicted_finish(mask, timestamp) do {} while (0)
-
 
 /*
  * Update the irq_stat for cpus that we are going to interrupt
@@ -107,7 +103,6 @@ static void hv_flush_update(const struct cpumask *cache_cpumask,
  * there's never any good reason for hv_flush_remote() to fail.
  * - Accepts a 32-bit PFN rather than a 64-bit PA, which generally
  *   is the type that Linux wants to pass around anyway.
- * - Centralizes the mark_caches_evicted() handling.
  * - Canonicalizes that lengths of zero make cpumasks NULL.
  * - Handles deferring TLB flushes for dataplane tiles.
  * - Tracks remote interrupts in the per-cpu irq_cpustat_t.
@@ -126,7 +121,6 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
                  HV_Remote_ASID *asids, int asidcount)
 {
        int rc;
-       int timestamp = 0; /* happy compiler */
        struct cpumask cache_cpumask_copy, tlb_cpumask_copy;
        struct cpumask *cache_cpumask, *tlb_cpumask;
        HV_PhysAddr cache_pa;
@@ -157,15 +151,11 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
        hv_flush_update(cache_cpumask, tlb_cpumask, tlb_va, tlb_length,
                        asids, asidcount);
        cache_pa = (HV_PhysAddr)cache_pfn << PAGE_SHIFT;
-       if (cache_control & HV_FLUSH_EVICT_L2)
-               timestamp = mark_caches_evicted_start();
        rc = hv_flush_remote(cache_pa, cache_control,
                             cpumask_bits(cache_cpumask),
                             tlb_va, tlb_length, tlb_pgsize,
                             cpumask_bits(tlb_cpumask),
                             asids, asidcount);
-       if (cache_control & HV_FLUSH_EVICT_L2)
-               mark_caches_evicted_finish(cache_cpumask, timestamp);
        if (rc == 0)
                return;
        cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy);
@@ -180,85 +170,86 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
        panic("Unsafe to continue.");
 }
 
-void flush_remote_page(struct page *page, int order)
+static void homecache_finv_page_va(void* va, int home)
 {
-       int i, pages = (1 << order);
-       for (i = 0; i < pages; ++i, ++page) {
-               void *p = kmap_atomic(page);
-               int hfh = 0;
-               int home = page_home(page);
-#if CHIP_HAS_CBOX_HOME_MAP()
-               if (home == PAGE_HOME_HASH)
-                       hfh = 1;
-               else
-#endif
-                       BUG_ON(home < 0 || home >= NR_CPUS);
-               finv_buffer_remote(p, PAGE_SIZE, hfh);
-               kunmap_atomic(p);
+       if (home == smp_processor_id()) {
+               finv_buffer_local(va, PAGE_SIZE);
+       } else if (home == PAGE_HOME_HASH) {
+               finv_buffer_remote(va, PAGE_SIZE, 1);
+       } else {
+               BUG_ON(home < 0 || home >= NR_CPUS);
+               finv_buffer_remote(va, PAGE_SIZE, 0);
        }
 }
 
-void homecache_evict(const struct cpumask *mask)
+void homecache_finv_map_page(struct page *page, int home)
 {
-       flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0);
+       unsigned long flags;
+       unsigned long va;
+       pte_t *ptep;
+       pte_t pte;
+
+       if (home == PAGE_HOME_UNCACHED)
+               return;
+       local_irq_save(flags);
+#ifdef CONFIG_HIGHMEM
+       va = __fix_to_virt(FIX_KMAP_BEGIN + kmap_atomic_idx_push() +
+                          (KM_TYPE_NR * smp_processor_id()));
+#else
+       va = __fix_to_virt(FIX_HOMECACHE_BEGIN + smp_processor_id());
+#endif
+       ptep = virt_to_pte(NULL, (unsigned long)va);
+       pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL);
+       __set_pte(ptep, pte_set_home(pte, home));
+       homecache_finv_page_va((void *)va, home);
+       __pte_clear(ptep);
+       hv_flush_page(va, PAGE_SIZE);
+#ifdef CONFIG_HIGHMEM
+       kmap_atomic_idx_pop();
+#endif
+       local_irq_restore(flags);
 }
 
-/*
- * Return a mask of the cpus whose caches currently own these pages.
- * The return value is whether the pages are all coherently cached
- * (i.e. none are immutable, incoherent, or uncached).
- */
-static int homecache_mask(struct page *page, int pages,
-                         struct cpumask *home_mask)
+static void homecache_finv_page_home(struct page *page, int home)
 {
-       int i;
-       int cached_coherently = 1;
-       cpumask_clear(home_mask);
-       for (i = 0; i < pages; ++i) {
-               int home = page_home(&page[i]);
-               if (home == PAGE_HOME_IMMUTABLE ||
-                   home == PAGE_HOME_INCOHERENT) {
-                       cpumask_copy(home_mask, cpu_possible_mask);
-                       return 0;
-               }
-#if CHIP_HAS_CBOX_HOME_MAP()
-               if (home == PAGE_HOME_HASH) {
-                       cpumask_or(home_mask, home_mask, &hash_for_home_map);
-                       continue;
-               }
-#endif
-               if (home == PAGE_HOME_UNCACHED) {
-                       cached_coherently = 0;
-                       continue;
-               }
-               BUG_ON(home < 0 || home >= NR_CPUS);
-               cpumask_set_cpu(home, home_mask);
-       }
-       return cached_coherently;
+       if (!PageHighMem(page) && home == page_home(page))
+               homecache_finv_page_va(page_address(page), home);
+       else
+               homecache_finv_map_page(page, home);
 }
 
-/*
- * Return the passed length, or zero if it's long enough that we
- * believe we should evict the whole L2 cache.
- */
-static unsigned long cache_flush_length(unsigned long length)
+static inline bool incoherent_home(int home)
 {
-       return (length >= CHIP_L2_CACHE_SIZE()) ? HV_FLUSH_EVICT_L2 : length;
+       return home == PAGE_HOME_IMMUTABLE || home == PAGE_HOME_INCOHERENT;
 }
 
-/* Flush a page out of whatever cache(s) it is in. */
-void homecache_flush_cache(struct page *page, int order)
+static void homecache_finv_page_internal(struct page *page, int force_map)
 {
-       int pages = 1 << order;
-       int length = cache_flush_length(pages * PAGE_SIZE);
-       unsigned long pfn = page_to_pfn(page);
-       struct cpumask home_mask;
-
-       homecache_mask(page, pages, &home_mask);
-       flush_remote(pfn, length, &home_mask, 0, 0, 0, NULL, NULL, 0);
-       sim_validate_lines_evicted(PFN_PHYS(pfn), pages * PAGE_SIZE);
+       int home = page_home(page);
+       if (home == PAGE_HOME_UNCACHED)
+               return;
+       if (incoherent_home(home)) {
+               int cpu;
+               for_each_cpu(cpu, &cpu_cacheable_map)
+                       homecache_finv_map_page(page, cpu);
+       } else if (force_map) {
+               /* Force if, e.g., the normal mapping is migrating. */
+               homecache_finv_map_page(page, home);
+       } else {
+               homecache_finv_page_home(page, home);
+       }
+       sim_validate_lines_evicted(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE);
 }
 
+void homecache_finv_page(struct page *page)
+{
+       homecache_finv_page_internal(page, 0);
+}
+
+void homecache_evict(const struct cpumask *mask)
+{
+       flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0);
+}
 
 /* Report the home corresponding to a given PTE. */
 static int pte_to_home(pte_t pte)
@@ -441,15 +432,8 @@ struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
        return page;
 }
 
-void homecache_free_pages(unsigned long addr, unsigned int order)
+void __homecache_free_pages(struct page *page, unsigned int order)
 {
-       struct page *page;
-
-       if (addr == 0)
-               return;
-
-       VM_BUG_ON(!virt_addr_valid((void *)addr));
-       page = virt_to_page((void *)addr);
        if (put_page_testzero(page)) {
                homecache_change_page_home(page, order, initial_page_home());
                if (order == 0) {
@@ -460,3 +444,13 @@ void homecache_free_pages(unsigned long addr, unsigned int order)
                }
        }
 }
+EXPORT_SYMBOL(__homecache_free_pages);
+
+void homecache_free_pages(unsigned long addr, unsigned int order)
+{
+       if (addr != 0) {
+               VM_BUG_ON(!virt_addr_valid((void *)addr));
+               __homecache_free_pages(virt_to_page((void *)addr), order);
+       }
+}
+EXPORT_SYMBOL(homecache_free_pages);
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 630dd2ce2afe..a2417a0a8222 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -150,7 +150,21 @@ void __init shatter_pmd(pmd_t *pmd)
        assign_pte(pmd, pte);
 }
 
-#ifdef CONFIG_HIGHMEM
+#ifdef __tilegx__
+static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
+{
+       pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va);
+       if (pud_none(*pud))
+               assign_pmd(pud, alloc_pmd());
+       return pmd_offset(pud, va);
+}
+#else
+static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
+{
+       return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va);
+}
+#endif
+
 /*
  * This function initializes a certain range of kernel virtual memory
  * with new bootmem page tables, everywhere page tables are missing in
@@ -163,24 +177,17 @@ void __init shatter_pmd(pmd_t *pmd)
  * checking the pgd every time.
  */
 static void __init page_table_range_init(unsigned long start,
-                                        unsigned long end, pgd_t *pgd_base)
+                                        unsigned long end, pgd_t *pgd)
 {
-       pgd_t *pgd;
-       int pgd_idx;
        unsigned long vaddr;
-
-       vaddr = start;
-       pgd_idx = pgd_index(vaddr);
-       pgd = pgd_base + pgd_idx;
-
-       for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
-               pmd_t *pmd = pmd_offset(pud_offset(pgd, vaddr), vaddr);
+       start = round_down(start, PMD_SIZE);
+       end = round_up(end, PMD_SIZE);
+       for (vaddr = start; vaddr < end; vaddr += PMD_SIZE) {
+               pmd_t *pmd = get_pmd(pgd, vaddr);
                if (pmd_none(*pmd))
                        assign_pte(pmd, alloc_pte());
-               vaddr += PMD_SIZE;
        }
 }
-#endif /* CONFIG_HIGHMEM */
 
 
 #if CHIP_HAS_CBOX_HOME_MAP()
@@ -404,21 +411,6 @@ static inline pgprot_t ktext_set_nocache(pgprot_t prot)
        return prot;
 }
 
-#ifndef __tilegx__
-static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
-{
-       return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va);
-}
-#else
-static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
-{
-       pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va);
-       if (pud_none(*pud))
-               assign_pmd(pud, alloc_pmd());
-       return pmd_offset(pud, va);
-}
-#endif
-
 /* Temporary page table we use for staging. */
 static pgd_t pgtables[PTRS_PER_PGD]
  __attribute__((aligned(HV_PAGE_TABLE_ALIGN)));
@@ -779,9 +771,6 @@ static void __init set_non_bootmem_pages_init(void)
  */
 void __init paging_init(void)
 {
-#ifdef CONFIG_HIGHMEM
-       unsigned long vaddr, end;
-#endif
 #ifdef __tilegx__
        pud_t *pud;
 #endif
@@ -789,14 +778,14 @@ void __init paging_init(void)
 
        kernel_physical_mapping_init(pgd_base);
 
-#ifdef CONFIG_HIGHMEM
        /*
         * Fixed mappings, only the page table structure has to be
         * created - mappings will be set by set_fixmap():
         */
-       vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-       end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
-       page_table_range_init(vaddr, end, pgd_base);
+       page_table_range_init(fix_to_virt(__end_of_fixed_addresses - 1),
+                             FIXADDR_TOP, pgd_base);
+
+#ifdef CONFIG_HIGHMEM
        permanent_kmaps_init(pgd_base);
 #endif
 