author     FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>    2008-09-24 07:48:36 -0400
committer  Ingo Molnar <mingo@elte.hu>                        2008-09-25 05:02:25 -0400
commit     ecef533ea68b2fb3baaf459beb2f802a240bdb16 (patch)
tree       ee23b4458bd908ca1c97fd4fa8edbf911372c910 /arch
parent     9f6ac57729724b58df81ca5dc005326759a806fe (diff)
revert "x86: make GART to respect device's dma_mask about virtual mappings"
This reverts:
commit bee44f294efd8417f5e68553778a6cc957af1547
Author: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri Sep 12 19:42:35 2008 +0900
x86: make GART to respect device's dma_mask about virtual mappings
I wrote the above commit to fix a GART alloc_coherent regression introduced by
the alloc_coherent rewrite: devices with a dma_mask larger than 24 bits but
smaller than 32 bits could no longer be handled:
http://lkml.org/lkml/2008/8/12/200
After the alloc_coherent rewrite, GART alloc_coherent tried to allocate pages
with GFP_DMA32. If GART got an address that the device can't access, GART
mapped the address to a virtual I/O address. But the GART mapping mechanism
didn't take the dma mask into account, so GART could hand back a virtual I/O
address that the device can't access either.
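For illustration, here is a rough sketch of the alloc_coherent flow described
above. This is not the actual kernel code: the function name and the exact
call sequence are invented for the example; dma_map_area(), flush_gart() and
bad_dma_address are the pci-gart_64.c helpers touched by this patch, used with
their post-revert signatures.

/*
 * Simplified sketch (assumed flow, not the real implementation) of the
 * GART alloc_coherent path after the rewrite.
 */
static void *gart_alloc_coherent_sketch(struct device *dev, size_t size,
					dma_addr_t *dma_addr, gfp_t gfp)
{
	u64 dma_mask = dev->coherent_dma_mask;
	void *vaddr;
	dma_addr_t paddr;

	/* The rewrite allocates the backing pages anywhere below 4GB. */
	vaddr = (void *)__get_free_pages(gfp | GFP_DMA32, get_order(size));
	if (!vaddr)
		return NULL;

	paddr = virt_to_phys(vaddr);
	if (paddr + size - 1 <= dma_mask) {
		/* The device can reach the pages directly: done. */
		*dma_addr = paddr;
		return vaddr;
	}

	/*
	 * The device cannot reach the pages, so remap them through the GART.
	 * The regression: alloc_iommu() picks an aperture slot without
	 * looking at dma_mask, so the returned bus address may be just as
	 * unreachable for the device as the physical pages were.
	 */
	*dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL, 0);
	flush_gart();
	if (*dma_addr == bad_dma_address) {
		free_pages((unsigned long)vaddr, get_order(size));
		return NULL;
	}
	return vaddr;
}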
Alan pointed out:
" This is indeed a specific problem found with things like older
AACRAID where control blocks must be below 31bits and the GART
is above 0x80000000. "
The above commit modified the GART mapping mechanism to take the dma mask
into account. But Andi pointed out, "The GART is somewhere in the 4GB range so
you cannot use it to map anything < 4GB. Also GART is pretty small."
http://lkml.org/lkml/2008/9/12/43
That means it's possible that the GART has no virtual I/O address space that
the device can access at all. The above commit (modifying the GART mapping
mechanism to take the dma mask into account) can't fix the regression
reliably, so let's avoid making GART more complicated.
We need a solution that always works for dma_masks larger than 24 bits but
smaller than 32 bits; that's how GART worked before the alloc_coherent rewrite.
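To see why clamping the aperture allocation by the dma mask (the approach
being reverted) cannot rescue Alan's example, here is a small stand-alone
arithmetic sketch. The aperture base and mask values are assumptions taken
from the quote above (GART above 0x80000000, control blocks below 31 bits),
not measurements from any particular machine.

#include <stdio.h>

/*
 * Assumed numbers: a 31-bit device mask and a GART aperture starting at 2GB.
 * With the reverted commit's "limit" logic, the number of aperture pages the
 * device can reach works out to zero, so no amount of retrying inside the
 * GART can satisfy the device.
 */
int main(void)
{
	unsigned long long iommu_bus_base = 0x80000000ULL; /* aperture base (2GB) */
	unsigned long long dma_mask = 0x7fffffffULL;       /* 31-bit device mask  */
	unsigned int page_shift = 12;

	unsigned long long first_aperture_page = iommu_bus_base >> page_shift; /* 0x80000 */
	unsigned long long last_reachable_page = dma_mask >> page_shift;       /* 0x7ffff */

	unsigned long long usable = last_reachable_page >= first_aperture_page ?
		last_reachable_page - first_aperture_page + 1 : 0;

	printf("aperture pages the device can reach: %llu\n", usable); /* prints 0 */
	return 0;
}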
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Joerg Roedel <joerg.roedel@amd.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
 arch/x86/kernel/pci-gart_64.c | 39
 1 file changed, 11 insertions(+), 28 deletions(-)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 9e390f1bd46a..7e08e466b8ad 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -83,34 +83,23 @@ static unsigned long next_bit; /* protected by iommu_bitmap_lock */
 static int need_flush;		/* global flush state. set for each gart wrap */
 
 static unsigned long alloc_iommu(struct device *dev, int size,
-				 unsigned long align_mask, u64 dma_mask)
+				 unsigned long align_mask)
 {
 	unsigned long offset, flags;
 	unsigned long boundary_size;
 	unsigned long base_index;
-	unsigned long limit;
 
 	base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
 			   PAGE_SIZE) >> PAGE_SHIFT;
 	boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1,
 			      PAGE_SIZE) >> PAGE_SHIFT;
 
-	limit = iommu_device_max_index(iommu_pages,
-				       DIV_ROUND_UP(iommu_bus_base, PAGE_SIZE),
-				       dma_mask >> PAGE_SHIFT);
-
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
-
-	if (limit <= next_bit) {
-		need_flush = 1;
-		next_bit = 0;
-	}
-
-	offset = iommu_area_alloc(iommu_gart_bitmap, limit, next_bit,
+	offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
 				  size, base_index, boundary_size, align_mask);
-	if (offset == -1 && next_bit) {
+	if (offset == -1) {
 		need_flush = 1;
-		offset = iommu_area_alloc(iommu_gart_bitmap, limit, 0,
+		offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
 					  size, base_index, boundary_size,
 					  align_mask);
 	}
@@ -239,14 +228,12 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
  * Caller needs to check if the iommu is needed and flush.
  */
 static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
-			       size_t size, int dir, unsigned long align_mask,
-			       u64 dma_mask)
+				size_t size, int dir, unsigned long align_mask)
 {
 	unsigned long npages = iommu_num_pages(phys_mem, size);
-	unsigned long iommu_page;
+	unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
 	int i;
 
-	iommu_page = alloc_iommu(dev, npages, align_mask, dma_mask);
 	if (iommu_page == -1) {
 		if (!nonforced_iommu(dev, phys_mem, size))
 			return phys_mem;
@@ -276,7 +263,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
 	if (!need_iommu(dev, paddr, size))
 		return paddr;
 
-	bus = dma_map_area(dev, paddr, size, dir, 0, dma_get_mask(dev));
+	bus = dma_map_area(dev, paddr, size, dir, 0);
 	flush_gart();
 
 	return bus;
@@ -327,7 +314,6 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
 {
 	struct scatterlist *s;
 	int i;
-	u64 dma_mask = dma_get_mask(dev);
 
 #ifdef CONFIG_IOMMU_DEBUG
 	printk(KERN_DEBUG "dma_map_sg overflow\n");
@@ -337,8 +323,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
 		unsigned long addr = sg_phys(s);
 
 		if (nonforced_iommu(dev, addr, s->length)) {
-			addr = dma_map_area(dev, addr, s->length, dir, 0,
-					    dma_mask);
+			addr = dma_map_area(dev, addr, s->length, dir, 0);
 			if (addr == bad_dma_address) {
 				if (i > 0)
 					gart_unmap_sg(dev, sg, i, dir);
@@ -360,16 +345,14 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
 			  int nelems, struct scatterlist *sout,
 			  unsigned long pages)
 {
-	unsigned long iommu_start;
-	unsigned long iommu_page;
+	unsigned long iommu_start = alloc_iommu(dev, pages, 0);
+	unsigned long iommu_page = iommu_start;
 	struct scatterlist *s;
 	int i;
 
-	iommu_start = alloc_iommu(dev, pages, 0, dma_get_mask(dev));
 	if (iommu_start == -1)
 		return -1;
 
-	iommu_page = iommu_start;
 	for_each_sg(start, s, nelems, i) {
 		unsigned long pages, addr;
 		unsigned long phys_addr = s->dma_address;
@@ -522,7 +505,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
 	align_mask = (1UL << get_order(size)) - 1;
 
 	*dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL,
-				 align_mask, dma_mask);
+				 align_mask);
 	flush_gart();
 
 	if (*dma_addr != bad_dma_address)