aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-11-18 14:17:10 -0500
committerDavid S. Miller <davem@davemloft.net>2016-11-18 14:17:10 -0500
commit49cc0c43d0d60ba8ca1cd754921bb50119d42940 (patch)
tree9aa3298173c5cc35683d8e01259e856e1ae84921
parent87a349f9cc0908bc0cfac0c9ece3179f650ae95a (diff)
parentd30a6b84df00128e03588564925dc828a53e6865 (diff)
Merge branch 'sun4v-64bit-DMA'
Tushar Dave says: ==================== sparc: Enable sun4v hypervisor PCI IOMMU v2 APIs and ATU ATU (Address Translation Unit) is a new IOMMU in SPARC supported with sun4v hypervisor PCI IOMMU v2 APIs. Current SPARC IOMMU supports only 32bit address ranges and one TSB per PCIe root complex that has a 2GB per root complex DVMA space limit. The limit has become a scalability bottleneck nowadays that a typical 10G/40G NIC can consume 500MB DVMA space per instance. When DVMA resource is exhausted, devices will not be usable since the driver can't allocate DVMA. For example, we recently experienced legacy IOMMU limitation while using i40e driver in system with large number of CPUs (e.g. 128). Four ports of i40e, each request 128 QP (Queue Pairs). Each queue has 512 (default) descriptors. So considering only RX queues (because RX premap DMA buffers), i40e takes 4*128*512 number of DMA entries in IOMMU table. Legacy IOMMU can have at max (2G/8K)- 1 entries available in table. So bringing up four instance of i40e alone saturate existing IOMMU resource. ATU removes bottleneck by allowing guest os to create IOTSB of size 32G (or more) with 64bit address ranges available in ATU HW. 32G is more than enough DVMA space to be shared by all PCIe devices under root complex contrast to 2G space provided by legacy IOMMU. ATU allows PCIe devices to use 64bit DMA addressing. Devices which choose to use 32bit DMA mask will continue to work with the existing legacy IOMMU. The patch set is tested on sun4v (T1000, T2000, T3, T4, T5, T7, S7) and sun4u SPARC. Thanks. -Tushar v2->v3: - Patch #5 addresses comment by Joe Perches. -- use %s, __func__ instead of embedding the function name. v1->v2: - Patch #2 addresses comments by Dave M. -- use page allocator to allocate IOTSB. -- use true/false with boolean variables. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/sparc/Kconfig22
-rw-r--r--arch/sparc/include/asm/hypervisor.h343
-rw-r--r--arch/sparc/include/asm/iommu_64.h28
-rw-r--r--arch/sparc/kernel/hvapi.c1
-rw-r--r--arch/sparc/kernel/iommu.c8
-rw-r--r--arch/sparc/kernel/pci_sun4v.c418
-rw-r--r--arch/sparc/kernel/pci_sun4v.h21
-rw-r--r--arch/sparc/kernel/pci_sun4v_asm.S68
8 files changed, 849 insertions, 60 deletions
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index b23c76b42d6e..60145c9b9f84 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -89,6 +89,14 @@ config ARCH_DEFCONFIG
89config ARCH_PROC_KCORE_TEXT 89config ARCH_PROC_KCORE_TEXT
90 def_bool y 90 def_bool y
91 91
92config ARCH_ATU
93 bool
94 default y if SPARC64
95
96config ARCH_DMA_ADDR_T_64BIT
97 bool
98 default y if ARCH_ATU
99
92config IOMMU_HELPER 100config IOMMU_HELPER
93 bool 101 bool
94 default y if SPARC64 102 default y if SPARC64
@@ -304,6 +312,20 @@ config ARCH_SPARSEMEM_ENABLE
304config ARCH_SPARSEMEM_DEFAULT 312config ARCH_SPARSEMEM_DEFAULT
305 def_bool y if SPARC64 313 def_bool y if SPARC64
306 314
315config FORCE_MAX_ZONEORDER
316 int "Maximum zone order"
317 default "13"
318 help
319 The kernel memory allocator divides physically contiguous memory
320 blocks into "zones", where each zone is a power of two number of
321 pages. This option selects the largest power of two that the kernel
322 keeps in the memory allocator. If you need to allocate very large
323 blocks of physically contiguous memory, then you may need to
324 increase this value.
325
326 This config option is actually maximum order plus one. For example,
327 a value of 13 means that the largest free memory block is 2^12 pages.
328
307source "mm/Kconfig" 329source "mm/Kconfig"
308 330
309if SPARC64 331if SPARC64
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index 666d5ba230d2..73cb8978df58 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -2335,6 +2335,348 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle,
2335 */ 2335 */
2336#define HV_FAST_PCI_MSG_SETVALID 0xd3 2336#define HV_FAST_PCI_MSG_SETVALID 0xd3
2337 2337
2338/* PCI IOMMU v2 definitions and services
2339 *
2340 * While the PCI IO definitions above are valid, IOMMU v2 adds new PCI IO
2341 * definitions and services.
2342 *
2343 * CTE Clump Table Entry. First level table entry in the ATU.
2344 *
2345 * pci_device_list
2346 * A 32-bit aligned list of pci_devices.
2347 *
2348 * pci_device_listp
2349 * real address of a pci_device_list. 32-bit aligned.
2350 *
2351 * iotte IOMMU translation table entry.
2352 *
2353 * iotte_attributes
2354 * IO Attributes for IOMMU v2 mappings. In addition to
2355 * read, write IOMMU v2 supports relax ordering
2356 *
2357 * io_page_list A 64-bit aligned list of real addresses. Each real
2358 * address in an io_page_list must be properly aligned
2359 * to the pagesize of the given IOTSB.
2360 *
2361 * io_page_list_p Real address of an io_page_list, 64-bit aligned.
2362 *
2363 * IOTSB IO Translation Storage Buffer. An aligned table of
2364 * IOTTEs. Each IOTSB has a pagesize, table size, and
2365 * virtual address associated with it that must match
2366 * a pagesize and table size supported by the underlying
2367 * hardware implementation. The alignment requirements
2368 * for an IOTSB depend on the pagesize used for that IOTSB.
2369 * Each IOTTE in an IOTSB maps one pagesize-sized page.
2370 * The size of the IOTSB dictates how large of a virtual
2371 * address space the IOTSB is capable of mapping.
2372 *
2373 * iotsb_handle An opaque identifier for an IOTSB. A devhandle plus
2374 * iotsb_handle represents a binding of an IOTSB to a
2375 * PCI root complex.
2376 *
2377 * iotsb_index Zero-based IOTTE number within an IOTSB.
2378 */
2379
2380/* The index_count argument consists of two fields:
2381 * bits 63:48 #iottes and bits 47:0 iotsb_index
2382 */
2383#define HV_PCI_IOTSB_INDEX_COUNT(__iottes, __iotsb_index) \
2384 (((u64)(__iottes) << 48UL) | ((u64)(__iotsb_index)))
2385
2386/* pci_iotsb_conf()
2387 * TRAP: HV_FAST_TRAP
2388 * FUNCTION: HV_FAST_PCI_IOTSB_CONF
2389 * ARG0: devhandle
2390 * ARG1: r_addr
2391 * ARG2: size
2392 * ARG3: pagesize
2393 * ARG4: iova
2394 * RET0: status
2395 * RET1: iotsb_handle
2396 * ERRORS: EINVAL Invalid devhandle, size, iova, or pagesize
2397 * EBADALIGN r_addr is not properly aligned
2398 * ENORADDR r_addr is not a valid real address
2399 * ETOOMANY No further IOTSBs may be configured
2400 * EBUSY Duplicate devhandle, r_addr, iova combination
2401 *
2402 * Create an IOTSB suitable for the PCI root complex identified by devhandle,
2403 * for the DMA virtual address defined by the argument iova.
2404 *
2405 * r_addr is the properly aligned base address of the IOTSB and size is the
2406 * IOTSB (table) size in bytes. The IOTSB is required to be zeroed prior to
2407 * being configured. If it contains any values other than zeros then the
2408 * behavior is undefined.
2409 *
2410 * pagesize is the size of each page in the IOTSB. Note that the combination of
2411 * size (table size) and pagesize must be valid.
2412 *
2413 * iova is the DMA virtual address this IOTSB will map.
2414 *
2415 * If successful, the opaque 64-bit handle iotsb_handle is returned in ret1.
2416 * Once configured, privileged access to the IOTSB memory is prohibited and
2417 * creates undefined behavior. The only permitted access is indirect via these
2418 * services.
2419 */
2420#define HV_FAST_PCI_IOTSB_CONF 0x190
2421
2422/* pci_iotsb_info()
2423 * TRAP: HV_FAST_TRAP
2424 * FUNCTION: HV_FAST_PCI_IOTSB_INFO
2425 * ARG0: devhandle
2426 * ARG1: iotsb_handle
2427 * RET0: status
2428 * RET1: r_addr
2429 * RET2: size
2430 * RET3: pagesize
2431 * RET4: iova
2432 * RET5: #bound
2433 * ERRORS: EINVAL Invalid devhandle or iotsb_handle
2434 *
2435 * This service returns configuration information about an IOTSB previously
2436 * created with pci_iotsb_conf.
2437 *
2438 * iotsb_handle value 0 may be used with this service to inquire about the
2439 * legacy IOTSB that may or may not exist. If the service succeeds, the return
2440 * values describe the legacy IOTSB and I/O virtual addresses mapped by that
2441 * table. However, the table base address r_addr may contain the value -1 which
2442 * indicates a memory range that cannot be accessed or be reclaimed.
2443 *
2444 * The return value #bound contains the number of PCI devices that iotsb_handle
2445 * is currently bound to.
2446 */
2447#define HV_FAST_PCI_IOTSB_INFO 0x191
2448
2449/* pci_iotsb_unconf()
2450 * TRAP: HV_FAST_TRAP
2451 * FUNCTION: HV_FAST_PCI_IOTSB_UNCONF
2452 * ARG0: devhandle
2453 * ARG1: iotsb_handle
2454 * RET0: status
2455 * ERRORS: EINVAL Invalid devhandle or iotsb_handle
2456 * EBUSY The IOTSB is bound and may not be unconfigured
2457 *
2458 * This service unconfigures the IOTSB identified by the devhandle and
2459 * iotsb_handle arguments, previously created with pci_iotsb_conf.
2460 * The IOTSB must not be currently bound to any device or the service will fail
2461 *
2462 * If the call succeeds, iotsb_handle is no longer valid.
2463 */
2464#define HV_FAST_PCI_IOTSB_UNCONF 0x192
2465
2466/* pci_iotsb_bind()
2467 * TRAP: HV_FAST_TRAP
2468 * FUNCTION: HV_FAST_PCI_IOTSB_BIND
2469 * ARG0: devhandle
2470 * ARG1: iotsb_handle
2471 * ARG2: pci_device
2472 * RET0: status
2473 * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or pci_device
2474 * EBUSY A PCI function is already bound to an IOTSB at the same
2475 * address range as specified by devhandle, iotsb_handle.
2476 *
2477 * This service binds the PCI function specified by the argument pci_device to
2478 * the IOTSB specified by the arguments devhandle and iotsb_handle.
2479 *
2480 * The PCI device function is bound to the specified IOTSB with the IOVA range
2481 * specified when the IOTSB was configured via pci_iotsb_conf. If the function
2482 * is already bound then it is unbound first.
2483 */
2484#define HV_FAST_PCI_IOTSB_BIND 0x193
2485
2486/* pci_iotsb_unbind()
2487 * TRAP: HV_FAST_TRAP
2488 * FUNCTION: HV_FAST_PCI_IOTSB_UNBIND
2489 * ARG0: devhandle
2490 * ARG1: iotsb_handle
2491 * ARG2: pci_device
2492 * RET0: status
2493 * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or pci_device
2494 * ENOMAP The PCI function was not bound to the specified IOTSB
2495 *
2496 * This service unbinds the PCI device specified by the argument pci_device
2497 * from the IOTSB identified by the arguments devhandle and iotsb_handle.
2498 *
2499 * If the PCI device is not bound to the specified IOTSB then this service will
2500 * fail with status ENOMAP.
2501 */
2502#define HV_FAST_PCI_IOTSB_UNBIND 0x194
2503
2504/* pci_iotsb_get_binding()
2505 * TRAP: HV_FAST_TRAP
2506 * FUNCTION: HV_FAST_PCI_IOTSB_GET_BINDING
2507 * ARG0: devhandle
2508 * ARG1: iotsb_handle
2509 * ARG2: iova
2510 * RET0: status
2511 * RET1: iotsb_handle
2512 * ERRORS: EINVAL Invalid devhandle, pci_device, or iova
2513 * ENOMAP The PCI function is not bound to an IOTSB at iova
2514 *
2515 * This service returns the IOTSB binding, iotsb_handle, for a given pci_device
2516 * and DMA virtual address, iova.
2517 *
2518 * iova must be the base address of a DMA virtual address range as defined by
2519 * the iommu-address-ranges property in the root complex device node defined
2520 * by the argument devhandle.
2521 */
2522#define HV_FAST_PCI_IOTSB_GET_BINDING 0x195
2523
2524/* pci_iotsb_map()
2525 * TRAP: HV_FAST_TRAP
2526 * FUNCTION: HV_FAST_PCI_IOTSB_MAP
2527 * ARG0: devhandle
2528 * ARG1: iotsb_handle
2529 * ARG2: index_count
2530 * ARG3: iotte_attributes
2531 * ARG4: io_page_list_p
2532 * RET0: status
2533 * RET1: #mapped
2534 * ERRORS: EINVAL Invalid devhandle, iotsb_handle, #iottes,
2535 * iotsb_index or iotte_attributes
2536 * EBADALIGN Improperly aligned io_page_list_p or I/O page
2537 * address in the I/O page list.
2538 * ENORADDR Invalid io_page_list_p or I/O page address in
2539 * the I/O page list.
2540 *
2541 * This service creates and flushes mappings in the IOTSB defined by the
2542 * arguments devhandle, iotsb.
2543 *
2544 * The index_count argument consists of two fields. Bits 63:48 contain #iotte
2545 * and bits 47:0 contain iotsb_index
2546 *
2547 * The first mapping is created in the IOTSB index specified by iotsb_index.
2548 * Subsequent mappings are created at iotsb_index+1 and so on.
2549 *
2550 * The attributes of each mapping are defined by the argument iotte_attributes.
2551 *
2552 * The io_page_list_p specifies the real address of the 64-bit-aligned list of
2553 * #iottes I/O page addresses. Each page address must be a properly aligned
2554 * real address of a page to be mapped in the IOTSB. The first entry in the I/O
2555 * page list contains the real address of the first page, the 2nd entry for the
2556 * 2nd page, and so on.
2557 *
2558 * #iottes must be greater than zero.
2559 *
2560 * The return value #mapped is the actual number of mappings created, which may
2561 * be less than or equal to the argument #iottes. If the function returns
2562 * successfully with a #mapped value less than the requested #iottes then the
2563 * caller should continue to invoke the service with updated iotsb_index,
2564 * #iottes, and io_page_list_p arguments until all pages are mapped.
2565 *
2566 * This service must not be used to demap a mapping. In other words, all
2567 * mappings must be valid and have one or both of the RW attribute bits set.
2568 *
2569 * Note:
2570 * It is implementation-defined whether I/O page real address validity checking
2571 * is done at time mappings are established or deferred until they are
2572 * accessed.
2573 */
2574#define HV_FAST_PCI_IOTSB_MAP 0x196
2575
2576/* pci_iotsb_map_one()
2577 * TRAP: HV_FAST_TRAP
2578 * FUNCTION: HV_FAST_PCI_IOTSB_MAP_ONE
2579 * ARG0: devhandle
2580 * ARG1: iotsb_handle
2581 * ARG2: iotsb_index
2582 * ARG3: iotte_attributes
2583 * ARG4: r_addr
2584 * RET0: status
2585 * ERRORS: EINVAL Invalid devhandle,iotsb_handle, iotsb_index
2586 * or iotte_attributes
2587 * EBADALIGN Improperly aligned r_addr
2588 * ENORADDR Invalid r_addr
2589 *
2590 * This service creates and flushes a single mapping in the IOTSB defined by the
2591 * arguments devhandle, iotsb.
2592 *
2593 * The mapping for the page at r_addr is created at the IOTSB index specified by
2594 * iotsb_index with the attributes iotte_attributes.
2595 *
2596 * This service must not be used to demap a mapping. In other words, the mapping
2597 * must be valid and have one or both of the RW attribute bits set.
2598 *
2599 * Note:
2600 * It is implementation-defined whether I/O page real address validity checking
2601 * is done at time mappings are established or deferred until they are
2602 * accessed.
2603 */
2604#define HV_FAST_PCI_IOTSB_MAP_ONE 0x197
2605
2606/* pci_iotsb_demap()
2607 * TRAP: HV_FAST_TRAP
2608 * FUNCTION: HV_FAST_PCI_IOTSB_DEMAP
2609 * ARG0: devhandle
2610 * ARG1: iotsb_handle
2611 * ARG2: iotsb_index
2612 * ARG3: #iottes
2613 * RET0: status
2614 * RET1: #unmapped
2615 * ERRORS: EINVAL Invalid devhandle, iotsb_handle, iotsb_index or #iottes
2616 *
2617 * This service unmaps and flushes up to #iottes mappings starting at index
2618 * iotsb_index from the IOTSB defined by the arguments devhandle, iotsb.
2619 *
2620 * #iottes must be greater than zero.
2621 *
2622 * The actual number of IOTTEs unmapped is returned in #unmapped and may be less
2623 * than or equal to the requested number of IOTTEs, #iottes.
2624 *
2625 * If #unmapped is less than #iottes, the caller should continue to invoke this
2626 * service with updated iotsb_index and #iottes arguments until all pages are
2627 * demapped.
2628 */
2629#define HV_FAST_PCI_IOTSB_DEMAP 0x198
2630
2631/* pci_iotsb_getmap()
2632 * TRAP: HV_FAST_TRAP
2633 * FUNCTION: HV_FAST_PCI_IOTSB_GETMAP
2634 * ARG0: devhandle
2635 * ARG1: iotsb_handle
2636 * ARG2: iotsb_index
2637 * RET0: status
2638 * RET1: r_addr
2639 * RET2: iotte_attributes
2640 * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or iotsb_index
2641 * ENOMAP No mapping was found
2642 *
2643 * This service returns the mapping specified by index iotsb_index from the
2644 * IOTSB defined by the arguments devhandle, iotsb.
2645 *
2646 * Upon success, the real address of the mapping shall be returned in
2647 * r_addr and the IOTTE mapping attributes shall be returned in
2648 * iotte_attributes.
2649 *
2650 * The return value iotte_attributes may not include optional features used in
2651 * the call to create the mapping.
2652 */
2653#define HV_FAST_PCI_IOTSB_GETMAP 0x199
2654
2655/* pci_iotsb_sync_mappings()
2656 * TRAP: HV_FAST_TRAP
2657 * FUNCTION: HV_FAST_PCI_IOTSB_SYNC_MAPPINGS
2658 * ARG0: devhandle
2659 * ARG1: iotsb_handle
2660 * ARG2: iotsb_index
2661 * ARG3: #iottes
2662 * RET0: status
2663 * RET1: #synced
2664 * ERRORS: EINVAL Invalid devhandle, iotsb_handle, iotsb_index, or #iottes
2665 *
2666 * This service synchronizes #iottes mappings starting at index iotsb_index in
2667 * the IOTSB defined by the arguments devhandle, iotsb.
2668 *
2669 * #iottes must be greater than zero.
2670 *
2671 * The actual number of IOTTEs synchronized is returned in #synced, which may
2672 * be less than or equal to the requested number, #iottes.
2673 *
2674 * If, upon a successful return, #synced is less than #iottes, the caller should
2675 * continue to invoke this service with updated iotsb_index and #iottes
2676 * arguments until all pages are synchronized.
2677 */
2678#define HV_FAST_PCI_IOTSB_SYNC_MAPPINGS 0x19a
2679
2338/* Logical Domain Channel services. */ 2680/* Logical Domain Channel services. */
2339 2681
2340#define LDC_CHANNEL_DOWN 0 2682#define LDC_CHANNEL_DOWN 0
@@ -2993,6 +3335,7 @@ unsigned long sun4v_m7_set_perfreg(unsigned long reg_num,
2993#define HV_GRP_SDIO 0x0108 3335#define HV_GRP_SDIO 0x0108
2994#define HV_GRP_SDIO_ERR 0x0109 3336#define HV_GRP_SDIO_ERR 0x0109
2995#define HV_GRP_REBOOT_DATA 0x0110 3337#define HV_GRP_REBOOT_DATA 0x0110
3338#define HV_GRP_ATU 0x0111
2996#define HV_GRP_M7_PERF 0x0114 3339#define HV_GRP_M7_PERF 0x0114
2997#define HV_GRP_NIAG_PERF 0x0200 3340#define HV_GRP_NIAG_PERF 0x0200
2998#define HV_GRP_FIRE_PERF 0x0201 3341#define HV_GRP_FIRE_PERF 0x0201
diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h
index cd0d69fa7592..f24f356f2503 100644
--- a/arch/sparc/include/asm/iommu_64.h
+++ b/arch/sparc/include/asm/iommu_64.h
@@ -24,8 +24,36 @@ struct iommu_arena {
24 unsigned int limit; 24 unsigned int limit;
25}; 25};
26 26
27#define ATU_64_SPACE_SIZE 0x800000000 /* 32G */
28
29/* Data structures for SPARC ATU architecture */
30struct atu_iotsb {
31 void *table; /* IOTSB table base virtual addr*/
32 u64 ra; /* IOTSB table real addr */
33 u64 dvma_size; /* ranges[3].size or OS selected 32G size */
34 u64 dvma_base; /* ranges[3].base */
35 u64 table_size; /* IOTSB table size */
36 u64 page_size; /* IO PAGE size for IOTSB */
37 u32 iotsb_num; /* tsbnum is same as iotsb_handle */
38};
39
40struct atu_ranges {
41 u64 base;
42 u64 size;
43};
44
45struct atu {
46 struct atu_ranges *ranges;
47 struct atu_iotsb *iotsb;
48 struct iommu_map_table tbl;
49 u64 base;
50 u64 size;
51 u64 dma_addr_mask;
52};
53
27struct iommu { 54struct iommu {
28 struct iommu_map_table tbl; 55 struct iommu_map_table tbl;
56 struct atu *atu;
29 spinlock_t lock; 57 spinlock_t lock;
30 u32 dma_addr_mask; 58 u32 dma_addr_mask;
31 iopte_t *page_table; 59 iopte_t *page_table;
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 662500fa555f..267731234ce8 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -39,6 +39,7 @@ static struct api_info api_table[] = {
39 { .group = HV_GRP_SDIO, }, 39 { .group = HV_GRP_SDIO, },
40 { .group = HV_GRP_SDIO_ERR, }, 40 { .group = HV_GRP_SDIO_ERR, },
41 { .group = HV_GRP_REBOOT_DATA, }, 41 { .group = HV_GRP_REBOOT_DATA, },
42 { .group = HV_GRP_ATU, .flags = FLAG_PRE_API },
42 { .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API }, 43 { .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API },
43 { .group = HV_GRP_FIRE_PERF, }, 44 { .group = HV_GRP_FIRE_PERF, },
44 { .group = HV_GRP_N2_CPU, }, 45 { .group = HV_GRP_N2_CPU, },
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 5c615abff030..852a3291db96 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -760,8 +760,12 @@ int dma_supported(struct device *dev, u64 device_mask)
760 struct iommu *iommu = dev->archdata.iommu; 760 struct iommu *iommu = dev->archdata.iommu;
761 u64 dma_addr_mask = iommu->dma_addr_mask; 761 u64 dma_addr_mask = iommu->dma_addr_mask;
762 762
763 if (device_mask >= (1UL << 32UL)) 763 if (device_mask > DMA_BIT_MASK(32)) {
764 return 0; 764 if (iommu->atu)
765 dma_addr_mask = iommu->atu->dma_addr_mask;
766 else
767 return 0;
768 }
765 769
766 if ((device_mask & dma_addr_mask) == dma_addr_mask) 770 if ((device_mask & dma_addr_mask) == dma_addr_mask)
767 return 1; 771 return 1;
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index db57d8acdc01..06981cc716b6 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -44,6 +44,9 @@ static struct vpci_version vpci_versions[] = {
44 { .major = 1, .minor = 1 }, 44 { .major = 1, .minor = 1 },
45}; 45};
46 46
47static unsigned long vatu_major = 1;
48static unsigned long vatu_minor = 1;
49
47#define PGLIST_NENTS (PAGE_SIZE / sizeof(u64)) 50#define PGLIST_NENTS (PAGE_SIZE / sizeof(u64))
48 51
49struct iommu_batch { 52struct iommu_batch {
@@ -69,34 +72,57 @@ static inline void iommu_batch_start(struct device *dev, unsigned long prot, uns
69} 72}
70 73
71/* Interrupts must be disabled. */ 74/* Interrupts must be disabled. */
72static long iommu_batch_flush(struct iommu_batch *p) 75static long iommu_batch_flush(struct iommu_batch *p, u64 mask)
73{ 76{
74 struct pci_pbm_info *pbm = p->dev->archdata.host_controller; 77 struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
78 u64 *pglist = p->pglist;
79 u64 index_count;
75 unsigned long devhandle = pbm->devhandle; 80 unsigned long devhandle = pbm->devhandle;
76 unsigned long prot = p->prot; 81 unsigned long prot = p->prot;
77 unsigned long entry = p->entry; 82 unsigned long entry = p->entry;
78 u64 *pglist = p->pglist;
79 unsigned long npages = p->npages; 83 unsigned long npages = p->npages;
84 unsigned long iotsb_num;
85 unsigned long ret;
86 long num;
80 87
81 /* VPCI maj=1, min=[0,1] only supports read and write */ 88 /* VPCI maj=1, min=[0,1] only supports read and write */
82 if (vpci_major < 2) 89 if (vpci_major < 2)
83 prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE); 90 prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE);
84 91
85 while (npages != 0) { 92 while (npages != 0) {
86 long num; 93 if (mask <= DMA_BIT_MASK(32)) {
87 94 num = pci_sun4v_iommu_map(devhandle,
88 num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry), 95 HV_PCI_TSBID(0, entry),
89 npages, prot, __pa(pglist)); 96 npages,
90 if (unlikely(num < 0)) { 97 prot,
91 if (printk_ratelimit()) 98 __pa(pglist));
92 printk("iommu_batch_flush: IOMMU map of " 99 if (unlikely(num < 0)) {
93 "[%08lx:%08llx:%lx:%lx:%lx] failed with " 100 pr_err_ratelimited("%s: IOMMU map of [%08lx:%08llx:%lx:%lx:%lx] failed with status %ld\n",
94 "status %ld\n", 101 __func__,
95 devhandle, HV_PCI_TSBID(0, entry), 102 devhandle,
96 npages, prot, __pa(pglist), num); 103 HV_PCI_TSBID(0, entry),
97 return -1; 104 npages, prot, __pa(pglist),
105 num);
106 return -1;
107 }
108 } else {
109 index_count = HV_PCI_IOTSB_INDEX_COUNT(npages, entry),
110 iotsb_num = pbm->iommu->atu->iotsb->iotsb_num;
111 ret = pci_sun4v_iotsb_map(devhandle,
112 iotsb_num,
113 index_count,
114 prot,
115 __pa(pglist),
116 &num);
117 if (unlikely(ret != HV_EOK)) {
118 pr_err_ratelimited("%s: ATU map of [%08lx:%lx:%llx:%lx:%lx] failed with status %ld\n",
119 __func__,
120 devhandle, iotsb_num,
121 index_count, prot,
122 __pa(pglist), ret);
123 return -1;
124 }
98 } 125 }
99
100 entry += num; 126 entry += num;
101 npages -= num; 127 npages -= num;
102 pglist += num; 128 pglist += num;
@@ -108,19 +134,19 @@ static long iommu_batch_flush(struct iommu_batch *p)
108 return 0; 134 return 0;
109} 135}
110 136
111static inline void iommu_batch_new_entry(unsigned long entry) 137static inline void iommu_batch_new_entry(unsigned long entry, u64 mask)
112{ 138{
113 struct iommu_batch *p = this_cpu_ptr(&iommu_batch); 139 struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
114 140
115 if (p->entry + p->npages == entry) 141 if (p->entry + p->npages == entry)
116 return; 142 return;
117 if (p->entry != ~0UL) 143 if (p->entry != ~0UL)
118 iommu_batch_flush(p); 144 iommu_batch_flush(p, mask);
119 p->entry = entry; 145 p->entry = entry;
120} 146}
121 147
122/* Interrupts must be disabled. */ 148/* Interrupts must be disabled. */
123static inline long iommu_batch_add(u64 phys_page) 149static inline long iommu_batch_add(u64 phys_page, u64 mask)
124{ 150{
125 struct iommu_batch *p = this_cpu_ptr(&iommu_batch); 151 struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
126 152
@@ -128,28 +154,31 @@ static inline long iommu_batch_add(u64 phys_page)
128 154
129 p->pglist[p->npages++] = phys_page; 155 p->pglist[p->npages++] = phys_page;
130 if (p->npages == PGLIST_NENTS) 156 if (p->npages == PGLIST_NENTS)
131 return iommu_batch_flush(p); 157 return iommu_batch_flush(p, mask);
132 158
133 return 0; 159 return 0;
134} 160}
135 161
136/* Interrupts must be disabled. */ 162/* Interrupts must be disabled. */
137static inline long iommu_batch_end(void) 163static inline long iommu_batch_end(u64 mask)
138{ 164{
139 struct iommu_batch *p = this_cpu_ptr(&iommu_batch); 165 struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
140 166
141 BUG_ON(p->npages >= PGLIST_NENTS); 167 BUG_ON(p->npages >= PGLIST_NENTS);
142 168
143 return iommu_batch_flush(p); 169 return iommu_batch_flush(p, mask);
144} 170}
145 171
146static void *dma_4v_alloc_coherent(struct device *dev, size_t size, 172static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
147 dma_addr_t *dma_addrp, gfp_t gfp, 173 dma_addr_t *dma_addrp, gfp_t gfp,
148 unsigned long attrs) 174 unsigned long attrs)
149{ 175{
176 u64 mask;
150 unsigned long flags, order, first_page, npages, n; 177 unsigned long flags, order, first_page, npages, n;
151 unsigned long prot = 0; 178 unsigned long prot = 0;
152 struct iommu *iommu; 179 struct iommu *iommu;
180 struct atu *atu;
181 struct iommu_map_table *tbl;
153 struct page *page; 182 struct page *page;
154 void *ret; 183 void *ret;
155 long entry; 184 long entry;
@@ -174,14 +203,21 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
174 memset((char *)first_page, 0, PAGE_SIZE << order); 203 memset((char *)first_page, 0, PAGE_SIZE << order);
175 204
176 iommu = dev->archdata.iommu; 205 iommu = dev->archdata.iommu;
206 atu = iommu->atu;
207
208 mask = dev->coherent_dma_mask;
209 if (mask <= DMA_BIT_MASK(32))
210 tbl = &iommu->tbl;
211 else
212 tbl = &atu->tbl;
177 213
178 entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, 214 entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
179 (unsigned long)(-1), 0); 215 (unsigned long)(-1), 0);
180 216
181 if (unlikely(entry == IOMMU_ERROR_CODE)) 217 if (unlikely(entry == IOMMU_ERROR_CODE))
182 goto range_alloc_fail; 218 goto range_alloc_fail;
183 219
184 *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); 220 *dma_addrp = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
185 ret = (void *) first_page; 221 ret = (void *) first_page;
186 first_page = __pa(first_page); 222 first_page = __pa(first_page);
187 223
@@ -193,12 +229,12 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
193 entry); 229 entry);
194 230
195 for (n = 0; n < npages; n++) { 231 for (n = 0; n < npages; n++) {
196 long err = iommu_batch_add(first_page + (n * PAGE_SIZE)); 232 long err = iommu_batch_add(first_page + (n * PAGE_SIZE), mask);
197 if (unlikely(err < 0L)) 233 if (unlikely(err < 0L))
198 goto iommu_map_fail; 234 goto iommu_map_fail;
199 } 235 }
200 236
201 if (unlikely(iommu_batch_end() < 0L)) 237 if (unlikely(iommu_batch_end(mask) < 0L))
202 goto iommu_map_fail; 238 goto iommu_map_fail;
203 239
204 local_irq_restore(flags); 240 local_irq_restore(flags);
@@ -206,25 +242,71 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
206 return ret; 242 return ret;
207 243
208iommu_map_fail: 244iommu_map_fail:
209 iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, IOMMU_ERROR_CODE); 245 iommu_tbl_range_free(tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
210 246
211range_alloc_fail: 247range_alloc_fail:
212 free_pages(first_page, order); 248 free_pages(first_page, order);
213 return NULL; 249 return NULL;
214} 250}
215 251
216static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry, 252unsigned long dma_4v_iotsb_bind(unsigned long devhandle,
217 unsigned long npages) 253 unsigned long iotsb_num,
254 struct pci_bus *bus_dev)
255{
256 struct pci_dev *pdev;
257 unsigned long err;
258 unsigned int bus;
259 unsigned int device;
260 unsigned int fun;
261
262 list_for_each_entry(pdev, &bus_dev->devices, bus_list) {
263 if (pdev->subordinate) {
264 /* No need to bind pci bridge */
265 dma_4v_iotsb_bind(devhandle, iotsb_num,
266 pdev->subordinate);
267 } else {
268 bus = bus_dev->number;
269 device = PCI_SLOT(pdev->devfn);
270 fun = PCI_FUNC(pdev->devfn);
271 err = pci_sun4v_iotsb_bind(devhandle, iotsb_num,
272 HV_PCI_DEVICE_BUILD(bus,
273 device,
274 fun));
275
276 /* If bind fails for one device it is going to fail
277 * for rest of the devices because we are sharing
278 * IOTSB. So in case of failure simply return with
279 * error.
280 */
281 if (err)
282 return err;
283 }
284 }
285
286 return 0;
287}
288
289static void dma_4v_iommu_demap(struct device *dev, unsigned long devhandle,
290 dma_addr_t dvma, unsigned long iotsb_num,
291 unsigned long entry, unsigned long npages)
218{ 292{
219 u32 devhandle = *(u32 *)demap_arg;
220 unsigned long num, flags; 293 unsigned long num, flags;
294 unsigned long ret;
221 295
222 local_irq_save(flags); 296 local_irq_save(flags);
223 do { 297 do {
224 num = pci_sun4v_iommu_demap(devhandle, 298 if (dvma <= DMA_BIT_MASK(32)) {
225 HV_PCI_TSBID(0, entry), 299 num = pci_sun4v_iommu_demap(devhandle,
226 npages); 300 HV_PCI_TSBID(0, entry),
227 301 npages);
302 } else {
303 ret = pci_sun4v_iotsb_demap(devhandle, iotsb_num,
304 entry, npages, &num);
305 if (unlikely(ret != HV_EOK)) {
306 pr_err_ratelimited("pci_iotsb_demap() failed with error: %ld\n",
307 ret);
308 }
309 }
228 entry += num; 310 entry += num;
229 npages -= num; 311 npages -= num;
230 } while (npages != 0); 312 } while (npages != 0);
@@ -236,16 +318,28 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
236{ 318{
237 struct pci_pbm_info *pbm; 319 struct pci_pbm_info *pbm;
238 struct iommu *iommu; 320 struct iommu *iommu;
321 struct atu *atu;
322 struct iommu_map_table *tbl;
239 unsigned long order, npages, entry; 323 unsigned long order, npages, entry;
324 unsigned long iotsb_num;
240 u32 devhandle; 325 u32 devhandle;
241 326
242 npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; 327 npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
243 iommu = dev->archdata.iommu; 328 iommu = dev->archdata.iommu;
244 pbm = dev->archdata.host_controller; 329 pbm = dev->archdata.host_controller;
330 atu = iommu->atu;
245 devhandle = pbm->devhandle; 331 devhandle = pbm->devhandle;
246 entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT); 332
247 dma_4v_iommu_demap(&devhandle, entry, npages); 333 if (dvma <= DMA_BIT_MASK(32)) {
248 iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE); 334 tbl = &iommu->tbl;
335 iotsb_num = 0; /* we don't care for legacy iommu */
336 } else {
337 tbl = &atu->tbl;
338 iotsb_num = atu->iotsb->iotsb_num;
339 }
340 entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT);
341 dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages);
342 iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE);
249 order = get_order(size); 343 order = get_order(size);
250 if (order < 10) 344 if (order < 10)
251 free_pages((unsigned long)cpu, order); 345 free_pages((unsigned long)cpu, order);
@@ -257,13 +351,17 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
257 unsigned long attrs) 351 unsigned long attrs)
258{ 352{
259 struct iommu *iommu; 353 struct iommu *iommu;
354 struct atu *atu;
355 struct iommu_map_table *tbl;
356 u64 mask;
260 unsigned long flags, npages, oaddr; 357 unsigned long flags, npages, oaddr;
261 unsigned long i, base_paddr; 358 unsigned long i, base_paddr;
262 u32 bus_addr, ret;
263 unsigned long prot; 359 unsigned long prot;
360 dma_addr_t bus_addr, ret;
264 long entry; 361 long entry;
265 362
266 iommu = dev->archdata.iommu; 363 iommu = dev->archdata.iommu;
364 atu = iommu->atu;
267 365
268 if (unlikely(direction == DMA_NONE)) 366 if (unlikely(direction == DMA_NONE))
269 goto bad; 367 goto bad;
@@ -272,13 +370,19 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
272 npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); 370 npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
273 npages >>= IO_PAGE_SHIFT; 371 npages >>= IO_PAGE_SHIFT;
274 372
275 entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, 373 mask = *dev->dma_mask;
374 if (mask <= DMA_BIT_MASK(32))
375 tbl = &iommu->tbl;
376 else
377 tbl = &atu->tbl;
378
379 entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
276 (unsigned long)(-1), 0); 380 (unsigned long)(-1), 0);
277 381
278 if (unlikely(entry == IOMMU_ERROR_CODE)) 382 if (unlikely(entry == IOMMU_ERROR_CODE))
279 goto bad; 383 goto bad;
280 384
281 bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); 385 bus_addr = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
282 ret = bus_addr | (oaddr & ~IO_PAGE_MASK); 386 ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
283 base_paddr = __pa(oaddr & IO_PAGE_MASK); 387 base_paddr = __pa(oaddr & IO_PAGE_MASK);
284 prot = HV_PCI_MAP_ATTR_READ; 388 prot = HV_PCI_MAP_ATTR_READ;
@@ -293,11 +397,11 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
293 iommu_batch_start(dev, prot, entry); 397 iommu_batch_start(dev, prot, entry);
294 398
295 for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) { 399 for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
296 long err = iommu_batch_add(base_paddr); 400 long err = iommu_batch_add(base_paddr, mask);
297 if (unlikely(err < 0L)) 401 if (unlikely(err < 0L))
298 goto iommu_map_fail; 402 goto iommu_map_fail;
299 } 403 }
300 if (unlikely(iommu_batch_end() < 0L)) 404 if (unlikely(iommu_batch_end(mask) < 0L))
301 goto iommu_map_fail; 405 goto iommu_map_fail;
302 406
303 local_irq_restore(flags); 407 local_irq_restore(flags);
@@ -310,7 +414,7 @@ bad:
310 return DMA_ERROR_CODE; 414 return DMA_ERROR_CODE;
311 415
312iommu_map_fail: 416iommu_map_fail:
313 iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); 417 iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
314 return DMA_ERROR_CODE; 418 return DMA_ERROR_CODE;
315} 419}
316 420
@@ -320,7 +424,10 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
320{ 424{
321 struct pci_pbm_info *pbm; 425 struct pci_pbm_info *pbm;
322 struct iommu *iommu; 426 struct iommu *iommu;
427 struct atu *atu;
428 struct iommu_map_table *tbl;
323 unsigned long npages; 429 unsigned long npages;
430 unsigned long iotsb_num;
324 long entry; 431 long entry;
325 u32 devhandle; 432 u32 devhandle;
326 433
@@ -332,14 +439,23 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
332 439
333 iommu = dev->archdata.iommu; 440 iommu = dev->archdata.iommu;
334 pbm = dev->archdata.host_controller; 441 pbm = dev->archdata.host_controller;
442 atu = iommu->atu;
335 devhandle = pbm->devhandle; 443 devhandle = pbm->devhandle;
336 444
337 npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); 445 npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
338 npages >>= IO_PAGE_SHIFT; 446 npages >>= IO_PAGE_SHIFT;
339 bus_addr &= IO_PAGE_MASK; 447 bus_addr &= IO_PAGE_MASK;
340 entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT; 448
341 dma_4v_iommu_demap(&devhandle, entry, npages); 449 if (bus_addr <= DMA_BIT_MASK(32)) {
342 iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); 450 iotsb_num = 0; /* we don't care for legacy iommu */
451 tbl = &iommu->tbl;
452 } else {
453 iotsb_num = atu->iotsb->iotsb_num;
454 tbl = &atu->tbl;
455 }
456 entry = (bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT;
457 dma_4v_iommu_demap(dev, devhandle, bus_addr, iotsb_num, entry, npages);
458 iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
343} 459}
344 460
345static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, 461static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -353,12 +469,17 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
353 unsigned long seg_boundary_size; 469 unsigned long seg_boundary_size;
354 int outcount, incount, i; 470 int outcount, incount, i;
355 struct iommu *iommu; 471 struct iommu *iommu;
472 struct atu *atu;
473 struct iommu_map_table *tbl;
474 u64 mask;
356 unsigned long base_shift; 475 unsigned long base_shift;
357 long err; 476 long err;
358 477
359 BUG_ON(direction == DMA_NONE); 478 BUG_ON(direction == DMA_NONE);
360 479
361 iommu = dev->archdata.iommu; 480 iommu = dev->archdata.iommu;
481 atu = iommu->atu;
482
362 if (nelems == 0 || !iommu) 483 if (nelems == 0 || !iommu)
363 return 0; 484 return 0;
364 485
@@ -384,7 +505,15 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
384 max_seg_size = dma_get_max_seg_size(dev); 505 max_seg_size = dma_get_max_seg_size(dev);
385 seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 506 seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
386 IO_PAGE_SIZE) >> IO_PAGE_SHIFT; 507 IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
387 base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT; 508
509 mask = *dev->dma_mask;
510 if (mask <= DMA_BIT_MASK(32))
511 tbl = &iommu->tbl;
512 else
513 tbl = &atu->tbl;
514
515 base_shift = tbl->table_map_base >> IO_PAGE_SHIFT;
516
388 for_each_sg(sglist, s, nelems, i) { 517 for_each_sg(sglist, s, nelems, i) {
389 unsigned long paddr, npages, entry, out_entry = 0, slen; 518 unsigned long paddr, npages, entry, out_entry = 0, slen;
390 519
@@ -397,27 +526,26 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
397 /* Allocate iommu entries for that segment */ 526 /* Allocate iommu entries for that segment */
398 paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); 527 paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
399 npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); 528 npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
400 entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, 529 entry = iommu_tbl_range_alloc(dev, tbl, npages,
401 &handle, (unsigned long)(-1), 0); 530 &handle, (unsigned long)(-1), 0);
402 531
403 /* Handle failure */ 532 /* Handle failure */
404 if (unlikely(entry == IOMMU_ERROR_CODE)) { 533 if (unlikely(entry == IOMMU_ERROR_CODE)) {
405 if (printk_ratelimit()) 534 pr_err_ratelimited("iommu_alloc failed, iommu %p paddr %lx npages %lx\n",
406 printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx" 535 tbl, paddr, npages);
407 " npages %lx\n", iommu, paddr, npages);
408 goto iommu_map_failed; 536 goto iommu_map_failed;
409 } 537 }
410 538
411 iommu_batch_new_entry(entry); 539 iommu_batch_new_entry(entry, mask);
412 540
413 /* Convert entry to a dma_addr_t */ 541 /* Convert entry to a dma_addr_t */
414 dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT); 542 dma_addr = tbl->table_map_base + (entry << IO_PAGE_SHIFT);
415 dma_addr |= (s->offset & ~IO_PAGE_MASK); 543 dma_addr |= (s->offset & ~IO_PAGE_MASK);
416 544
417 /* Insert into HW table */ 545 /* Insert into HW table */
418 paddr &= IO_PAGE_MASK; 546 paddr &= IO_PAGE_MASK;
419 while (npages--) { 547 while (npages--) {
420 err = iommu_batch_add(paddr); 548 err = iommu_batch_add(paddr, mask);
421 if (unlikely(err < 0L)) 549 if (unlikely(err < 0L))
422 goto iommu_map_failed; 550 goto iommu_map_failed;
423 paddr += IO_PAGE_SIZE; 551 paddr += IO_PAGE_SIZE;
@@ -452,7 +580,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
452 dma_next = dma_addr + slen; 580 dma_next = dma_addr + slen;
453 } 581 }
454 582
455 err = iommu_batch_end(); 583 err = iommu_batch_end(mask);
456 584
457 if (unlikely(err < 0L)) 585 if (unlikely(err < 0L))
458 goto iommu_map_failed; 586 goto iommu_map_failed;
@@ -475,7 +603,7 @@ iommu_map_failed:
475 vaddr = s->dma_address & IO_PAGE_MASK; 603 vaddr = s->dma_address & IO_PAGE_MASK;
476 npages = iommu_num_pages(s->dma_address, s->dma_length, 604 npages = iommu_num_pages(s->dma_address, s->dma_length,
477 IO_PAGE_SIZE); 605 IO_PAGE_SIZE);
478 iommu_tbl_range_free(&iommu->tbl, vaddr, npages, 606 iommu_tbl_range_free(tbl, vaddr, npages,
479 IOMMU_ERROR_CODE); 607 IOMMU_ERROR_CODE);
480 /* XXX demap? XXX */ 608 /* XXX demap? XXX */
481 s->dma_address = DMA_ERROR_CODE; 609 s->dma_address = DMA_ERROR_CODE;
@@ -496,13 +624,16 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
496 struct pci_pbm_info *pbm; 624 struct pci_pbm_info *pbm;
497 struct scatterlist *sg; 625 struct scatterlist *sg;
498 struct iommu *iommu; 626 struct iommu *iommu;
627 struct atu *atu;
499 unsigned long flags, entry; 628 unsigned long flags, entry;
629 unsigned long iotsb_num;
500 u32 devhandle; 630 u32 devhandle;
501 631
502 BUG_ON(direction == DMA_NONE); 632 BUG_ON(direction == DMA_NONE);
503 633
504 iommu = dev->archdata.iommu; 634 iommu = dev->archdata.iommu;
505 pbm = dev->archdata.host_controller; 635 pbm = dev->archdata.host_controller;
636 atu = iommu->atu;
506 devhandle = pbm->devhandle; 637 devhandle = pbm->devhandle;
507 638
508 local_irq_save(flags); 639 local_irq_save(flags);
@@ -512,15 +643,24 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
512 dma_addr_t dma_handle = sg->dma_address; 643 dma_addr_t dma_handle = sg->dma_address;
513 unsigned int len = sg->dma_length; 644 unsigned int len = sg->dma_length;
514 unsigned long npages; 645 unsigned long npages;
515 struct iommu_map_table *tbl = &iommu->tbl; 646 struct iommu_map_table *tbl;
516 unsigned long shift = IO_PAGE_SHIFT; 647 unsigned long shift = IO_PAGE_SHIFT;
517 648
518 if (!len) 649 if (!len)
519 break; 650 break;
520 npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); 651 npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
652
653 if (dma_handle <= DMA_BIT_MASK(32)) {
654 iotsb_num = 0; /* we don't care for legacy iommu */
655 tbl = &iommu->tbl;
656 } else {
657 iotsb_num = atu->iotsb->iotsb_num;
658 tbl = &atu->tbl;
659 }
521 entry = ((dma_handle - tbl->table_map_base) >> shift); 660 entry = ((dma_handle - tbl->table_map_base) >> shift);
522 dma_4v_iommu_demap(&devhandle, entry, npages); 661 dma_4v_iommu_demap(dev, devhandle, dma_handle, iotsb_num,
523 iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, 662 entry, npages);
663 iommu_tbl_range_free(tbl, dma_handle, npages,
524 IOMMU_ERROR_CODE); 664 IOMMU_ERROR_CODE);
525 sg = sg_next(sg); 665 sg = sg_next(sg);
526 } 666 }
@@ -581,6 +721,132 @@ static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
581 return cnt; 721 return cnt;
582} 722}
583 723
724static int pci_sun4v_atu_alloc_iotsb(struct pci_pbm_info *pbm)
725{
726 struct atu *atu = pbm->iommu->atu;
727 struct atu_iotsb *iotsb;
728 void *table;
729 u64 table_size;
730 u64 iotsb_num;
731 unsigned long order;
732 unsigned long err;
733
734 iotsb = kzalloc(sizeof(*iotsb), GFP_KERNEL);
735 if (!iotsb) {
736 err = -ENOMEM;
737 goto out_err;
738 }
739 atu->iotsb = iotsb;
740
741 /* calculate size of IOTSB */
742 table_size = (atu->size / IO_PAGE_SIZE) * 8;
743 order = get_order(table_size);
744 table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
745 if (!table) {
746 err = -ENOMEM;
747 goto table_failed;
748 }
749 iotsb->table = table;
750 iotsb->ra = __pa(table);
751 iotsb->dvma_size = atu->size;
752 iotsb->dvma_base = atu->base;
753 iotsb->table_size = table_size;
754 iotsb->page_size = IO_PAGE_SIZE;
755
756 /* configure and register IOTSB with HV */
757 err = pci_sun4v_iotsb_conf(pbm->devhandle,
758 iotsb->ra,
759 iotsb->table_size,
760 iotsb->page_size,
761 iotsb->dvma_base,
762 &iotsb_num);
763 if (err) {
764 pr_err(PFX "pci_iotsb_conf failed error: %ld\n", err);
765 goto iotsb_conf_failed;
766 }
767 iotsb->iotsb_num = iotsb_num;
768
769 err = dma_4v_iotsb_bind(pbm->devhandle, iotsb_num, pbm->pci_bus);
770 if (err) {
771 pr_err(PFX "pci_iotsb_bind failed error: %ld\n", err);
772 goto iotsb_conf_failed;
773 }
774
775 return 0;
776
777iotsb_conf_failed:
778 free_pages((unsigned long)table, order);
779table_failed:
780 kfree(iotsb);
781out_err:
782 return err;
783}
784
785static int pci_sun4v_atu_init(struct pci_pbm_info *pbm)
786{
787 struct atu *atu = pbm->iommu->atu;
788 unsigned long err;
789 const u64 *ranges;
790 u64 map_size, num_iotte;
791 u64 dma_mask;
792 const u32 *page_size;
793 int len;
794
795 ranges = of_get_property(pbm->op->dev.of_node, "iommu-address-ranges",
796 &len);
797 if (!ranges) {
798 pr_err(PFX "No iommu-address-ranges\n");
799 return -EINVAL;
800 }
801
802 page_size = of_get_property(pbm->op->dev.of_node, "iommu-pagesizes",
803 NULL);
804 if (!page_size) {
805 pr_err(PFX "No iommu-pagesizes\n");
806 return -EINVAL;
807 }
808
809 /* There are 4 iommu-address-ranges supported. Each range is pair of
810 * {base, size}. The ranges[0] and ranges[1] are 32bit address space
811 * while ranges[2] and ranges[3] are 64bit space. We want to use 64bit
812 * address ranges to support 64bit addressing. Because 'size' for
813 * address ranges[2] and ranges[3] are same we can select either of
814 * ranges[2] or ranges[3] for mapping. However due to 'size' is too
815 * large for OS to allocate IOTSB we are using fix size 32G
816 * (ATU_64_SPACE_SIZE) which is more than enough for all PCIe devices
817 * to share.
818 */
819 atu->ranges = (struct atu_ranges *)ranges;
820 atu->base = atu->ranges[3].base;
821 atu->size = ATU_64_SPACE_SIZE;
822
823 /* Create IOTSB */
824 err = pci_sun4v_atu_alloc_iotsb(pbm);
825 if (err) {
826 pr_err(PFX "Error creating ATU IOTSB\n");
827 return err;
828 }
829
830 /* Create ATU iommu map.
831 * One bit represents one iotte in IOTSB table.
832 */
833 dma_mask = (roundup_pow_of_two(atu->size) - 1UL);
834 num_iotte = atu->size / IO_PAGE_SIZE;
835 map_size = num_iotte / 8;
836 atu->tbl.table_map_base = atu->base;
837 atu->dma_addr_mask = dma_mask;
838 atu->tbl.map = kzalloc(map_size, GFP_KERNEL);
839 if (!atu->tbl.map)
840 return -ENOMEM;
841
842 iommu_tbl_pool_init(&atu->tbl, num_iotte, IO_PAGE_SHIFT,
843 NULL, false /* no large_pool */,
844 0 /* default npools */,
845 false /* want span boundary checking */);
846
847 return 0;
848}
849
584static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm) 850static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
585{ 851{
586 static const u32 vdma_default[] = { 0x80000000, 0x80000000 }; 852 static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
@@ -918,6 +1184,18 @@ static int pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
918 1184
919 pci_sun4v_scan_bus(pbm, &op->dev); 1185 pci_sun4v_scan_bus(pbm, &op->dev);
920 1186
1187 /* if atu_init fails its not complete failure.
1188 * we can still continue using legacy iommu.
1189 */
1190 if (pbm->iommu->atu) {
1191 err = pci_sun4v_atu_init(pbm);
1192 if (err) {
1193 kfree(pbm->iommu->atu);
1194 pbm->iommu->atu = NULL;
1195 pr_err(PFX "ATU init failed, err=%d\n", err);
1196 }
1197 }
1198
921 pbm->next = pci_pbm_root; 1199 pbm->next = pci_pbm_root;
922 pci_pbm_root = pbm; 1200 pci_pbm_root = pbm;
923 1201
@@ -931,8 +1209,10 @@ static int pci_sun4v_probe(struct platform_device *op)
931 struct pci_pbm_info *pbm; 1209 struct pci_pbm_info *pbm;
932 struct device_node *dp; 1210 struct device_node *dp;
933 struct iommu *iommu; 1211 struct iommu *iommu;
1212 struct atu *atu;
934 u32 devhandle; 1213 u32 devhandle;
935 int i, err = -ENODEV; 1214 int i, err = -ENODEV;
1215 static bool hv_atu = true;
936 1216
937 dp = op->dev.of_node; 1217 dp = op->dev.of_node;
938 1218
@@ -954,6 +1234,19 @@ static int pci_sun4v_probe(struct platform_device *op)
954 pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n", 1234 pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n",
955 vpci_major, vpci_minor); 1235 vpci_major, vpci_minor);
956 1236
1237 err = sun4v_hvapi_register(HV_GRP_ATU, vatu_major, &vatu_minor);
1238 if (err) {
1239 /* don't return an error if we fail to register the
1240 * ATU group, but ATU hcalls won't be available.
1241 */
1242 hv_atu = false;
1243 pr_err(PFX "Could not register hvapi ATU err=%d\n",
1244 err);
1245 } else {
1246 pr_info(PFX "Registered hvapi ATU major[%lu] minor[%lu]\n",
1247 vatu_major, vatu_minor);
1248 }
1249
957 dma_ops = &sun4v_dma_ops; 1250 dma_ops = &sun4v_dma_ops;
958 } 1251 }
959 1252
@@ -991,6 +1284,14 @@ static int pci_sun4v_probe(struct platform_device *op)
991 } 1284 }
992 1285
993 pbm->iommu = iommu; 1286 pbm->iommu = iommu;
1287 iommu->atu = NULL;
1288 if (hv_atu) {
1289 atu = kzalloc(sizeof(*atu), GFP_KERNEL);
1290 if (!atu)
1291 pr_err(PFX "Could not allocate atu\n");
1292 else
1293 iommu->atu = atu;
1294 }
994 1295
995 err = pci_sun4v_pbm_init(pbm, op, devhandle); 1296 err = pci_sun4v_pbm_init(pbm, op, devhandle);
996 if (err) 1297 if (err)
@@ -1001,6 +1302,7 @@ static int pci_sun4v_probe(struct platform_device *op)
1001 return 0; 1302 return 0;
1002 1303
1003out_free_iommu: 1304out_free_iommu:
1305 kfree(iommu->atu);
1004 kfree(pbm->iommu); 1306 kfree(pbm->iommu);
1005 1307
1006out_free_controller: 1308out_free_controller:
diff --git a/arch/sparc/kernel/pci_sun4v.h b/arch/sparc/kernel/pci_sun4v.h
index 5642212390b2..22603a4e48bf 100644
--- a/arch/sparc/kernel/pci_sun4v.h
+++ b/arch/sparc/kernel/pci_sun4v.h
@@ -89,4 +89,25 @@ unsigned long pci_sun4v_msg_setvalid(unsigned long devhandle,
89 unsigned long msinum, 89 unsigned long msinum,
90 unsigned long valid); 90 unsigned long valid);
91 91
92/* Sun4v HV IOMMU v2 APIs */
93unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle,
94 unsigned long ra,
95 unsigned long table_size,
96 unsigned long page_size,
97 unsigned long dvma_base,
98 u64 *iotsb_num);
99unsigned long pci_sun4v_iotsb_bind(unsigned long devhandle,
100 unsigned long iotsb_num,
101 unsigned int pci_device);
102unsigned long pci_sun4v_iotsb_map(unsigned long devhandle,
103 unsigned long iotsb_num,
104 unsigned long iotsb_index_iottes,
105 unsigned long io_attributes,
106 unsigned long io_page_list_pa,
107 long *mapped);
108unsigned long pci_sun4v_iotsb_demap(unsigned long devhandle,
109 unsigned long iotsb_num,
110 unsigned long iotsb_index,
111 unsigned long iottes,
112 unsigned long *demapped);
92#endif /* !(_PCI_SUN4V_H) */ 113#endif /* !(_PCI_SUN4V_H) */
diff --git a/arch/sparc/kernel/pci_sun4v_asm.S b/arch/sparc/kernel/pci_sun4v_asm.S
index e606d46c6815..578f09657916 100644
--- a/arch/sparc/kernel/pci_sun4v_asm.S
+++ b/arch/sparc/kernel/pci_sun4v_asm.S
@@ -360,3 +360,71 @@ ENTRY(pci_sun4v_msg_setvalid)
360 mov %o0, %o0 360 mov %o0, %o0
361ENDPROC(pci_sun4v_msg_setvalid) 361ENDPROC(pci_sun4v_msg_setvalid)
362 362
363 /*
364 * %o0: devhandle
365 * %o1: r_addr
366 * %o2: size
367 * %o3: pagesize
368 * %o4: virt
369 * %o5: &iotsb_num/&iotsb_handle
370 *
371 * returns %o0: status
372 * %o1: iotsb_num/iotsb_handle
373 */
374ENTRY(pci_sun4v_iotsb_conf)
375 mov %o5, %g1
376 mov HV_FAST_PCI_IOTSB_CONF, %o5
377 ta HV_FAST_TRAP
378 retl
379 stx %o1, [%g1]
380ENDPROC(pci_sun4v_iotsb_conf)
381
382 /*
383 * %o0: devhandle
384 * %o1: iotsb_num/iotsb_handle
385 * %o2: pci_device
386 *
387 * returns %o0: status
388 */
389ENTRY(pci_sun4v_iotsb_bind)
390 mov HV_FAST_PCI_IOTSB_BIND, %o5
391 ta HV_FAST_TRAP
392 retl
393 nop
394ENDPROC(pci_sun4v_iotsb_bind)
395
396 /*
397 * %o0: devhandle
398 * %o1: iotsb_num/iotsb_handle
399 * %o2: index_count
400 * %o3: iotte_attributes
401 * %o4: io_page_list_p
402 * %o5: &mapped
403 *
404 * returns %o0: status
405 * %o1: #mapped
406 */
407ENTRY(pci_sun4v_iotsb_map)
408 mov %o5, %g1
409 mov HV_FAST_PCI_IOTSB_MAP, %o5
410 ta HV_FAST_TRAP
411 retl
412 stx %o1, [%g1]
413ENDPROC(pci_sun4v_iotsb_map)
414
415 /*
416 * %o0: devhandle
417 * %o1: iotsb_num/iotsb_handle
418 * %o2: iotsb_index
419 * %o3: #iottes
420 * %o4: &demapped
421 *
422 * returns %o0: status
423 * %o1: #demapped
424 */
425ENTRY(pci_sun4v_iotsb_demap)
426 mov HV_FAST_PCI_IOTSB_DEMAP, %o5
427 ta HV_FAST_TRAP
428 retl
429 stx %o1, [%o4]
430ENDPROC(pci_sun4v_iotsb_demap)