diff options
author | David S. Miller <davem@davemloft.net> | 2018-02-14 15:01:52 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-02-14 15:01:52 -0500 |
commit | a92ac140fc30f5e8fc407d75de44e0bce7f59ab6 (patch) | |
tree | 262dd88933b7e899a7d22cd3e94077843ed2f989 | |
parent | 9942895b5ee4b0db53f32fbcb4a51360607aac1b (diff) | |
parent | 7494f980ca0503e3eec6f4ba508186d269b37e7f (diff) |
Merge branch 'cxgb4-speed-up-reading-on-chip-memory'
Rahul Lakkireddy says:
====================
cxgb4: speed up reading on-chip memory
This series of patches speeds up reading on-chip memory (EDC and MC)
by reading 64-bits at a time.
Patch 1 reworks logic to read EDC and MC.
Patch 2 adds logic to read EDC and MC 64-bits at a time.
v2:
- Dropped AVX CPU intrinsic instructions.
- Use readq() to read 64-bits at a time.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c | 86 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 5 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 193 |
3 files changed, 211 insertions, 73 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 557fd8bfd54e..6322b8df0ed0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c | |||
@@ -878,6 +878,86 @@ static int cudbg_get_payload_range(struct adapter *padap, u8 mem_type, | |||
878 | &payload->start, &payload->end); | 878 | &payload->start, &payload->end); |
879 | } | 879 | } |
880 | 880 | ||
881 | static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win, | ||
882 | int mtype, u32 addr, u32 len, void *hbuf) | ||
883 | { | ||
884 | u32 win_pf, memoffset, mem_aperture, mem_base; | ||
885 | struct adapter *adap = pdbg_init->adap; | ||
886 | u32 pos, offset, resid; | ||
887 | u32 *res_buf; | ||
888 | u64 *buf; | ||
889 | int ret; | ||
890 | |||
891 | /* Argument sanity checks ... | ||
892 | */ | ||
893 | if (addr & 0x3 || (uintptr_t)hbuf & 0x3) | ||
894 | return -EINVAL; | ||
895 | |||
896 | buf = (u64 *)hbuf; | ||
897 | |||
898 | /* Try to do 64-bit reads. Residual will be handled later. */ | ||
899 | resid = len & 0x7; | ||
900 | len -= resid; | ||
901 | |||
902 | ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base, | ||
903 | &mem_aperture); | ||
904 | if (ret) | ||
905 | return ret; | ||
906 | |||
907 | addr = addr + memoffset; | ||
908 | win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf); | ||
909 | |||
910 | pos = addr & ~(mem_aperture - 1); | ||
911 | offset = addr - pos; | ||
912 | |||
913 | /* Set up initial PCI-E Memory Window to cover the start of our | ||
914 | * transfer. | ||
915 | */ | ||
916 | t4_memory_update_win(adap, win, pos | win_pf); | ||
917 | |||
918 | /* Transfer data from the adapter */ | ||
919 | while (len > 0) { | ||
920 | *buf++ = le64_to_cpu((__force __le64) | ||
921 | t4_read_reg64(adap, mem_base + offset)); | ||
922 | offset += sizeof(u64); | ||
923 | len -= sizeof(u64); | ||
924 | |||
925 | /* If we've reached the end of our current window aperture, | ||
926 | * move the PCI-E Memory Window on to the next. | ||
927 | */ | ||
928 | if (offset == mem_aperture) { | ||
929 | pos += mem_aperture; | ||
930 | offset = 0; | ||
931 | t4_memory_update_win(adap, win, pos | win_pf); | ||
932 | } | ||
933 | } | ||
934 | |||
935 | res_buf = (u32 *)buf; | ||
936 | /* Read residual in 32-bit multiples */ | ||
937 | while (resid > sizeof(u32)) { | ||
938 | *res_buf++ = le32_to_cpu((__force __le32) | ||
939 | t4_read_reg(adap, mem_base + offset)); | ||
940 | offset += sizeof(u32); | ||
941 | resid -= sizeof(u32); | ||
942 | |||
943 | /* If we've reached the end of our current window aperture, | ||
944 | * move the PCI-E Memory Window on to the next. | ||
945 | */ | ||
946 | if (offset == mem_aperture) { | ||
947 | pos += mem_aperture; | ||
948 | offset = 0; | ||
949 | t4_memory_update_win(adap, win, pos | win_pf); | ||
950 | } | ||
951 | } | ||
952 | |||
953 | /* Transfer residual < 32-bits */ | ||
954 | if (resid) | ||
955 | t4_memory_rw_residual(adap, resid, mem_base + offset, | ||
956 | (u8 *)res_buf, T4_MEMORY_READ); | ||
957 | |||
958 | return 0; | ||
959 | } | ||
960 | |||
881 | #define CUDBG_YIELD_ITERATION 256 | 961 | #define CUDBG_YIELD_ITERATION 256 |
882 | 962 | ||
883 | static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, | 963 | static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, |
@@ -937,10 +1017,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, | |||
937 | goto skip_read; | 1017 | goto skip_read; |
938 | 1018 | ||
939 | spin_lock(&padap->win0_lock); | 1019 | spin_lock(&padap->win0_lock); |
940 | rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type, | 1020 | rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type, |
941 | bytes_read, bytes, | 1021 | bytes_read, bytes, temp_buff.data); |
942 | (__be32 *)temp_buff.data, | ||
943 | 1); | ||
944 | spin_unlock(&padap->win0_lock); | 1022 | spin_unlock(&padap->win0_lock); |
945 | if (rc) { | 1023 | if (rc) { |
946 | cudbg_err->sys_err = rc; | 1024 | cudbg_err->sys_err = rc; |
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 9040e13ce4b7..d3fa53db61ee 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | |||
@@ -1488,6 +1488,11 @@ u32 t4_read_pcie_cfg4(struct adapter *adap, int reg); | |||
1488 | u32 t4_get_util_window(struct adapter *adap); | 1488 | u32 t4_get_util_window(struct adapter *adap); |
1489 | void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window); | 1489 | void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window); |
1490 | 1490 | ||
1491 | int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off, | ||
1492 | u32 *mem_base, u32 *mem_aperture); | ||
1493 | void t4_memory_update_win(struct adapter *adap, int win, u32 addr); | ||
1494 | void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf, | ||
1495 | int dir); | ||
1491 | #define T4_MEMORY_WRITE 0 | 1496 | #define T4_MEMORY_WRITE 0 |
1492 | #define T4_MEMORY_READ 1 | 1497 | #define T4_MEMORY_READ 1 |
1493 | int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len, | 1498 | int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len, |
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 047609ef0515..ba647462be6c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | |||
@@ -484,6 +484,117 @@ static int t4_edc_err_read(struct adapter *adap, int idx) | |||
484 | } | 484 | } |
485 | 485 | ||
486 | /** | 486 | /** |
487 | * t4_memory_rw_init - Get memory window relative offset, base, and size. | ||
488 | * @adap: the adapter | ||
489 | * @win: PCI-E Memory Window to use | ||
490 | * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC | ||
491 | * @mem_off: memory relative offset with respect to @mtype. | ||
492 | * @mem_base: configured memory base address. | ||
493 | * @mem_aperture: configured memory window aperture. | ||
494 | * | ||
495 | * Get the configured memory window's relative offset, base, and size. | ||
496 | */ | ||
497 | int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off, | ||
498 | u32 *mem_base, u32 *mem_aperture) | ||
499 | { | ||
500 | u32 edc_size, mc_size, mem_reg; | ||
501 | |||
502 | /* Offset into the region of memory which is being accessed | ||
503 | * MEM_EDC0 = 0 | ||
504 | * MEM_EDC1 = 1 | ||
505 | * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller | ||
506 | * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5) | ||
507 | * MEM_HMA = 4 | ||
508 | */ | ||
509 | edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A)); | ||
510 | if (mtype == MEM_HMA) { | ||
511 | *mem_off = 2 * (edc_size * 1024 * 1024); | ||
512 | } else if (mtype != MEM_MC1) { | ||
513 | *mem_off = (mtype * (edc_size * 1024 * 1024)); | ||
514 | } else { | ||
515 | mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap, | ||
516 | MA_EXT_MEMORY0_BAR_A)); | ||
517 | *mem_off = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024; | ||
518 | } | ||
519 | |||
520 | /* Each PCI-E Memory Window is programmed with a window size -- or | ||
521 | * "aperture" -- which controls the granularity of its mapping onto | ||
522 | * adapter memory. We need to grab that aperture in order to know | ||
523 | * how to use the specified window. The window is also programmed | ||
524 | * with the base address of the Memory Window in BAR0's address | ||
525 | * space. For T4 this is an absolute PCI-E Bus Address. For T5 | ||
526 | * the address is relative to BAR0. | ||
527 | */ | ||
528 | mem_reg = t4_read_reg(adap, | ||
529 | PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, | ||
530 | win)); | ||
531 | /* a dead adapter will return 0xffffffff for PIO reads */ | ||
532 | if (mem_reg == 0xffffffff) | ||
533 | return -ENXIO; | ||
534 | |||
535 | *mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X); | ||
536 | *mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X; | ||
537 | if (is_t4(adap->params.chip)) | ||
538 | *mem_base -= adap->t4_bar0; | ||
539 | |||
540 | return 0; | ||
541 | } | ||
542 | |||
543 | /** | ||
544 | * t4_memory_update_win - Move memory window to specified address. | ||
545 | * @adap: the adapter | ||
546 | * @win: PCI-E Memory Window to use | ||
547 | * @addr: location to move. | ||
548 | * | ||
549 | * Move memory window to specified address. | ||
550 | */ | ||
551 | void t4_memory_update_win(struct adapter *adap, int win, u32 addr) | ||
552 | { | ||
553 | t4_write_reg(adap, | ||
554 | PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win), | ||
555 | addr); | ||
556 | /* Read it back to ensure that changes propagate before we | ||
557 | * attempt to use the new value. | ||
558 | */ | ||
559 | t4_read_reg(adap, | ||
560 | PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win)); | ||
561 | } | ||
562 | |||
563 | /** | ||
564 | * t4_memory_rw_residual - Read/Write residual data. | ||
565 | * @adap: the adapter | ||
566 | * @off: relative offset within residual to start read/write. | ||
567 | * @addr: address within indicated memory type. | ||
568 | * @buf: host memory buffer | ||
569 | * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0) | ||
570 | * | ||
571 | * Read/Write residual data less than 32-bits. | ||
572 | */ | ||
573 | void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf, | ||
574 | int dir) | ||
575 | { | ||
576 | union { | ||
577 | u32 word; | ||
578 | char byte[4]; | ||
579 | } last; | ||
580 | unsigned char *bp; | ||
581 | int i; | ||
582 | |||
583 | if (dir == T4_MEMORY_READ) { | ||
584 | last.word = le32_to_cpu((__force __le32) | ||
585 | t4_read_reg(adap, addr)); | ||
586 | for (bp = (unsigned char *)buf, i = off; i < 4; i++) | ||
587 | bp[i] = last.byte[i]; | ||
588 | } else { | ||
589 | last.word = *buf; | ||
590 | for (i = off; i < 4; i++) | ||
591 | last.byte[i] = 0; | ||
592 | t4_write_reg(adap, addr, | ||
593 | (__force u32)cpu_to_le32(last.word)); | ||
594 | } | ||
595 | } | ||
596 | |||
597 | /** | ||
487 | * t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window | 598 | * t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window |
488 | * @adap: the adapter | 599 | * @adap: the adapter |
489 | * @win: PCI-E Memory Window to use | 600 | * @win: PCI-E Memory Window to use |
@@ -504,8 +615,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, | |||
504 | u32 len, void *hbuf, int dir) | 615 | u32 len, void *hbuf, int dir) |
505 | { | 616 | { |
506 | u32 pos, offset, resid, memoffset; | 617 | u32 pos, offset, resid, memoffset; |
507 | u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base; | 618 | u32 win_pf, mem_aperture, mem_base; |
508 | u32 *buf; | 619 | u32 *buf; |
620 | int ret; | ||
509 | 621 | ||
510 | /* Argument sanity checks ... | 622 | /* Argument sanity checks ... |
511 | */ | 623 | */ |
@@ -521,59 +633,26 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, | |||
521 | resid = len & 0x3; | 633 | resid = len & 0x3; |
522 | len -= resid; | 634 | len -= resid; |
523 | 635 | ||
524 | /* Offset into the region of memory which is being accessed | 636 | ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base, |
525 | * MEM_EDC0 = 0 | 637 | &mem_aperture); |
526 | * MEM_EDC1 = 1 | 638 | if (ret) |
527 | * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller | 639 | return ret; |
528 | * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5) | ||
529 | * MEM_HMA = 4 | ||
530 | */ | ||
531 | edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A)); | ||
532 | if (mtype == MEM_HMA) { | ||
533 | memoffset = 2 * (edc_size * 1024 * 1024); | ||
534 | } else if (mtype != MEM_MC1) { | ||
535 | memoffset = (mtype * (edc_size * 1024 * 1024)); | ||
536 | } else { | ||
537 | mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap, | ||
538 | MA_EXT_MEMORY0_BAR_A)); | ||
539 | memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024; | ||
540 | } | ||
541 | 640 | ||
542 | /* Determine the PCIE_MEM_ACCESS_OFFSET */ | 641 | /* Determine the PCIE_MEM_ACCESS_OFFSET */ |
543 | addr = addr + memoffset; | 642 | addr = addr + memoffset; |
544 | 643 | ||
545 | /* Each PCI-E Memory Window is programmed with a window size -- or | ||
546 | * "aperture" -- which controls the granularity of its mapping onto | ||
547 | * adapter memory. We need to grab that aperture in order to know | ||
548 | * how to use the specified window. The window is also programmed | ||
549 | * with the base address of the Memory Window in BAR0's address | ||
550 | * space. For T4 this is an absolute PCI-E Bus Address. For T5 | ||
551 | * the address is relative to BAR0. | ||
552 | */ | ||
553 | mem_reg = t4_read_reg(adap, | ||
554 | PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, | ||
555 | win)); | ||
556 | mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X); | ||
557 | mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X; | ||
558 | if (is_t4(adap->params.chip)) | ||
559 | mem_base -= adap->t4_bar0; | ||
560 | win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf); | 644 | win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf); |
561 | 645 | ||
562 | /* Calculate our initial PCI-E Memory Window Position and Offset into | 646 | /* Calculate our initial PCI-E Memory Window Position and Offset into |
563 | * that Window. | 647 | * that Window. |
564 | */ | 648 | */ |
565 | pos = addr & ~(mem_aperture-1); | 649 | pos = addr & ~(mem_aperture - 1); |
566 | offset = addr - pos; | 650 | offset = addr - pos; |
567 | 651 | ||
568 | /* Set up initial PCI-E Memory Window to cover the start of our | 652 | /* Set up initial PCI-E Memory Window to cover the start of our |
569 | * transfer. (Read it back to ensure that changes propagate before we | 653 | * transfer. |
570 | * attempt to use the new value.) | ||
571 | */ | 654 | */ |
572 | t4_write_reg(adap, | 655 | t4_memory_update_win(adap, win, pos | win_pf); |
573 | PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win), | ||
574 | pos | win_pf); | ||
575 | t4_read_reg(adap, | ||
576 | PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win)); | ||
577 | 656 | ||
578 | /* Transfer data to/from the adapter as long as there's an integral | 657 | /* Transfer data to/from the adapter as long as there's an integral |
579 | * number of 32-bit transfers to complete. | 658 | * number of 32-bit transfers to complete. |
@@ -628,12 +707,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, | |||
628 | if (offset == mem_aperture) { | 707 | if (offset == mem_aperture) { |
629 | pos += mem_aperture; | 708 | pos += mem_aperture; |
630 | offset = 0; | 709 | offset = 0; |
631 | t4_write_reg(adap, | 710 | t4_memory_update_win(adap, win, pos | win_pf); |
632 | PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, | ||
633 | win), pos | win_pf); | ||
634 | t4_read_reg(adap, | ||
635 | PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, | ||
636 | win)); | ||
637 | } | 711 | } |
638 | } | 712 | } |
639 | 713 | ||
@@ -642,28 +716,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, | |||
642 | * residual amount. The PCI-E Memory Window has already been moved | 716 | * residual amount. The PCI-E Memory Window has already been moved |
643 | * above (if necessary) to cover this final transfer. | 717 | * above (if necessary) to cover this final transfer. |
644 | */ | 718 | */ |
645 | if (resid) { | 719 | if (resid) |
646 | union { | 720 | t4_memory_rw_residual(adap, resid, mem_base + offset, |
647 | u32 word; | 721 | (u8 *)buf, dir); |
648 | char byte[4]; | ||
649 | } last; | ||
650 | unsigned char *bp; | ||
651 | int i; | ||
652 | |||
653 | if (dir == T4_MEMORY_READ) { | ||
654 | last.word = le32_to_cpu( | ||
655 | (__force __le32)t4_read_reg(adap, | ||
656 | mem_base + offset)); | ||
657 | for (bp = (unsigned char *)buf, i = resid; i < 4; i++) | ||
658 | bp[i] = last.byte[i]; | ||
659 | } else { | ||
660 | last.word = *buf; | ||
661 | for (i = resid; i < 4; i++) | ||
662 | last.byte[i] = 0; | ||
663 | t4_write_reg(adap, mem_base + offset, | ||
664 | (__force u32)cpu_to_le32(last.word)); | ||
665 | } | ||
666 | } | ||
667 | 722 | ||
668 | return 0; | 723 | return 0; |
669 | } | 724 | } |