aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-02-14 15:01:52 -0500
committerDavid S. Miller <davem@davemloft.net>2018-02-14 15:01:52 -0500
commita92ac140fc30f5e8fc407d75de44e0bce7f59ab6 (patch)
tree262dd88933b7e899a7d22cd3e94077843ed2f989
parent9942895b5ee4b0db53f32fbcb4a51360607aac1b (diff)
parent7494f980ca0503e3eec6f4ba508186d269b37e7f (diff)
Merge branch 'cxgb4-speed-up-reading-on-chip-memory'
Rahul Lakkireddy says: ==================== cxgb4: speed up reading on-chip memory This series of patches speeds up reading on-chip memory (EDC and MC) by reading 64 bits at a time. Patch 1 reworks the logic used to read EDC and MC. Patch 2 adds logic to read EDC and MC 64 bits at a time. v2: - Dropped AVX CPU intrinsic instructions. - Use readq() to read 64 bits at a time. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c86
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h5
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c193
3 files changed, 211 insertions, 73 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index 557fd8bfd54e..6322b8df0ed0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -878,6 +878,86 @@ static int cudbg_get_payload_range(struct adapter *padap, u8 mem_type,
878 &payload->start, &payload->end); 878 &payload->start, &payload->end);
879} 879}
880 880
881static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
882 int mtype, u32 addr, u32 len, void *hbuf)
883{
884 u32 win_pf, memoffset, mem_aperture, mem_base;
885 struct adapter *adap = pdbg_init->adap;
886 u32 pos, offset, resid;
887 u32 *res_buf;
888 u64 *buf;
889 int ret;
890
891 /* Argument sanity checks ...
892 */
893 if (addr & 0x3 || (uintptr_t)hbuf & 0x3)
894 return -EINVAL;
895
896 buf = (u64 *)hbuf;
897
898 /* Try to do 64-bit reads. Residual will be handled later. */
899 resid = len & 0x7;
900 len -= resid;
901
902 ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
903 &mem_aperture);
904 if (ret)
905 return ret;
906
907 addr = addr + memoffset;
908 win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
909
910 pos = addr & ~(mem_aperture - 1);
911 offset = addr - pos;
912
913 /* Set up initial PCI-E Memory Window to cover the start of our
914 * transfer.
915 */
916 t4_memory_update_win(adap, win, pos | win_pf);
917
918 /* Transfer data from the adapter */
919 while (len > 0) {
920 *buf++ = le64_to_cpu((__force __le64)
921 t4_read_reg64(adap, mem_base + offset));
922 offset += sizeof(u64);
923 len -= sizeof(u64);
924
925 /* If we've reached the end of our current window aperture,
926 * move the PCI-E Memory Window on to the next.
927 */
928 if (offset == mem_aperture) {
929 pos += mem_aperture;
930 offset = 0;
931 t4_memory_update_win(adap, win, pos | win_pf);
932 }
933 }
934
935 res_buf = (u32 *)buf;
936 /* Read residual in 32-bit multiples */
937 while (resid > sizeof(u32)) {
938 *res_buf++ = le32_to_cpu((__force __le32)
939 t4_read_reg(adap, mem_base + offset));
940 offset += sizeof(u32);
941 resid -= sizeof(u32);
942
943 /* If we've reached the end of our current window aperture,
944 * move the PCI-E Memory Window on to the next.
945 */
946 if (offset == mem_aperture) {
947 pos += mem_aperture;
948 offset = 0;
949 t4_memory_update_win(adap, win, pos | win_pf);
950 }
951 }
952
953 /* Transfer residual < 32-bits */
954 if (resid)
955 t4_memory_rw_residual(adap, resid, mem_base + offset,
956 (u8 *)res_buf, T4_MEMORY_READ);
957
958 return 0;
959}
960
881#define CUDBG_YIELD_ITERATION 256 961#define CUDBG_YIELD_ITERATION 256
882 962
883static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, 963static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
@@ -937,10 +1017,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
937 goto skip_read; 1017 goto skip_read;
938 1018
939 spin_lock(&padap->win0_lock); 1019 spin_lock(&padap->win0_lock);
940 rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type, 1020 rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type,
941 bytes_read, bytes, 1021 bytes_read, bytes, temp_buff.data);
942 (__be32 *)temp_buff.data,
943 1);
944 spin_unlock(&padap->win0_lock); 1022 spin_unlock(&padap->win0_lock);
945 if (rc) { 1023 if (rc) {
946 cudbg_err->sys_err = rc; 1024 cudbg_err->sys_err = rc;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 9040e13ce4b7..d3fa53db61ee 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1488,6 +1488,11 @@ u32 t4_read_pcie_cfg4(struct adapter *adap, int reg);
1488u32 t4_get_util_window(struct adapter *adap); 1488u32 t4_get_util_window(struct adapter *adap);
1489void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window); 1489void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window);
1490 1490
1491int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
1492 u32 *mem_base, u32 *mem_aperture);
1493void t4_memory_update_win(struct adapter *adap, int win, u32 addr);
1494void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
1495 int dir);
1491#define T4_MEMORY_WRITE 0 1496#define T4_MEMORY_WRITE 0
1492#define T4_MEMORY_READ 1 1497#define T4_MEMORY_READ 1
1493int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len, 1498int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 047609ef0515..ba647462be6c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -484,6 +484,117 @@ static int t4_edc_err_read(struct adapter *adap, int idx)
484} 484}
485 485
486/** 486/**
487 * t4_memory_rw_init - Get memory window relative offset, base, and size.
488 * @adap: the adapter
489 * @win: PCI-E Memory Window to use
490 * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC
491 * @mem_off: memory relative offset with respect to @mtype.
492 * @mem_base: configured memory base address.
493 * @mem_aperture: configured memory window aperture.
494 *
495 * Get the configured memory window's relative offset, base, and size.
496 */
497int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
498 u32 *mem_base, u32 *mem_aperture)
499{
500 u32 edc_size, mc_size, mem_reg;
501
502 /* Offset into the region of memory which is being accessed
503 * MEM_EDC0 = 0
504 * MEM_EDC1 = 1
505 * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller
506 * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5)
507 * MEM_HMA = 4
508 */
509 edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
510 if (mtype == MEM_HMA) {
511 *mem_off = 2 * (edc_size * 1024 * 1024);
512 } else if (mtype != MEM_MC1) {
513 *mem_off = (mtype * (edc_size * 1024 * 1024));
514 } else {
515 mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
516 MA_EXT_MEMORY0_BAR_A));
517 *mem_off = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
518 }
519
520 /* Each PCI-E Memory Window is programmed with a window size -- or
521 * "aperture" -- which controls the granularity of its mapping onto
522 * adapter memory. We need to grab that aperture in order to know
523 * how to use the specified window. The window is also programmed
524 * with the base address of the Memory Window in BAR0's address
525 * space. For T4 this is an absolute PCI-E Bus Address. For T5
526 * the address is relative to BAR0.
527 */
528 mem_reg = t4_read_reg(adap,
529 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
530 win));
531 /* a dead adapter will return 0xffffffff for PIO reads */
532 if (mem_reg == 0xffffffff)
533 return -ENXIO;
534
535 *mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
536 *mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
537 if (is_t4(adap->params.chip))
538 *mem_base -= adap->t4_bar0;
539
540 return 0;
541}
542
543/**
544 * t4_memory_update_win - Move memory window to specified address.
545 * @adap: the adapter
546 * @win: PCI-E Memory Window to use
547 * @addr: location to move.
548 *
549 * Move memory window to specified address.
550 */
551void t4_memory_update_win(struct adapter *adap, int win, u32 addr)
552{
553 t4_write_reg(adap,
554 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
555 addr);
556 /* Read it back to ensure that changes propagate before we
557 * attempt to use the new value.
558 */
559 t4_read_reg(adap,
560 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
561}
562
563/**
564 * t4_memory_rw_residual - Read/Write residual data.
565 * @adap: the adapter
566 * @off: relative offset within residual to start read/write.
567 * @addr: address within indicated memory type.
568 * @buf: host memory buffer
569 * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0)
570 *
571 * Read/Write residual data less than 32-bits.
572 */
573void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
574 int dir)
575{
576 union {
577 u32 word;
578 char byte[4];
579 } last;
580 unsigned char *bp;
581 int i;
582
583 if (dir == T4_MEMORY_READ) {
584 last.word = le32_to_cpu((__force __le32)
585 t4_read_reg(adap, addr));
586 for (bp = (unsigned char *)buf, i = off; i < 4; i++)
587 bp[i] = last.byte[i];
588 } else {
589 last.word = *buf;
590 for (i = off; i < 4; i++)
591 last.byte[i] = 0;
592 t4_write_reg(adap, addr,
593 (__force u32)cpu_to_le32(last.word));
594 }
595}
596
597/**
487 * t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window 598 * t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
488 * @adap: the adapter 599 * @adap: the adapter
489 * @win: PCI-E Memory Window to use 600 * @win: PCI-E Memory Window to use
@@ -504,8 +615,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
504 u32 len, void *hbuf, int dir) 615 u32 len, void *hbuf, int dir)
505{ 616{
506 u32 pos, offset, resid, memoffset; 617 u32 pos, offset, resid, memoffset;
507 u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base; 618 u32 win_pf, mem_aperture, mem_base;
508 u32 *buf; 619 u32 *buf;
620 int ret;
509 621
510 /* Argument sanity checks ... 622 /* Argument sanity checks ...
511 */ 623 */
@@ -521,59 +633,26 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
521 resid = len & 0x3; 633 resid = len & 0x3;
522 len -= resid; 634 len -= resid;
523 635
524 /* Offset into the region of memory which is being accessed 636 ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
525 * MEM_EDC0 = 0 637 &mem_aperture);
526 * MEM_EDC1 = 1 638 if (ret)
527 * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller 639 return ret;
528 * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5)
529 * MEM_HMA = 4
530 */
531 edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
532 if (mtype == MEM_HMA) {
533 memoffset = 2 * (edc_size * 1024 * 1024);
534 } else if (mtype != MEM_MC1) {
535 memoffset = (mtype * (edc_size * 1024 * 1024));
536 } else {
537 mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
538 MA_EXT_MEMORY0_BAR_A));
539 memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
540 }
541 640
542 /* Determine the PCIE_MEM_ACCESS_OFFSET */ 641 /* Determine the PCIE_MEM_ACCESS_OFFSET */
543 addr = addr + memoffset; 642 addr = addr + memoffset;
544 643
545 /* Each PCI-E Memory Window is programmed with a window size -- or
546 * "aperture" -- which controls the granularity of its mapping onto
547 * adapter memory. We need to grab that aperture in order to know
548 * how to use the specified window. The window is also programmed
549 * with the base address of the Memory Window in BAR0's address
550 * space. For T4 this is an absolute PCI-E Bus Address. For T5
551 * the address is relative to BAR0.
552 */
553 mem_reg = t4_read_reg(adap,
554 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
555 win));
556 mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
557 mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
558 if (is_t4(adap->params.chip))
559 mem_base -= adap->t4_bar0;
560 win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf); 644 win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
561 645
562 /* Calculate our initial PCI-E Memory Window Position and Offset into 646 /* Calculate our initial PCI-E Memory Window Position and Offset into
563 * that Window. 647 * that Window.
564 */ 648 */
565 pos = addr & ~(mem_aperture-1); 649 pos = addr & ~(mem_aperture - 1);
566 offset = addr - pos; 650 offset = addr - pos;
567 651
568 /* Set up initial PCI-E Memory Window to cover the start of our 652 /* Set up initial PCI-E Memory Window to cover the start of our
569 * transfer. (Read it back to ensure that changes propagate before we 653 * transfer.
570 * attempt to use the new value.)
571 */ 654 */
572 t4_write_reg(adap, 655 t4_memory_update_win(adap, win, pos | win_pf);
573 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
574 pos | win_pf);
575 t4_read_reg(adap,
576 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
577 656
578 /* Transfer data to/from the adapter as long as there's an integral 657 /* Transfer data to/from the adapter as long as there's an integral
579 * number of 32-bit transfers to complete. 658 * number of 32-bit transfers to complete.
@@ -628,12 +707,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
628 if (offset == mem_aperture) { 707 if (offset == mem_aperture) {
629 pos += mem_aperture; 708 pos += mem_aperture;
630 offset = 0; 709 offset = 0;
631 t4_write_reg(adap, 710 t4_memory_update_win(adap, win, pos | win_pf);
632 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
633 win), pos | win_pf);
634 t4_read_reg(adap,
635 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
636 win));
637 } 711 }
638 } 712 }
639 713
@@ -642,28 +716,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
642 * residual amount. The PCI-E Memory Window has already been moved 716 * residual amount. The PCI-E Memory Window has already been moved
643 * above (if necessary) to cover this final transfer. 717 * above (if necessary) to cover this final transfer.
644 */ 718 */
645 if (resid) { 719 if (resid)
646 union { 720 t4_memory_rw_residual(adap, resid, mem_base + offset,
647 u32 word; 721 (u8 *)buf, dir);
648 char byte[4];
649 } last;
650 unsigned char *bp;
651 int i;
652
653 if (dir == T4_MEMORY_READ) {
654 last.word = le32_to_cpu(
655 (__force __le32)t4_read_reg(adap,
656 mem_base + offset));
657 for (bp = (unsigned char *)buf, i = resid; i < 4; i++)
658 bp[i] = last.byte[i];
659 } else {
660 last.word = *buf;
661 for (i = resid; i < 4; i++)
662 last.byte[i] = 0;
663 t4_write_reg(adap, mem_base + offset,
664 (__force u32)cpu_to_le32(last.word));
665 }
666 }
667 722
668 return 0; 723 return 0;
669} 724}