diff options
author | Hariprasad Shenai <hariprasad@chelsio.com> | 2015-03-04 07:46:28 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-03-05 15:11:52 -0500 |
commit | f36e58e5668694cd89d0a4d04a767a6286d497cc (patch) | |
tree | 24472433ad14872d1ae55a1346b5030c74de1a48 /drivers/net/ethernet | |
parent | 1c6a5b0e3446c36e3fc3f4531b1cf2db61f8319b (diff) |
cxgb4: Try and provide an RDMA CIQ per cpu
To allow for better scalability on systems with large core counts, we
try to allocate as many RDMA Concentrator IQs and MSI-X vectors as we
have cores. If we cannot get enough MSI-X vectors, we fall back to the
minimum required: 1 per adapter rx channel.
Also clean up enable_msix() to make it readable, and fix a bug
where the vectors are not correctly assigned if the driver doesn't get
the full number requested.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet')
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 6 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 53 |
3 files changed, 46 insertions, 17 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 97842d03675b..4555634b985d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | |||
@@ -369,7 +369,7 @@ enum { | |||
369 | MAX_OFLD_QSETS = 16, /* # of offload Tx/Rx queue sets */ | 369 | MAX_OFLD_QSETS = 16, /* # of offload Tx/Rx queue sets */ |
370 | MAX_CTRL_QUEUES = NCHAN, /* # of control Tx queues */ | 370 | MAX_CTRL_QUEUES = NCHAN, /* # of control Tx queues */ |
371 | MAX_RDMA_QUEUES = NCHAN, /* # of streaming RDMA Rx queues */ | 371 | MAX_RDMA_QUEUES = NCHAN, /* # of streaming RDMA Rx queues */ |
372 | MAX_RDMA_CIQS = NCHAN, /* # of RDMA concentrator IQs */ | 372 | MAX_RDMA_CIQS = 32, /* # of RDMA concentrator IQs */ |
373 | MAX_ISCSI_QUEUES = NCHAN, /* # of streaming iSCSI Rx queues */ | 373 | MAX_ISCSI_QUEUES = NCHAN, /* # of streaming iSCSI Rx queues */ |
374 | }; | 374 | }; |
375 | 375 | ||
@@ -599,8 +599,8 @@ struct sge { | |||
599 | u16 rdmaqs; /* # of available RDMA Rx queues */ | 599 | u16 rdmaqs; /* # of available RDMA Rx queues */ |
600 | u16 rdmaciqs; /* # of available RDMA concentrator IQs */ | 600 | u16 rdmaciqs; /* # of available RDMA concentrator IQs */ |
601 | u16 ofld_rxq[MAX_OFLD_QSETS]; | 601 | u16 ofld_rxq[MAX_OFLD_QSETS]; |
602 | u16 rdma_rxq[NCHAN]; | 602 | u16 rdma_rxq[MAX_RDMA_QUEUES]; |
603 | u16 rdma_ciq[NCHAN]; | 603 | u16 rdma_ciq[MAX_RDMA_CIQS]; |
604 | u16 timer_val[SGE_NTIMERS]; | 604 | u16 timer_val[SGE_NTIMERS]; |
605 | u8 counter_val[SGE_NCOUNTERS]; | 605 | u8 counter_val[SGE_NCOUNTERS]; |
606 | u32 fl_pg_order; /* large page allocation size */ | 606 | u32 fl_pg_order; /* large page allocation size */ |
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 78854ceb0870..0918c16bb154 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | |||
@@ -1769,6 +1769,8 @@ do { \ | |||
1769 | int n = min(4, adap->sge.rdmaqs - 4 * rdma_idx); | 1769 | int n = min(4, adap->sge.rdmaqs - 4 * rdma_idx); |
1770 | 1770 | ||
1771 | S("QType:", "RDMA-CPL"); | 1771 | S("QType:", "RDMA-CPL"); |
1772 | S("Interface:", | ||
1773 | rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A"); | ||
1772 | R("RspQ ID:", rspq.abs_id); | 1774 | R("RspQ ID:", rspq.abs_id); |
1773 | R("RspQ size:", rspq.size); | 1775 | R("RspQ size:", rspq.size); |
1774 | R("RspQE size:", rspq.iqe_len); | 1776 | R("RspQE size:", rspq.iqe_len); |
@@ -1788,6 +1790,8 @@ do { \ | |||
1788 | int n = min(4, adap->sge.rdmaciqs - 4 * ciq_idx); | 1790 | int n = min(4, adap->sge.rdmaciqs - 4 * ciq_idx); |
1789 | 1791 | ||
1790 | S("QType:", "RDMA-CIQ"); | 1792 | S("QType:", "RDMA-CIQ"); |
1793 | S("Interface:", | ||
1794 | rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A"); | ||
1791 | R("RspQ ID:", rspq.abs_id); | 1795 | R("RspQ ID:", rspq.abs_id); |
1792 | R("RspQ size:", rspq.size); | 1796 | R("RspQ size:", rspq.size); |
1793 | R("RspQE size:", rspq.iqe_len); | 1797 | R("RspQE size:", rspq.iqe_len); |
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 836e41166915..e344bdcd40b3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | |||
@@ -1057,7 +1057,8 @@ freeout: t4_free_sge_resources(adap); | |||
1057 | 1057 | ||
1058 | ALLOC_OFLD_RXQS(s->ofldrxq, s->ofldqsets, j, s->ofld_rxq); | 1058 | ALLOC_OFLD_RXQS(s->ofldrxq, s->ofldqsets, j, s->ofld_rxq); |
1059 | ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq); | 1059 | ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq); |
1060 | ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, 1, s->rdma_ciq); | 1060 | j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */ |
1061 | ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq); | ||
1061 | 1062 | ||
1062 | #undef ALLOC_OFLD_RXQS | 1063 | #undef ALLOC_OFLD_RXQS |
1063 | 1064 | ||
@@ -5702,7 +5703,16 @@ static void cfg_queues(struct adapter *adap) | |||
5702 | s->ofldqsets = adap->params.nports; | 5703 | s->ofldqsets = adap->params.nports; |
5703 | /* For RDMA one Rx queue per channel suffices */ | 5704 | /* For RDMA one Rx queue per channel suffices */ |
5704 | s->rdmaqs = adap->params.nports; | 5705 | s->rdmaqs = adap->params.nports; |
5705 | s->rdmaciqs = adap->params.nports; | 5706 | /* Try and allow at least 1 CIQ per cpu rounding down |
5707 | * to the number of ports, with a minimum of 1 per port. | ||
5708 | * A 2 port card in a 6 cpu system: 6 CIQs, 3 / port. | ||
5709 | * A 4 port card in a 6 cpu system: 4 CIQs, 1 / port. | ||
5710 | * A 4 port card in a 2 cpu system: 4 CIQs, 1 / port. | ||
5711 | */ | ||
5712 | s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus()); | ||
5713 | s->rdmaciqs = (s->rdmaciqs / adap->params.nports) * | ||
5714 | adap->params.nports; | ||
5715 | s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports); | ||
5706 | } | 5716 | } |
5707 | 5717 | ||
5708 | for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) { | 5718 | for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) { |
@@ -5788,12 +5798,17 @@ static void reduce_ethqs(struct adapter *adap, int n) | |||
5788 | static int enable_msix(struct adapter *adap) | 5798 | static int enable_msix(struct adapter *adap) |
5789 | { | 5799 | { |
5790 | int ofld_need = 0; | 5800 | int ofld_need = 0; |
5791 | int i, want, need; | 5801 | int i, want, need, allocated; |
5792 | struct sge *s = &adap->sge; | 5802 | struct sge *s = &adap->sge; |
5793 | unsigned int nchan = adap->params.nports; | 5803 | unsigned int nchan = adap->params.nports; |
5794 | struct msix_entry entries[MAX_INGQ + 1]; | 5804 | struct msix_entry *entries; |
5805 | |||
5806 | entries = kmalloc(sizeof(*entries) * (MAX_INGQ + 1), | ||
5807 | GFP_KERNEL); | ||
5808 | if (!entries) | ||
5809 | return -ENOMEM; | ||
5795 | 5810 | ||
5796 | for (i = 0; i < ARRAY_SIZE(entries); ++i) | 5811 | for (i = 0; i < MAX_INGQ + 1; ++i) |
5797 | entries[i].entry = i; | 5812 | entries[i].entry = i; |
5798 | 5813 | ||
5799 | want = s->max_ethqsets + EXTRA_VECS; | 5814 | want = s->max_ethqsets + EXTRA_VECS; |
@@ -5810,29 +5825,39 @@ static int enable_msix(struct adapter *adap) | |||
5810 | #else | 5825 | #else |
5811 | need = adap->params.nports + EXTRA_VECS + ofld_need; | 5826 | need = adap->params.nports + EXTRA_VECS + ofld_need; |
5812 | #endif | 5827 | #endif |
5813 | want = pci_enable_msix_range(adap->pdev, entries, need, want); | 5828 | allocated = pci_enable_msix_range(adap->pdev, entries, need, want); |
5814 | if (want < 0) | 5829 | if (allocated < 0) { |
5815 | return want; | 5830 | dev_info(adap->pdev_dev, "not enough MSI-X vectors left," |
5831 | " not using MSI-X\n"); | ||
5832 | kfree(entries); | ||
5833 | return allocated; | ||
5834 | } | ||
5816 | 5835 | ||
5817 | /* | 5836 | /* Distribute available vectors to the various queue groups. |
5818 | * Distribute available vectors to the various queue groups. | ||
5819 | * Every group gets its minimum requirement and NIC gets top | 5837 | * Every group gets its minimum requirement and NIC gets top |
5820 | * priority for leftovers. | 5838 | * priority for leftovers. |
5821 | */ | 5839 | */ |
5822 | i = want - EXTRA_VECS - ofld_need; | 5840 | i = allocated - EXTRA_VECS - ofld_need; |
5823 | if (i < s->max_ethqsets) { | 5841 | if (i < s->max_ethqsets) { |
5824 | s->max_ethqsets = i; | 5842 | s->max_ethqsets = i; |
5825 | if (i < s->ethqsets) | 5843 | if (i < s->ethqsets) |
5826 | reduce_ethqs(adap, i); | 5844 | reduce_ethqs(adap, i); |
5827 | } | 5845 | } |
5828 | if (is_offload(adap)) { | 5846 | if (is_offload(adap)) { |
5829 | i = want - EXTRA_VECS - s->max_ethqsets; | 5847 | if (allocated < want) { |
5830 | i -= ofld_need - nchan; | 5848 | s->rdmaqs = nchan; |
5849 | s->rdmaciqs = nchan; | ||
5850 | } | ||
5851 | |||
5852 | /* leftovers go to OFLD */ | ||
5853 | i = allocated - EXTRA_VECS - s->max_ethqsets - | ||
5854 | s->rdmaqs - s->rdmaciqs; | ||
5831 | s->ofldqsets = (i / nchan) * nchan; /* round down */ | 5855 | s->ofldqsets = (i / nchan) * nchan; /* round down */ |
5832 | } | 5856 | } |
5833 | for (i = 0; i < want; ++i) | 5857 | for (i = 0; i < allocated; ++i) |
5834 | adap->msix_info[i].vec = entries[i].vector; | 5858 | adap->msix_info[i].vec = entries[i].vector; |
5835 | 5859 | ||
5860 | kfree(entries); | ||
5836 | return 0; | 5861 | return 0; |
5837 | } | 5862 | } |
5838 | 5863 | ||