author	Steve Wise <swise@opengridcomputing.com>	2014-03-14 12:22:08 -0400
committer	David S. Miller <davem@davemloft.net>	2014-03-14 22:44:11 -0400
commit	05eb23893c2cf9502a9cec0c32e7f1d1ed2895c8
tree	b7552ee535f565d5a83039ed976a442c7f869402 /drivers/net/ethernet/chelsio
parent	7a2cea2aaae2d5eb5c00c49c52180c7c2c66130a
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB usage, and also fails to avoid DB FIFO drops under heavy load. This commit fixes these deficiencies and makes the avoidance logic more optimal. It does so by notifying the ULDs of potential DB problems more efficiently, and by implementing a smoother flow-control algorithm in iw_cxgb4, the ULD that puts the most load on the DB FIFO.

Design:

cxgb4:

Direct ULD callback from the DB FULL/DROP interrupt handler. This allows the ULD to stop doing user DB writes as quickly as possible.

While user DB usage is disabled, the LLD will accumulate DB write events for its queues. Then, once DB usage is reenabled, a single DB write is done for each queue with its accumulated write count. This reduces the load put on the DB FIFO when reenabling.

iw_cxgb4:

Instead of marking each qp to indicate DB writes are disabled, we create a device-global status page that each user process maps. This allows iw_cxgb4 to set a single bit to disable all DB writes for all user QPs, rather than traversing the idr of all the active QPs. If libcxgb4 doesn't support this, we fall back to the old approach of marking each QP, so the new driver still works with an older libcxgb4.

When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes via the status page and transition the DB state to STOPPED. As user processes see that DB writes are disabled, they call into iw_cxgb4 to submit their DB write events. Since the DB state is STOPPED, the QP trying to write gets enqueued on a new DB "flow control" list. As subsequent DB writes are submitted for this flow-controlled QP, the number of writes is accumulated for each QP on the flow control list. So all the user QPs that are actively ringing the DB get put on this list, and the number of writes they request is accumulated.

When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which happens in a workq context, we change the DB state to FLOW_CONTROL and begin resuming all the QPs that are on the flow control list. This logic runs until the flow control list is empty or we exit FLOW_CONTROL mode (due to a DB DROP upcall, for example). QPs are removed from this list, and their accumulated DB write counts are written to the DB FIFO. Sets of QPs, called chunks in the code, are removed at one time. The chunk size is 64, so 64 QPs are resumed at a time, and before the next chunk is resumed, the logic waits (blocks) for the DB FIFO to drain. This prevents resuming too quickly and overflowing the FIFO. Once the flow control list is empty, the DB state transitions back to NORMAL and user QPs are again allowed to write directly to the user DB register.

The algorithm is designed such that if the DB write load is high enough, all the DB writes get submitted by the kernel using this flow-controlled approach to avoid DB drops. As the load lightens, we resume normal DB writes directly by user applications.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
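The iw_cxgb4 half of this change lives under drivers/infiniband/hw/cxgb4 and is outside the diffstat below, so the chunked resume is described above only in prose. The following is a minimal sketch of that flow-control loop; the struct layout, field names, and the helpers ring_kernel_db() and wait_for_db_fifo() are illustrative placeholders, not the driver's actual symbols.

/*
 * Sketch only: approximates the FLOW_CONTROL resume pass described above.
 * Names and helpers are illustrative, not the real iw_cxgb4 API.
 */
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>

#define DB_FC_RESUME_SIZE 64		/* QPs resumed per chunk */

enum db_state { NORMAL, FLOW_CONTROL, STOPPED };

struct c4iw_qp {
	struct list_head db_fc_entry;	/* linkage on the flow control list */
	u32 qid;
	u16 pidx_inc;			/* DB writes accumulated while stopped */
};

struct c4iw_dev {
	spinlock_t lock;
	enum db_state db_state;
	struct list_head db_fc_list;	/* QPs waiting to ring the DB */
};

/* Hypothetical helpers: submit an accumulated count for one queue via the
 * kernel doorbell, and block until the DB FIFO has drained.
 */
static void ring_kernel_db(struct c4iw_dev *dev, u32 qid, u16 inc);
static void wait_for_db_fifo(struct c4iw_dev *dev);

static void resume_queues(struct c4iw_dev *dev)
{
	spin_lock_irq(&dev->lock);
	while (dev->db_state == FLOW_CONTROL && !list_empty(&dev->db_fc_list)) {
		int count = DB_FC_RESUME_SIZE;

		/* Resume one chunk: pop up to 64 QPs and write each one's
		 * accumulated count to the DB FIFO.
		 */
		while (count-- && !list_empty(&dev->db_fc_list)) {
			struct c4iw_qp *qp;

			qp = list_first_entry(&dev->db_fc_list,
					      struct c4iw_qp, db_fc_entry);
			list_del_init(&qp->db_fc_entry);
			ring_kernel_db(dev, qp->qid, qp->pidx_inc);
			qp->pidx_inc = 0;
		}

		/* Block until the FIFO drains before the next chunk, so the
		 * resume itself cannot overflow the FIFO.
		 */
		spin_unlock_irq(&dev->lock);
		wait_for_db_fifo(dev);
		spin_lock_irq(&dev->lock);
	}

	/* Nothing left to flow control: let user QPs ring the DB directly. */
	if (dev->db_state == FLOW_CONTROL && list_empty(&dev->db_fc_list))
		dev->db_state = NORMAL;
	spin_unlock_irq(&dev->lock);
}

The state transitions mirror the LLD upcalls: DB FULL moves the device to STOPPED, DB EMPTY moves it to FLOW_CONTROL and runs the loop above, and an empty flow control list returns it to NORMAL.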
Diffstat (limited to 'drivers/net/ethernet/chelsio')
-rw-r--r--	drivers/net/ethernet/chelsio/cxgb4/cxgb4.h	1
-rw-r--r--	drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c	87
-rw-r--r--	drivers/net/ethernet/chelsio/cxgb4/sge.c	8
3 files changed, 58 insertions(+), 38 deletions(-)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 50abe1d61287..32db37709263 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -500,6 +500,7 @@ struct sge_txq {
 	spinlock_t db_lock;
 	int db_disabled;
 	unsigned short db_pidx;
+	unsigned short db_pidx_inc;
 	u64 udb;
 };
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 0ac53dd84c61..cc04d090354c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3578,14 +3578,25 @@ static void drain_db_fifo(struct adapter *adap, int usecs)
 
 static void disable_txq_db(struct sge_txq *q)
 {
-	spin_lock_irq(&q->db_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&q->db_lock, flags);
 	q->db_disabled = 1;
-	spin_unlock_irq(&q->db_lock);
+	spin_unlock_irqrestore(&q->db_lock, flags);
 }
 
-static void enable_txq_db(struct sge_txq *q)
+static void enable_txq_db(struct adapter *adap, struct sge_txq *q)
 {
 	spin_lock_irq(&q->db_lock);
+	if (q->db_pidx_inc) {
+		/* Make sure that all writes to the TX descriptors
+		 * are committed before we tell HW about them.
+		 */
+		wmb();
+		t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
+			     QID(q->cntxt_id) | PIDX(q->db_pidx_inc));
+		q->db_pidx_inc = 0;
+	}
 	q->db_disabled = 0;
 	spin_unlock_irq(&q->db_lock);
 }
@@ -3607,11 +3618,32 @@ static void enable_dbs(struct adapter *adap)
 	int i;
 
 	for_each_ethrxq(&adap->sge, i)
-		enable_txq_db(&adap->sge.ethtxq[i].q);
+		enable_txq_db(adap, &adap->sge.ethtxq[i].q);
 	for_each_ofldrxq(&adap->sge, i)
-		enable_txq_db(&adap->sge.ofldtxq[i].q);
+		enable_txq_db(adap, &adap->sge.ofldtxq[i].q);
 	for_each_port(adap, i)
-		enable_txq_db(&adap->sge.ctrlq[i].q);
+		enable_txq_db(adap, &adap->sge.ctrlq[i].q);
+}
+
+static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
+{
+	if (adap->uld_handle[CXGB4_ULD_RDMA])
+		ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA],
+					     cmd);
+}
+
+static void process_db_full(struct work_struct *work)
+{
+	struct adapter *adap;
+
+	adap = container_of(work, struct adapter, db_full_task);
+
+	drain_db_fifo(adap, dbfifo_drain_delay);
+	enable_dbs(adap);
+	notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY);
+	t4_set_reg_field(adap, SGE_INT_ENABLE3,
+			 DBFIFO_HP_INT | DBFIFO_LP_INT,
+			 DBFIFO_HP_INT | DBFIFO_LP_INT);
 }
 
 static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q)
@@ -3619,7 +3651,7 @@ static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q)
 	u16 hw_pidx, hw_cidx;
 	int ret;
 
-	spin_lock_bh(&q->db_lock);
+	spin_lock_irq(&q->db_lock);
 	ret = read_eq_indices(adap, (u16)q->cntxt_id, &hw_pidx, &hw_cidx);
 	if (ret)
 		goto out;
@@ -3636,7 +3668,8 @@ static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q)
 	}
 out:
 	q->db_disabled = 0;
-	spin_unlock_bh(&q->db_lock);
+	q->db_pidx_inc = 0;
+	spin_unlock_irq(&q->db_lock);
 	if (ret)
 		CH_WARN(adap, "DB drop recovery failed.\n");
 }
@@ -3652,29 +3685,6 @@ static void recover_all_queues(struct adapter *adap)
 		sync_txq_pidx(adap, &adap->sge.ctrlq[i].q);
 }
 
-static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
-{
-	mutex_lock(&uld_mutex);
-	if (adap->uld_handle[CXGB4_ULD_RDMA])
-		ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA],
-					     cmd);
-	mutex_unlock(&uld_mutex);
-}
-
-static void process_db_full(struct work_struct *work)
-{
-	struct adapter *adap;
-
-	adap = container_of(work, struct adapter, db_full_task);
-
-	notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL);
-	drain_db_fifo(adap, dbfifo_drain_delay);
-	t4_set_reg_field(adap, SGE_INT_ENABLE3,
-			 DBFIFO_HP_INT | DBFIFO_LP_INT,
-			 DBFIFO_HP_INT | DBFIFO_LP_INT);
-	notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY);
-}
-
 static void process_db_drop(struct work_struct *work)
 {
 	struct adapter *adap;
@@ -3682,11 +3692,13 @@ static void process_db_drop(struct work_struct *work)
 	adap = container_of(work, struct adapter, db_drop_task);
 
 	if (is_t4(adap->params.chip)) {
-		disable_dbs(adap);
+		drain_db_fifo(adap, dbfifo_drain_delay);
 		notify_rdma_uld(adap, CXGB4_CONTROL_DB_DROP);
-		drain_db_fifo(adap, 1);
+		drain_db_fifo(adap, dbfifo_drain_delay);
 		recover_all_queues(adap);
+		drain_db_fifo(adap, dbfifo_drain_delay);
 		enable_dbs(adap);
+		notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY);
 	} else {
 		u32 dropped_db = t4_read_reg(adap, 0x010ac);
 		u16 qid = (dropped_db >> 15) & 0x1ffff;
@@ -3727,6 +3739,8 @@ static void process_db_drop(struct work_struct *work)
 void t4_db_full(struct adapter *adap)
 {
 	if (is_t4(adap->params.chip)) {
+		disable_dbs(adap);
+		notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL);
 		t4_set_reg_field(adap, SGE_INT_ENABLE3,
 				 DBFIFO_HP_INT | DBFIFO_LP_INT, 0);
 		queue_work(workq, &adap->db_full_task);
@@ -3735,8 +3749,11 @@ void t4_db_full(struct adapter *adap)
 
 void t4_db_dropped(struct adapter *adap)
 {
-	if (is_t4(adap->params.chip))
-		queue_work(workq, &adap->db_drop_task);
+	if (is_t4(adap->params.chip)) {
+		disable_dbs(adap);
+		notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL);
+	}
+	queue_work(workq, &adap->db_drop_task);
 }
 
 static void uld_attach(struct adapter *adap, unsigned int uld)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 46429f9d0592..d4db382ff8c7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -860,9 +860,10 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src)
 static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
 {
 	unsigned int *wr, index;
+	unsigned long flags;
 
 	wmb();            /* write descriptors before telling HW */
-	spin_lock(&q->db_lock);
+	spin_lock_irqsave(&q->db_lock, flags);
 	if (!q->db_disabled) {
 		if (is_t4(adap->params.chip)) {
 			t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
@@ -878,9 +879,10 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
 			writel(n, adap->bar2 + q->udb + 8);
 			wmb();
 		}
-	}
+	} else
+		q->db_pidx_inc += n;
 	q->db_pidx = q->pidx;
-	spin_unlock(&q->db_lock);
+	spin_unlock_irqrestore(&q->db_lock, flags);
 }
 
 /**