aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHariprasad Shenai <hariprasad@chelsio.com>2014-09-25 14:53:52 -0400
committerDavid S. Miller <davem@davemloft.net>2014-09-28 17:32:10 -0400
commitd63a6dcf06cb3e6f3d08d79ba23603b03828650e (patch)
tree999af643615347ff030c2a5c846df1fecc061514
parent825bae5d97d351ddf9720400df133fc4b1bc6a13 (diff)
cxgb4: Use BAR2 Going To Sleep (GTS) for T5 and later.
Use BAR2 GTS for T5. If we are on T4 use the old doorbell mechanism; otherwise use the new BAR2 mechanism. Use BAR2 doorbells for refilling FL's. Based on original work by Casey Leedom <leedom@chelsio.com> Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c186
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_regs.h19
3 files changed, 167 insertions, 42 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index c067b7888ac4..54b10d4c990a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -431,6 +431,7 @@ struct sge_fl { /* SGE free-buffer queue state */
431 struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */ 431 struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */
432 __be64 *desc; /* address of HW Rx descriptor ring */ 432 __be64 *desc; /* address of HW Rx descriptor ring */
433 dma_addr_t addr; /* bus address of HW ring start */ 433 dma_addr_t addr; /* bus address of HW ring start */
434 u64 udb; /* BAR2 offset of User Doorbell area */
434}; 435};
435 436
436/* A packet gather list */ 437/* A packet gather list */
@@ -459,6 +460,7 @@ struct sge_rspq { /* state for an SGE response queue */
459 u16 abs_id; /* absolute SGE id for the response q */ 460 u16 abs_id; /* absolute SGE id for the response q */
460 __be64 *desc; /* address of HW response ring */ 461 __be64 *desc; /* address of HW response ring */
461 dma_addr_t phys_addr; /* physical address of the ring */ 462 dma_addr_t phys_addr; /* physical address of the ring */
463 u64 udb; /* BAR2 offset of User Doorbell area */
462 unsigned int iqe_len; /* entry size */ 464 unsigned int iqe_len; /* entry size */
463 unsigned int size; /* capacity of response queue */ 465 unsigned int size; /* capacity of response queue */
464 struct adapter *adap; 466 struct adapter *adap;
@@ -516,7 +518,7 @@ struct sge_txq {
516 int db_disabled; 518 int db_disabled;
517 unsigned short db_pidx; 519 unsigned short db_pidx;
518 unsigned short db_pidx_inc; 520 unsigned short db_pidx_inc;
519 u64 udb; 521 u64 udb; /* BAR2 offset of User Doorbell area */
520}; 522};
521 523
522struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */ 524struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index d22d728d4e5c..87db53343543 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -521,9 +521,23 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
521 val = PIDX(q->pend_cred / 8); 521 val = PIDX(q->pend_cred / 8);
522 if (!is_t4(adap->params.chip)) 522 if (!is_t4(adap->params.chip))
523 val |= DBTYPE(1); 523 val |= DBTYPE(1);
524 val |= DBPRIO(1);
524 wmb(); 525 wmb();
525 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO(1) | 526
526 QID(q->cntxt_id) | val); 527 /* If we're on T4, use the old doorbell mechanism; otherwise
528 * use the new BAR2 mechanism.
529 */
530 if (is_t4(adap->params.chip)) {
531 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
532 val | QID(q->cntxt_id));
533 } else {
534 writel(val, adap->bar2 + q->udb + SGE_UDB_KDOORBELL);
535
536 /* This Write Memory Barrier will force the write to
537 * the User Doorbell area to be flushed.
538 */
539 wmb();
540 }
527 q->pend_cred &= 7; 541 q->pend_cred &= 7;
528 } 542 }
529} 543}
@@ -859,30 +873,66 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src)
859 */ 873 */
860static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) 874static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
861{ 875{
862 unsigned int *wr, index;
863 unsigned long flags;
864
865 wmb(); /* write descriptors before telling HW */ 876 wmb(); /* write descriptors before telling HW */
866 spin_lock_irqsave(&q->db_lock, flags); 877
867 if (!q->db_disabled) { 878 if (is_t4(adap->params.chip)) {
868 if (is_t4(adap->params.chip)) { 879 u32 val = PIDX(n);
880 unsigned long flags;
881
882 /* For T4 we need to participate in the Doorbell Recovery
883 * mechanism.
884 */
885 spin_lock_irqsave(&q->db_lock, flags);
886 if (!q->db_disabled)
869 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), 887 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
870 QID(q->cntxt_id) | PIDX(n)); 888 QID(q->cntxt_id) | val);
889 else
890 q->db_pidx_inc += n;
891 q->db_pidx = q->pidx;
892 spin_unlock_irqrestore(&q->db_lock, flags);
893 } else {
894 u32 val = PIDX_T5(n);
895
896 /* T4 and later chips share the same PIDX field offset within
897 * the doorbell, but T5 and later shrank the field in order to
898 * gain a bit for Doorbell Priority. The field was absurdly
899 * large in the first place (14 bits) so we just use the T5
900 * and later limits and warn if a Queue ID is too large.
901 */
902 WARN_ON(val & DBPRIO(1));
903
904 /* For T5 and later we use the Write-Combine mapped BAR2 User
905 * Doorbell mechanism. If we're only writing a single TX
906 * Descriptor and TX Write Combining hasn't been disabled, we
907 * can use the Write Combining Gather Buffer; otherwise we use
908 * the simple doorbell.
909 */
910 if (n == 1) {
911 int index = (q->pidx
912 ? (q->pidx - 1)
913 : (q->size - 1));
914 unsigned int *wr = (unsigned int *)&q->desc[index];
915
916 cxgb_pio_copy((u64 __iomem *)
917 (adap->bar2 + q->udb +
918 SGE_UDB_WCDOORBELL),
919 (u64 *)wr);
871 } else { 920 } else {
872 if (n == 1) { 921 writel(val, adap->bar2 + q->udb + SGE_UDB_KDOORBELL);
873 index = q->pidx ? (q->pidx - 1) : (q->size - 1);
874 wr = (unsigned int *)&q->desc[index];
875 cxgb_pio_copy((u64 __iomem *)
876 (adap->bar2 + q->udb + 64),
877 (u64 *)wr);
878 } else
879 writel(n, adap->bar2 + q->udb + 8);
880 wmb();
881 } 922 }
882 } else 923
883 q->db_pidx_inc += n; 924 /* This Write Memory Barrier will force the write to the User
884 q->db_pidx = q->pidx; 925 * Doorbell area to be flushed. This is needed to prevent
885 spin_unlock_irqrestore(&q->db_lock, flags); 926 * writes on different CPUs for the same queue from hitting
927 * the adapter out of order. This is required when some Work
928 * Requests take the Write Combine Gather Buffer path (user
929 * doorbell area offset [SGE_UDB_WCDOORBELL..+63]) and some
930 * take the traditional path where we simply increment the
931 * PIDX (User Doorbell area SGE_UDB_KDOORBELL) and have the
932 * hardware DMA read the actual Work Request.
933 */
934 wmb();
935 }
886} 936}
887 937
888/** 938/**
@@ -1916,6 +1966,7 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
1916 unsigned int params; 1966 unsigned int params;
1917 struct sge_rspq *q = container_of(napi, struct sge_rspq, napi); 1967 struct sge_rspq *q = container_of(napi, struct sge_rspq, napi);
1918 int work_done = process_responses(q, budget); 1968 int work_done = process_responses(q, budget);
1969 u32 val;
1919 1970
1920 if (likely(work_done < budget)) { 1971 if (likely(work_done < budget)) {
1921 napi_complete(napi); 1972 napi_complete(napi);
@@ -1924,8 +1975,14 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
1924 } else 1975 } else
1925 params = QINTR_TIMER_IDX(7); 1976 params = QINTR_TIMER_IDX(7);
1926 1977
1927 t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), CIDXINC(work_done) | 1978 val = CIDXINC(work_done) | SEINTARM(params);
1928 INGRESSQID((u32)q->cntxt_id) | SEINTARM(params)); 1979 if (is_t4(q->adap->params.chip)) {
1980 t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS),
1981 val | INGRESSQID((u32)q->cntxt_id));
1982 } else {
1983 writel(val, q->adap->bar2 + q->udb + SGE_UDB_GTS);
1984 wmb();
1985 }
1929 return work_done; 1986 return work_done;
1930} 1987}
1931 1988
@@ -1949,6 +2006,7 @@ static unsigned int process_intrq(struct adapter *adap)
1949 unsigned int credits; 2006 unsigned int credits;
1950 const struct rsp_ctrl *rc; 2007 const struct rsp_ctrl *rc;
1951 struct sge_rspq *q = &adap->sge.intrq; 2008 struct sge_rspq *q = &adap->sge.intrq;
2009 u32 val;
1952 2010
1953 spin_lock(&adap->sge.intrq_lock); 2011 spin_lock(&adap->sge.intrq_lock);
1954 for (credits = 0; ; credits++) { 2012 for (credits = 0; ; credits++) {
@@ -1967,8 +2025,14 @@ static unsigned int process_intrq(struct adapter *adap)
1967 rspq_next(q); 2025 rspq_next(q);
1968 } 2026 }
1969 2027
1970 t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), CIDXINC(credits) | 2028 val = CIDXINC(credits) | SEINTARM(q->intr_params);
1971 INGRESSQID(q->cntxt_id) | SEINTARM(q->intr_params)); 2029 if (is_t4(adap->params.chip)) {
2030 t4_write_reg(adap, MYPF_REG(SGE_PF_GTS),
2031 val | INGRESSQID(q->cntxt_id));
2032 } else {
2033 writel(val, adap->bar2 + q->udb + SGE_UDB_GTS);
2034 wmb();
2035 }
1972 spin_unlock(&adap->sge.intrq_lock); 2036 spin_unlock(&adap->sge.intrq_lock);
1973 return credits; 2037 return credits;
1974} 2038}
@@ -2149,6 +2213,51 @@ static void sge_tx_timer_cb(unsigned long data)
2149 mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2)); 2213 mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
2150} 2214}
2151 2215
2216/**
2217 * udb_address - return the BAR2 User Doorbell address for a Queue
2218 * @adap: the adapter
2219 * @cntxt_id: the Queue Context ID
2220 * @qpp: Queues Per Page (for all PFs)
2221 *
2222 * Returns the BAR2 address of the user Doorbell associated with the
2223 * indicated Queue Context ID. Note that this is only applicable
2224 * for T5 and later.
2225 */
2226static u64 udb_address(struct adapter *adap, unsigned int cntxt_id,
2227 unsigned int qpp)
2228{
2229 u64 udb;
2230 unsigned int s_qpp;
2231 unsigned short udb_density;
2232 unsigned long qpshift;
2233 int page;
2234
2235 BUG_ON(is_t4(adap->params.chip));
2236
2237 s_qpp = (QUEUESPERPAGEPF0 +
2238 (QUEUESPERPAGEPF1 - QUEUESPERPAGEPF0) * adap->fn);
2239 udb_density = 1 << ((qpp >> s_qpp) & QUEUESPERPAGEPF0_MASK);
2240 qpshift = PAGE_SHIFT - ilog2(udb_density);
2241 udb = cntxt_id << qpshift;
2242 udb &= PAGE_MASK;
2243 page = udb / PAGE_SIZE;
2244 udb += (cntxt_id - (page * udb_density)) * SGE_UDB_SIZE;
2245
2246 return udb;
2247}
2248
2249static u64 udb_address_eq(struct adapter *adap, unsigned int cntxt_id)
2250{
2251 return udb_address(adap, cntxt_id,
2252 t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF));
2253}
2254
2255static u64 udb_address_iq(struct adapter *adap, unsigned int cntxt_id)
2256{
2257 return udb_address(adap, cntxt_id,
2258 t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF));
2259}
2260
2152int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, 2261int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
2153 struct net_device *dev, int intr_idx, 2262 struct net_device *dev, int intr_idx,
2154 struct sge_fl *fl, rspq_handler_t hnd) 2263 struct sge_fl *fl, rspq_handler_t hnd)
@@ -2214,6 +2323,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
2214 iq->next_intr_params = iq->intr_params; 2323 iq->next_intr_params = iq->intr_params;
2215 iq->cntxt_id = ntohs(c.iqid); 2324 iq->cntxt_id = ntohs(c.iqid);
2216 iq->abs_id = ntohs(c.physiqid); 2325 iq->abs_id = ntohs(c.physiqid);
2326 if (!is_t4(adap->params.chip))
2327 iq->udb = udb_address_iq(adap, iq->cntxt_id);
2217 iq->size--; /* subtract status entry */ 2328 iq->size--; /* subtract status entry */
2218 iq->netdev = dev; 2329 iq->netdev = dev;
2219 iq->handler = hnd; 2330 iq->handler = hnd;
@@ -2229,6 +2340,12 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
2229 fl->pidx = fl->cidx = 0; 2340 fl->pidx = fl->cidx = 0;
2230 fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0; 2341 fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0;
2231 adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl; 2342 adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl;
2343
2344 /* Note, we must initialize the Free List User Doorbell
2345 * address before refilling the Free List!
2346 */
2347 if (!is_t4(adap->params.chip))
2348 fl->udb = udb_address_eq(adap, fl->cntxt_id);
2232 refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL); 2349 refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL);
2233 } 2350 }
2234 return 0; 2351 return 0;
@@ -2254,21 +2371,8 @@ err:
2254static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id) 2371static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
2255{ 2372{
2256 q->cntxt_id = id; 2373 q->cntxt_id = id;
2257 if (!is_t4(adap->params.chip)) { 2374 if (!is_t4(adap->params.chip))
2258 unsigned int s_qpp; 2375 q->udb = udb_address_eq(adap, q->cntxt_id);
2259 unsigned short udb_density;
2260 unsigned long qpshift;
2261 int page;
2262
2263 s_qpp = QUEUESPERPAGEPF1 * adap->fn;
2264 udb_density = 1 << QUEUESPERPAGEPF0_GET((t4_read_reg(adap,
2265 SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp));
2266 qpshift = PAGE_SHIFT - ilog2(udb_density);
2267 q->udb = q->cntxt_id << qpshift;
2268 q->udb &= PAGE_MASK;
2269 page = q->udb / PAGE_SIZE;
2270 q->udb += (q->cntxt_id - (page * udb_density)) * 128;
2271 }
2272 2376
2273 q->in_use = 0; 2377 q->in_use = 0;
2274 q->cidx = q->pidx = 0; 2378 q->cidx = q->pidx = 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index 39fb325474f7..847a162af98c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -157,8 +157,27 @@
157#define QUEUESPERPAGEPF0_MASK 0x0000000fU 157#define QUEUESPERPAGEPF0_MASK 0x0000000fU
158#define QUEUESPERPAGEPF0_GET(x) ((x) & QUEUESPERPAGEPF0_MASK) 158#define QUEUESPERPAGEPF0_GET(x) ((x) & QUEUESPERPAGEPF0_MASK)
159 159
160#define QUEUESPERPAGEPF0 0
160#define QUEUESPERPAGEPF1 4 161#define QUEUESPERPAGEPF1 4
161 162
163/* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues.
164 * The User Doorbells are each 128 bytes in length with a Simple Doorbell at
165 * offsets 8x and a Write Combining single 64-byte Egress Queue Unit
166 * (X_IDXSIZE_UNIT) Gather Buffer interface at offset 64. For Ingress Queues,
167 * we have a Going To Sleep register at offsets 8x+4.
168 *
169 * As noted above, we have many instances of the Simple Doorbell and Going To
170 * Sleep registers at offsets 8x and 8x+4, respectively. We want to use a
171 * non-64-byte aligned offset for the Simple Doorbell in order to attempt to
172 * avoid buffering of the writes to the Simple Doorbell and we want to use a
173 * non-contiguous offset for the Going To Sleep writes in order to avoid
174 * possible combining between them.
175 */
176#define SGE_UDB_SIZE 128
177#define SGE_UDB_KDOORBELL 8
178#define SGE_UDB_GTS 20
179#define SGE_UDB_WCDOORBELL 64
180
162#define SGE_INT_CAUSE1 0x1024 181#define SGE_INT_CAUSE1 0x1024
163#define SGE_INT_CAUSE2 0x1030 182#define SGE_INT_CAUSE2 0x1030
164#define SGE_INT_CAUSE3 0x103c 183#define SGE_INT_CAUSE3 0x103c