diff options
author | Hariprasad Shenai <hariprasad@chelsio.com> | 2014-09-25 14:53:52 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-09-28 17:32:10 -0400 |
commit | d63a6dcf06cb3e6f3d08d79ba23603b03828650e (patch) | |
tree | 999af643615347ff030c2a5c846df1fecc061514 | |
parent | 825bae5d97d351ddf9720400df133fc4b1bc6a13 (diff) |
cxgb4: Use BAR2 Going To Sleep (GTS) for T5 and later.
Use BAR2 GTS for T5. If we are on T4 use the old doorbell mechanism;
otherwise use the new BAR2 mechanism. Use BAR2 doorbells for refilling FL's.
Based on original work by Casey Leedom <leedom@chelsio.com>
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/sge.c | 186 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 19 |
3 files changed, 167 insertions, 42 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index c067b7888ac4..54b10d4c990a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | |||
@@ -431,6 +431,7 @@ struct sge_fl { /* SGE free-buffer queue state */ | |||
431 | struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */ | 431 | struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */ |
432 | __be64 *desc; /* address of HW Rx descriptor ring */ | 432 | __be64 *desc; /* address of HW Rx descriptor ring */ |
433 | dma_addr_t addr; /* bus address of HW ring start */ | 433 | dma_addr_t addr; /* bus address of HW ring start */ |
434 | u64 udb; /* BAR2 offset of User Doorbell area */ | ||
434 | }; | 435 | }; |
435 | 436 | ||
436 | /* A packet gather list */ | 437 | /* A packet gather list */ |
@@ -459,6 +460,7 @@ struct sge_rspq { /* state for an SGE response queue */ | |||
459 | u16 abs_id; /* absolute SGE id for the response q */ | 460 | u16 abs_id; /* absolute SGE id for the response q */ |
460 | __be64 *desc; /* address of HW response ring */ | 461 | __be64 *desc; /* address of HW response ring */ |
461 | dma_addr_t phys_addr; /* physical address of the ring */ | 462 | dma_addr_t phys_addr; /* physical address of the ring */ |
463 | u64 udb; /* BAR2 offset of User Doorbell area */ | ||
462 | unsigned int iqe_len; /* entry size */ | 464 | unsigned int iqe_len; /* entry size */ |
463 | unsigned int size; /* capacity of response queue */ | 465 | unsigned int size; /* capacity of response queue */ |
464 | struct adapter *adap; | 466 | struct adapter *adap; |
@@ -516,7 +518,7 @@ struct sge_txq { | |||
516 | int db_disabled; | 518 | int db_disabled; |
517 | unsigned short db_pidx; | 519 | unsigned short db_pidx; |
518 | unsigned short db_pidx_inc; | 520 | unsigned short db_pidx_inc; |
519 | u64 udb; | 521 | u64 udb; /* BAR2 offset of User Doorbell area */ |
520 | }; | 522 | }; |
521 | 523 | ||
522 | struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */ | 524 | struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */ |
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index d22d728d4e5c..87db53343543 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c | |||
@@ -521,9 +521,23 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) | |||
521 | val = PIDX(q->pend_cred / 8); | 521 | val = PIDX(q->pend_cred / 8); |
522 | if (!is_t4(adap->params.chip)) | 522 | if (!is_t4(adap->params.chip)) |
523 | val |= DBTYPE(1); | 523 | val |= DBTYPE(1); |
524 | val |= DBPRIO(1); | ||
524 | wmb(); | 525 | wmb(); |
525 | t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO(1) | | 526 | |
526 | QID(q->cntxt_id) | val); | 527 | /* If we're on T4, use the old doorbell mechanism; otherwise |
528 | * use the new BAR2 mechanism. | ||
529 | */ | ||
530 | if (is_t4(adap->params.chip)) { | ||
531 | t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), | ||
532 | val | QID(q->cntxt_id)); | ||
533 | } else { | ||
534 | writel(val, adap->bar2 + q->udb + SGE_UDB_KDOORBELL); | ||
535 | |||
536 | /* This Write memory Barrier will force the write to | ||
537 | * the User Doorbell area to be flushed. | ||
538 | */ | ||
539 | wmb(); | ||
540 | } | ||
527 | q->pend_cred &= 7; | 541 | q->pend_cred &= 7; |
528 | } | 542 | } |
529 | } | 543 | } |
@@ -859,30 +873,66 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) | |||
859 | */ | 873 | */ |
860 | static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) | 874 | static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) |
861 | { | 875 | { |
862 | unsigned int *wr, index; | ||
863 | unsigned long flags; | ||
864 | |||
865 | wmb(); /* write descriptors before telling HW */ | 876 | wmb(); /* write descriptors before telling HW */ |
866 | spin_lock_irqsave(&q->db_lock, flags); | 877 | |
867 | if (!q->db_disabled) { | 878 | if (is_t4(adap->params.chip)) { |
868 | if (is_t4(adap->params.chip)) { | 879 | u32 val = PIDX(n); |
880 | unsigned long flags; | ||
881 | |||
882 | /* For T4 we need to participate in the Doorbell Recovery | ||
883 | * mechanism. | ||
884 | */ | ||
885 | spin_lock_irqsave(&q->db_lock, flags); | ||
886 | if (!q->db_disabled) | ||
869 | t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), | 887 | t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), |
870 | QID(q->cntxt_id) | PIDX(n)); | 888 | QID(q->cntxt_id) | val); |
889 | else | ||
890 | q->db_pidx_inc += n; | ||
891 | q->db_pidx = q->pidx; | ||
892 | spin_unlock_irqrestore(&q->db_lock, flags); | ||
893 | } else { | ||
894 | u32 val = PIDX_T5(n); | ||
895 | |||
896 | /* T4 and later chips share the same PIDX field offset within | ||
897 | * the doorbell, but T5 and later shrank the field in order to | ||
898 | * gain a bit for Doorbell Priority. The field was absurdly | ||
899 | * large in the first place (14 bits) so we just use the T5 | ||
900 | * and later limits and warn if a Queue ID is too large. | ||
901 | */ | ||
902 | WARN_ON(val & DBPRIO(1)); | ||
903 | |||
904 | /* For T5 and later we use the Write-Combine mapped BAR2 User | ||
905 | * Doorbell mechanism. If we're only writing a single TX | ||
906 | * Descriptor and TX Write Combining hasn't been disabled, we | ||
907 | * can use the Write Combining Gather Buffer; otherwise we use | ||
908 | * the simple doorbell. | ||
909 | */ | ||
910 | if (n == 1) { | ||
911 | int index = (q->pidx | ||
912 | ? (q->pidx - 1) | ||
913 | : (q->size - 1)); | ||
914 | unsigned int *wr = (unsigned int *)&q->desc[index]; | ||
915 | |||
916 | cxgb_pio_copy((u64 __iomem *) | ||
917 | (adap->bar2 + q->udb + | ||
918 | SGE_UDB_WCDOORBELL), | ||
919 | (u64 *)wr); | ||
871 | } else { | 920 | } else { |
872 | if (n == 1) { | 921 | writel(val, adap->bar2 + q->udb + SGE_UDB_KDOORBELL); |
873 | index = q->pidx ? (q->pidx - 1) : (q->size - 1); | ||
874 | wr = (unsigned int *)&q->desc[index]; | ||
875 | cxgb_pio_copy((u64 __iomem *) | ||
876 | (adap->bar2 + q->udb + 64), | ||
877 | (u64 *)wr); | ||
878 | } else | ||
879 | writel(n, adap->bar2 + q->udb + 8); | ||
880 | wmb(); | ||
881 | } | 922 | } |
882 | } else | 923 | |
883 | q->db_pidx_inc += n; | 924 | /* This Write Memory Barrier will force the write to the User |
884 | q->db_pidx = q->pidx; | 925 | * Doorbell area to be flushed. This is needed to prevent |
885 | spin_unlock_irqrestore(&q->db_lock, flags); | 926 | * writes on different CPUs for the same queue from hitting |
927 | * the adapter out of order. This is required when some Work | ||
928 | * Requests take the Write Combine Gather Buffer path (user | ||
929 | * doorbell area offset [SGE_UDB_WCDOORBELL..+63]) and some | ||
930 | * take the traditional path where we simply increment the | ||
931 | * PIDX (User Doorbell area SGE_UDB_KDOORBELL) and have the | ||
932 | * hardware DMA read the actual Work Request. | ||
933 | */ | ||
934 | wmb(); | ||
935 | } | ||
886 | } | 936 | } |
887 | 937 | ||
888 | /** | 938 | /** |
@@ -1916,6 +1966,7 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) | |||
1916 | unsigned int params; | 1966 | unsigned int params; |
1917 | struct sge_rspq *q = container_of(napi, struct sge_rspq, napi); | 1967 | struct sge_rspq *q = container_of(napi, struct sge_rspq, napi); |
1918 | int work_done = process_responses(q, budget); | 1968 | int work_done = process_responses(q, budget); |
1969 | u32 val; | ||
1919 | 1970 | ||
1920 | if (likely(work_done < budget)) { | 1971 | if (likely(work_done < budget)) { |
1921 | napi_complete(napi); | 1972 | napi_complete(napi); |
@@ -1924,8 +1975,14 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) | |||
1924 | } else | 1975 | } else |
1925 | params = QINTR_TIMER_IDX(7); | 1976 | params = QINTR_TIMER_IDX(7); |
1926 | 1977 | ||
1927 | t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), CIDXINC(work_done) | | 1978 | val = CIDXINC(work_done) | SEINTARM(params); |
1928 | INGRESSQID((u32)q->cntxt_id) | SEINTARM(params)); | 1979 | if (is_t4(q->adap->params.chip)) { |
1980 | t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), | ||
1981 | val | INGRESSQID((u32)q->cntxt_id)); | ||
1982 | } else { | ||
1983 | writel(val, q->adap->bar2 + q->udb + SGE_UDB_GTS); | ||
1984 | wmb(); | ||
1985 | } | ||
1929 | return work_done; | 1986 | return work_done; |
1930 | } | 1987 | } |
1931 | 1988 | ||
@@ -1949,6 +2006,7 @@ static unsigned int process_intrq(struct adapter *adap) | |||
1949 | unsigned int credits; | 2006 | unsigned int credits; |
1950 | const struct rsp_ctrl *rc; | 2007 | const struct rsp_ctrl *rc; |
1951 | struct sge_rspq *q = &adap->sge.intrq; | 2008 | struct sge_rspq *q = &adap->sge.intrq; |
2009 | u32 val; | ||
1952 | 2010 | ||
1953 | spin_lock(&adap->sge.intrq_lock); | 2011 | spin_lock(&adap->sge.intrq_lock); |
1954 | for (credits = 0; ; credits++) { | 2012 | for (credits = 0; ; credits++) { |
@@ -1967,8 +2025,14 @@ static unsigned int process_intrq(struct adapter *adap) | |||
1967 | rspq_next(q); | 2025 | rspq_next(q); |
1968 | } | 2026 | } |
1969 | 2027 | ||
1970 | t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), CIDXINC(credits) | | 2028 | val = CIDXINC(credits) | SEINTARM(q->intr_params); |
1971 | INGRESSQID(q->cntxt_id) | SEINTARM(q->intr_params)); | 2029 | if (is_t4(adap->params.chip)) { |
2030 | t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), | ||
2031 | val | INGRESSQID(q->cntxt_id)); | ||
2032 | } else { | ||
2033 | writel(val, adap->bar2 + q->udb + SGE_UDB_GTS); | ||
2034 | wmb(); | ||
2035 | } | ||
1972 | spin_unlock(&adap->sge.intrq_lock); | 2036 | spin_unlock(&adap->sge.intrq_lock); |
1973 | return credits; | 2037 | return credits; |
1974 | } | 2038 | } |
@@ -2149,6 +2213,51 @@ static void sge_tx_timer_cb(unsigned long data) | |||
2149 | mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2)); | 2213 | mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2)); |
2150 | } | 2214 | } |
2151 | 2215 | ||
2216 | /** | ||
2217 | * udb_address - return the BAR2 User Doorbell address for a Queue | ||
2218 | * @adap: the adapter | ||
2219 | * @cntxt_id: the Queue Context ID | ||
2220 | * @qpp: Queues Per Page (for all PFs) | ||
2221 | * | ||
2222 | * Returns the BAR2 address of the user Doorbell associated with the | ||
2223 | * indicated Queue Context ID. Note that this is only applicable | ||
2224 | * for T5 and later. | ||
2225 | */ | ||
2226 | static u64 udb_address(struct adapter *adap, unsigned int cntxt_id, | ||
2227 | unsigned int qpp) | ||
2228 | { | ||
2229 | u64 udb; | ||
2230 | unsigned int s_qpp; | ||
2231 | unsigned short udb_density; | ||
2232 | unsigned long qpshift; | ||
2233 | int page; | ||
2234 | |||
2235 | BUG_ON(is_t4(adap->params.chip)); | ||
2236 | |||
2237 | s_qpp = (QUEUESPERPAGEPF0 + | ||
2238 | (QUEUESPERPAGEPF1 - QUEUESPERPAGEPF0) * adap->fn); | ||
2239 | udb_density = 1 << ((qpp >> s_qpp) & QUEUESPERPAGEPF0_MASK); | ||
2240 | qpshift = PAGE_SHIFT - ilog2(udb_density); | ||
2241 | udb = cntxt_id << qpshift; | ||
2242 | udb &= PAGE_MASK; | ||
2243 | page = udb / PAGE_SIZE; | ||
2244 | udb += (cntxt_id - (page * udb_density)) * SGE_UDB_SIZE; | ||
2245 | |||
2246 | return udb; | ||
2247 | } | ||
2248 | |||
2249 | static u64 udb_address_eq(struct adapter *adap, unsigned int cntxt_id) | ||
2250 | { | ||
2251 | return udb_address(adap, cntxt_id, | ||
2252 | t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF)); | ||
2253 | } | ||
2254 | |||
2255 | static u64 udb_address_iq(struct adapter *adap, unsigned int cntxt_id) | ||
2256 | { | ||
2257 | return udb_address(adap, cntxt_id, | ||
2258 | t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF)); | ||
2259 | } | ||
2260 | |||
2152 | int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, | 2261 | int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, |
2153 | struct net_device *dev, int intr_idx, | 2262 | struct net_device *dev, int intr_idx, |
2154 | struct sge_fl *fl, rspq_handler_t hnd) | 2263 | struct sge_fl *fl, rspq_handler_t hnd) |
@@ -2214,6 +2323,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, | |||
2214 | iq->next_intr_params = iq->intr_params; | 2323 | iq->next_intr_params = iq->intr_params; |
2215 | iq->cntxt_id = ntohs(c.iqid); | 2324 | iq->cntxt_id = ntohs(c.iqid); |
2216 | iq->abs_id = ntohs(c.physiqid); | 2325 | iq->abs_id = ntohs(c.physiqid); |
2326 | if (!is_t4(adap->params.chip)) | ||
2327 | iq->udb = udb_address_iq(adap, iq->cntxt_id); | ||
2217 | iq->size--; /* subtract status entry */ | 2328 | iq->size--; /* subtract status entry */ |
2218 | iq->netdev = dev; | 2329 | iq->netdev = dev; |
2219 | iq->handler = hnd; | 2330 | iq->handler = hnd; |
@@ -2229,6 +2340,12 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, | |||
2229 | fl->pidx = fl->cidx = 0; | 2340 | fl->pidx = fl->cidx = 0; |
2230 | fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0; | 2341 | fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0; |
2231 | adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl; | 2342 | adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl; |
2343 | |||
2344 | /* Note, we must initialize the Free List User Doorbell | ||
2345 | * address before refilling the Free List! | ||
2346 | */ | ||
2347 | if (!is_t4(adap->params.chip)) | ||
2348 | fl->udb = udb_address_eq(adap, fl->cntxt_id); | ||
2232 | refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL); | 2349 | refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL); |
2233 | } | 2350 | } |
2234 | return 0; | 2351 | return 0; |
@@ -2254,21 +2371,8 @@ err: | |||
2254 | static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id) | 2371 | static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id) |
2255 | { | 2372 | { |
2256 | q->cntxt_id = id; | 2373 | q->cntxt_id = id; |
2257 | if (!is_t4(adap->params.chip)) { | 2374 | if (!is_t4(adap->params.chip)) |
2258 | unsigned int s_qpp; | 2375 | q->udb = udb_address_eq(adap, q->cntxt_id); |
2259 | unsigned short udb_density; | ||
2260 | unsigned long qpshift; | ||
2261 | int page; | ||
2262 | |||
2263 | s_qpp = QUEUESPERPAGEPF1 * adap->fn; | ||
2264 | udb_density = 1 << QUEUESPERPAGEPF0_GET((t4_read_reg(adap, | ||
2265 | SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp)); | ||
2266 | qpshift = PAGE_SHIFT - ilog2(udb_density); | ||
2267 | q->udb = q->cntxt_id << qpshift; | ||
2268 | q->udb &= PAGE_MASK; | ||
2269 | page = q->udb / PAGE_SIZE; | ||
2270 | q->udb += (q->cntxt_id - (page * udb_density)) * 128; | ||
2271 | } | ||
2272 | 2376 | ||
2273 | q->in_use = 0; | 2377 | q->in_use = 0; |
2274 | q->cidx = q->pidx = 0; | 2378 | q->cidx = q->pidx = 0; |
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 39fb325474f7..847a162af98c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | |||
@@ -157,8 +157,27 @@ | |||
157 | #define QUEUESPERPAGEPF0_MASK 0x0000000fU | 157 | #define QUEUESPERPAGEPF0_MASK 0x0000000fU |
158 | #define QUEUESPERPAGEPF0_GET(x) ((x) & QUEUESPERPAGEPF0_MASK) | 158 | #define QUEUESPERPAGEPF0_GET(x) ((x) & QUEUESPERPAGEPF0_MASK) |
159 | 159 | ||
160 | #define QUEUESPERPAGEPF0 0 | ||
160 | #define QUEUESPERPAGEPF1 4 | 161 | #define QUEUESPERPAGEPF1 4 |
161 | 162 | ||
163 | /* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues. | ||
164 | * The User Doorbells are each 128 bytes in length with a Simple Doorbell at | ||
165 | * offsets 8x and a Write Combining single 64-byte Egress Queue Unit | ||
166 | * (X_IDXSIZE_UNIT) Gather Buffer interface at offset 64. For Ingress Queues, | ||
167 | * we have a Going To Sleep register at offsets 8x+4. | ||
168 | * | ||
169 | * As noted above, we have many instances of the Simple Doorbell and Going To | ||
170 | * Sleep registers at offsets 8x and 8x+4, respectively. We want to use a | ||
171 | * non-64-byte aligned offset for the Simple Doorbell in order to attempt to | ||
172 | * avoid buffering of the writes to the Simple Doorbell and we want to use a | ||
173 | * non-contiguous offset for the Going To Sleep writes in order to avoid | ||
174 | * possible combining between them. | ||
175 | */ | ||
176 | #define SGE_UDB_SIZE 128 | ||
177 | #define SGE_UDB_KDOORBELL 8 | ||
178 | #define SGE_UDB_GTS 20 | ||
179 | #define SGE_UDB_WCDOORBELL 64 | ||
180 | |||
162 | #define SGE_INT_CAUSE1 0x1024 | 181 | #define SGE_INT_CAUSE1 0x1024 |
163 | #define SGE_INT_CAUSE2 0x1030 | 182 | #define SGE_INT_CAUSE2 0x1030 |
164 | #define SGE_INT_CAUSE3 0x103c | 183 | #define SGE_INT_CAUSE3 0x103c |