author    Vladislav Zolotarov <vladz@broadcom.com>    2008-06-23 23:34:36 -0400
committer David S. Miller <davem@davemloft.net>       2008-06-23 23:34:36 -0400
commit    7a9b25577c8a06d998fb11b28bf8229aa9623205 (patch)
tree      99d1a31c0112557794398b8add1cb22c51667afa
parent    bb2a0f7ae477740d947b442f640a5d10b51025c0 (diff)
bnx2x: Add TPA, Broadcom's HW LRO
TPA stands for Transparent Packet Aggregation. When enabled, the FW aggregates in-order TCP packets according to a 4-tuple match and sends one big packet to the driver. This packet is stored on an SGL in which each SGE is one page. The FW also implements a timeout algorithm and honors all TCP flags, including the PSH flag as a trigger to halt aggregation. After receiving Ben Hutchings' comments, we also added ethtool support, so now, thanks to Ben's patch, when forwarding is enabled our aggregation is turned off using the LRO flags.

Signed-off-by: Vladislav Zolotarov <vladz@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  drivers/net/bnx2x.h      | 276
-rw-r--r--  drivers/net/bnx2x_main.c | 744
2 files changed, 884 insertions(+), 136 deletions(-)
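[Editor's note] The description above says the FW hands the driver one big packet laid out on an SGL of pages. A rough, standalone sketch (not driver code; the page size, PAGES_PER_SGE, MTU and L2 overhead are assumed values) reproduces the max_agg_size arithmetic from the bnx2x_init_internal() hunk further down: the aggregate is limited to eight SGE pages of payload plus the linear buffer, and is clamped to the 16-bit value written with REG_WR16().

/* Standalone illustration only; constants are assumptions, not HSI values. */
#include <stdio.h>

#define BCM_PAGE_SIZE   4096u   /* assumed 4 KiB BCM page */
#define PAGES_PER_SGE   1u      /* as in the patch (PAGES_PER_SGE_SHIFT == 0) */

int main(void)
{
	unsigned int mtu = 1500;                         /* assumed MTU */
	unsigned int overhead = 24;                      /* placeholder for rx_offset + ETH_OVREHEAD */
	unsigned int rx_buf_use_size = mtu + overhead;   /* mirrors bnx2x_init_rx_rings() */
	unsigned int max_agg_size = rx_buf_use_size + 8u * BCM_PAGE_SIZE * PAGES_PER_SGE;

	if (max_agg_size > 0xffff)                       /* FW field is written with REG_WR16 */
		max_agg_size = 0xffff;

	printf("max_agg_size = %u bytes\n", max_agg_size);  /* ~34 KB with these numbers */
	return 0;
}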
diff --git a/drivers/net/bnx2x.h b/drivers/net/bnx2x.h
index f7d73d6c3981..4bf4f7b205f2 100644
--- a/drivers/net/bnx2x.h
+++ b/drivers/net/bnx2x.h
@@ -132,8 +132,8 @@
132#define is_multi(bp) (bp->num_queues > 1) 132#define is_multi(bp) (bp->num_queues > 1)
133 133
134 134
135/* fast path */
135 136
136#define bnx2x_sp_check(bp, var) ((bp->slowpath) ? (&bp->slowpath->var) : NULL)
137struct sw_rx_bd { 137struct sw_rx_bd {
138 struct sk_buff *skb; 138 struct sk_buff *skb;
139 DECLARE_PCI_UNMAP_ADDR(mapping) 139 DECLARE_PCI_UNMAP_ADDR(mapping)
@@ -144,6 +144,52 @@ struct sw_tx_bd {
144 u16 first_bd; 144 u16 first_bd;
145}; 145};
146 146
147struct sw_rx_page {
148 struct page *page;
149 DECLARE_PCI_UNMAP_ADDR(mapping)
150};
151
152
153/* MC hsi */
154#define BCM_PAGE_SHIFT 12
155#define BCM_PAGE_SIZE (1 << BCM_PAGE_SHIFT)
156#define BCM_PAGE_MASK (~(BCM_PAGE_SIZE - 1))
157#define BCM_PAGE_ALIGN(addr) (((addr) + BCM_PAGE_SIZE - 1) & BCM_PAGE_MASK)
158
159#define PAGES_PER_SGE_SHIFT 0
160#define PAGES_PER_SGE (1 << PAGES_PER_SGE_SHIFT)
161
162/* SGE ring related macros */
163#define NUM_RX_SGE_PAGES 2
164#define RX_SGE_CNT (BCM_PAGE_SIZE / sizeof(struct eth_rx_sge))
165#define MAX_RX_SGE_CNT (RX_SGE_CNT - 2)
166/* RX_SGE_CNT is promissed to be a power of 2 */
167#define RX_SGE_MASK (RX_SGE_CNT - 1)
168#define NUM_RX_SGE (RX_SGE_CNT * NUM_RX_SGE_PAGES)
169#define MAX_RX_SGE (NUM_RX_SGE - 1)
170#define NEXT_SGE_IDX(x) ((((x) & RX_SGE_MASK) == \
171 (MAX_RX_SGE_CNT - 1)) ? (x) + 3 : (x) + 1)
172#define RX_SGE(x) ((x) & MAX_RX_SGE)
173
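[Editor's note] A quick worked sketch of the index arithmetic above, assuming a 4 KiB BCM page and an 8-byte struct eth_rx_sge (so RX_SGE_CNT == 512 and MAX_RX_SGE_CNT == 510): the last two entries of every SGE page are reserved for the "next page" pointer, and NEXT_SGE_IDX() jumps over them. Standalone illustration, not driver code.

#include <stdio.h>

#define RX_SGE_CNT       512u                 /* assumed: 4096 / sizeof(struct eth_rx_sge) */
#define MAX_RX_SGE_CNT   (RX_SGE_CNT - 2)     /* 510: last two entries hold the next-page pointer */
#define RX_SGE_MASK      (RX_SGE_CNT - 1)
#define NEXT_SGE_IDX(x)  ((((x) & RX_SGE_MASK) == (MAX_RX_SGE_CNT - 1)) ? (x) + 3 : (x) + 1)

int main(void)
{
	printf("%u -> %u\n", 100u, NEXT_SGE_IDX(100u));   /* 100 -> 101: normal advance */
	printf("%u -> %u\n", 509u, NEXT_SGE_IDX(509u));   /* 509 -> 512: skips entries 510 and 511 */
	return 0;
}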
174/* SGE producer mask related macros */
175/* Number of bits in one sge_mask array element */
176#define RX_SGE_MASK_ELEM_SZ 64
177#define RX_SGE_MASK_ELEM_SHIFT 6
178#define RX_SGE_MASK_ELEM_MASK ((u64)RX_SGE_MASK_ELEM_SZ - 1)
179
180/* Creates a bitmask of all ones in less significant bits.
181 idx - index of the most significant bit in the created mask */
182#define RX_SGE_ONES_MASK(idx) \
183 (((u64)0x1 << (((idx) & RX_SGE_MASK_ELEM_MASK) + 1)) - 1)
184#define RX_SGE_MASK_ELEM_ONE_MASK ((u64)(~0))
185
186/* Number of u64 elements in SGE mask array */
187#define RX_SGE_MASK_LEN ((NUM_RX_SGE_PAGES * RX_SGE_CNT) / \
188 RX_SGE_MASK_ELEM_SZ)
189#define RX_SGE_MASK_LEN_MASK (RX_SGE_MASK_LEN - 1)
190#define NEXT_SGE_MASK_ELEM(el) (((el) + 1) & RX_SGE_MASK_LEN_MASK)
191
192
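[Editor's note] To make the mask bookkeeping above concrete, a small standalone sketch (illustrative values only) shows how an SGE index maps onto a (u64 element, bit) pair and what RX_SGE_ONES_MASK() produces for that index.

#include <stdio.h>
#include <stdint.h>

#define RX_SGE_MASK_ELEM_SHIFT  6
#define RX_SGE_MASK_ELEM_MASK   ((uint64_t)64 - 1)
#define RX_SGE_ONES_MASK(idx) \
	(((uint64_t)0x1 << (((idx) & RX_SGE_MASK_ELEM_MASK) + 1)) - 1)

int main(void)
{
	unsigned int idx = 130;   /* arbitrary SGE index: 130 = 2*64 + 2 */

	/* element 2, bit 2 */
	printf("elem=%u bit=%u\n",
	       idx >> RX_SGE_MASK_ELEM_SHIFT,
	       (unsigned int)(idx & RX_SGE_MASK_ELEM_MASK));

	/* ones in bits 0..2 -> 0x7 (bit 2 is the most significant one set) */
	printf("ones_mask=0x%llx\n",
	       (unsigned long long)RX_SGE_ONES_MASK(idx));
	return 0;
}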
147struct bnx2x_fastpath { 193struct bnx2x_fastpath {
148 194
149 struct napi_struct napi; 195 struct napi_struct napi;
@@ -159,7 +205,8 @@ struct bnx2x_fastpath {
159 struct eth_tx_bd *tx_desc_ring; 205 struct eth_tx_bd *tx_desc_ring;
160 dma_addr_t tx_desc_mapping; 206 dma_addr_t tx_desc_mapping;
161 207
162 struct sw_rx_bd *rx_buf_ring; 208 struct sw_rx_bd *rx_buf_ring; /* BDs mappings ring */
209 struct sw_rx_page *rx_page_ring; /* SGE pages mappings ring */
163 210
164 struct eth_rx_bd *rx_desc_ring; 211 struct eth_rx_bd *rx_desc_ring;
165 dma_addr_t rx_desc_mapping; 212 dma_addr_t rx_desc_mapping;
@@ -167,6 +214,12 @@ struct bnx2x_fastpath {
167 union eth_rx_cqe *rx_comp_ring; 214 union eth_rx_cqe *rx_comp_ring;
168 dma_addr_t rx_comp_mapping; 215 dma_addr_t rx_comp_mapping;
169 216
217 /* SGE ring */
218 struct eth_rx_sge *rx_sge_ring;
219 dma_addr_t rx_sge_mapping;
220
221 u64 sge_mask[RX_SGE_MASK_LEN];
222
170 int state; 223 int state;
171#define BNX2X_FP_STATE_CLOSED 0 224#define BNX2X_FP_STATE_CLOSED 0
172#define BNX2X_FP_STATE_IRQ 0x80000 225#define BNX2X_FP_STATE_IRQ 0x80000
@@ -197,27 +250,152 @@ struct bnx2x_fastpath {
197 u16 rx_bd_cons; 250 u16 rx_bd_cons;
198 u16 rx_comp_prod; 251 u16 rx_comp_prod;
199 u16 rx_comp_cons; 252 u16 rx_comp_cons;
253 u16 rx_sge_prod;
254 /* The last maximal completed SGE */
255 u16 last_max_sge;
200 u16 *rx_cons_sb; 256 u16 *rx_cons_sb;
257 u16 *rx_bd_cons_sb;
201 258
202 unsigned long tx_pkt, 259 unsigned long tx_pkt,
203 rx_pkt, 260 rx_pkt,
204 rx_calls; 261 rx_calls,
262 rx_alloc_failed;
263 /* TPA related */
264 struct sw_rx_bd tpa_pool[ETH_MAX_AGGREGATION_QUEUES_E1H];
265 u8 tpa_state[ETH_MAX_AGGREGATION_QUEUES_E1H];
266#define BNX2X_TPA_START 1
267#define BNX2X_TPA_STOP 2
268 u8 disable_tpa;
269#ifdef BNX2X_STOP_ON_ERROR
270 u64 tpa_queue_used;
271#endif
205 272
206 struct bnx2x *bp; /* parent */ 273 struct bnx2x *bp; /* parent */
207}; 274};
208 275
209#define bnx2x_fp(bp, nr, var) (bp->fp[nr].var) 276#define bnx2x_fp(bp, nr, var) (bp->fp[nr].var)
277
278
279/* MC hsi */
280#define MAX_FETCH_BD 13 /* HW max BDs per packet */
281#define RX_COPY_THRESH 92
282
283#define NUM_TX_RINGS 16
284#define TX_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_tx_bd))
285#define MAX_TX_DESC_CNT (TX_DESC_CNT - 1)
286#define NUM_TX_BD (TX_DESC_CNT * NUM_TX_RINGS)
287#define MAX_TX_BD (NUM_TX_BD - 1)
288#define MAX_TX_AVAIL (MAX_TX_DESC_CNT * NUM_TX_RINGS - 2)
289#define NEXT_TX_IDX(x) ((((x) & MAX_TX_DESC_CNT) == \
290 (MAX_TX_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
291#define TX_BD(x) ((x) & MAX_TX_BD)
292#define TX_BD_POFF(x) ((x) & MAX_TX_DESC_CNT)
293
294/* The RX BD ring is special, each bd is 8 bytes but the last one is 16 */
295#define NUM_RX_RINGS 8
296#define RX_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_rx_bd))
297#define MAX_RX_DESC_CNT (RX_DESC_CNT - 2)
298#define RX_DESC_MASK (RX_DESC_CNT - 1)
299#define NUM_RX_BD (RX_DESC_CNT * NUM_RX_RINGS)
300#define MAX_RX_BD (NUM_RX_BD - 1)
301#define MAX_RX_AVAIL (MAX_RX_DESC_CNT * NUM_RX_RINGS - 2)
302#define NEXT_RX_IDX(x) ((((x) & RX_DESC_MASK) == \
303 (MAX_RX_DESC_CNT - 1)) ? (x) + 3 : (x) + 1)
304#define RX_BD(x) ((x) & MAX_RX_BD)
305
306/* As long as CQE is 4 times bigger than BD entry we have to allocate
307 4 times more pages for CQ ring in order to keep it balanced with
308 BD ring */
309#define NUM_RCQ_RINGS (NUM_RX_RINGS * 4)
310#define RCQ_DESC_CNT (BCM_PAGE_SIZE / sizeof(union eth_rx_cqe))
311#define MAX_RCQ_DESC_CNT (RCQ_DESC_CNT - 1)
312#define NUM_RCQ_BD (RCQ_DESC_CNT * NUM_RCQ_RINGS)
313#define MAX_RCQ_BD (NUM_RCQ_BD - 1)
314#define MAX_RCQ_AVAIL (MAX_RCQ_DESC_CNT * NUM_RCQ_RINGS - 2)
315#define NEXT_RCQ_IDX(x) ((((x) & MAX_RCQ_DESC_CNT) == \
316 (MAX_RCQ_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
317#define RCQ_BD(x) ((x) & MAX_RCQ_BD)
318
319
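[Editor's note] A short arithmetic check of the "CQE is 4 times bigger than BD" comment above, assuming a 4 KiB BCM page, an 8-byte struct eth_rx_bd and a 32-byte union eth_rx_cqe (4x the BD, per the comment): allocating four times as many CQ pages keeps the completion ring and the BD ring holding the same number of entries.

/* Standalone arithmetic sketch with assumed structure sizes. */
#include <stdio.h>

#define BCM_PAGE_SIZE   4096u
#define NUM_RX_RINGS    8u
#define RX_BD_SIZE      8u                    /* assumed sizeof(struct eth_rx_bd) */
#define RX_CQE_SIZE     32u                   /* assumed sizeof(union eth_rx_cqe), 4x the BD */
#define NUM_RCQ_RINGS   (NUM_RX_RINGS * 4u)

int main(void)
{
	unsigned int num_rx_bd  = (BCM_PAGE_SIZE / RX_BD_SIZE)  * NUM_RX_RINGS;   /* 512 * 8  = 4096 */
	unsigned int num_rcq_bd = (BCM_PAGE_SIZE / RX_CQE_SIZE) * NUM_RCQ_RINGS;  /* 128 * 32 = 4096 */

	printf("RX BDs: %u, RCQ entries: %u\n", num_rx_bd, num_rcq_bd);
	return 0;
}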
210/* This is needed for determening of last_max */ 320/* This is needed for determening of last_max */
211#define SUB_S16(a, b) (s16)((s16)(a) - (s16)(b)) 321#define SUB_S16(a, b) (s16)((s16)(a) - (s16)(b))
212 322
323#define __SGE_MASK_SET_BIT(el, bit) \
324 do { \
325 el = ((el) | ((u64)0x1 << (bit))); \
326 } while (0)
327
328#define __SGE_MASK_CLEAR_BIT(el, bit) \
329 do { \
330 el = ((el) & (~((u64)0x1 << (bit)))); \
331 } while (0)
332
333#define SGE_MASK_SET_BIT(fp, idx) \
334 __SGE_MASK_SET_BIT(fp->sge_mask[(idx) >> RX_SGE_MASK_ELEM_SHIFT], \
335 ((idx) & RX_SGE_MASK_ELEM_MASK))
336
337#define SGE_MASK_CLEAR_BIT(fp, idx) \
338 __SGE_MASK_CLEAR_BIT(fp->sge_mask[(idx) >> RX_SGE_MASK_ELEM_SHIFT], \
339 ((idx) & RX_SGE_MASK_ELEM_MASK))
340
341
342/* used on a CID received from the HW */
343#define SW_CID(x) (le32_to_cpu(x) & \
344 (COMMON_RAMROD_ETH_RX_CQE_CID >> 7))
345#define CQE_CMD(x) (le32_to_cpu(x) >> \
346 COMMON_RAMROD_ETH_RX_CQE_CMD_ID_SHIFT)
347
213#define BD_UNMAP_ADDR(bd) HILO_U64(le32_to_cpu((bd)->addr_hi), \ 348#define BD_UNMAP_ADDR(bd) HILO_U64(le32_to_cpu((bd)->addr_hi), \
214 le32_to_cpu((bd)->addr_lo)) 349 le32_to_cpu((bd)->addr_lo))
215#define BD_UNMAP_LEN(bd) (le16_to_cpu((bd)->nbytes)) 350#define BD_UNMAP_LEN(bd) (le16_to_cpu((bd)->nbytes))
216 351
352
353#define DPM_TRIGER_TYPE 0x40
354#define DOORBELL(bp, cid, val) \
355 do { \
356 writel((u32)val, (bp)->doorbells + (BCM_PAGE_SIZE * cid) + \
357 DPM_TRIGER_TYPE); \
358 } while (0)
359
360
361/* TX CSUM helpers */
362#define SKB_CS_OFF(skb) (offsetof(struct tcphdr, check) - \
363 skb->csum_offset)
364#define SKB_CS(skb) (*(u16 *)(skb_transport_header(skb) + \
365 skb->csum_offset))
366
367#define pbd_tcp_flags(skb) (ntohl(tcp_flag_word(tcp_hdr(skb)))>>16 & 0xff)
368
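[Editor's note] pbd_tcp_flags() above hands the parsing BD the raw TCP flag byte, which is what lets the FW honor flags such as PSH as an aggregation-halt trigger (see the commit message). A minimal standalone sketch of the same extraction, assuming the usual TCP header layout where bytes 12-15 carry data offset, flags and window:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

int main(void)
{
	/* Bytes 12..15 of a TCP header as seen on the wire:
	 * doff/reserved = 0x50, flags = 0x18 (PSH|ACK), window = 0xffff. */
	uint8_t hdr_word[4] = { 0x50, 0x18, 0xff, 0xff };
	uint32_t flag_word;

	memcpy(&flag_word, hdr_word, sizeof(flag_word));

	/* Same shape as pbd_tcp_flags(): after ntohl() the flag byte sits in bits 16..23 */
	printf("tcp flags = 0x%02x\n", (ntohl(flag_word) >> 16) & 0xff);  /* prints 0x18 */
	return 0;
}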
369#define XMIT_PLAIN 0
370#define XMIT_CSUM_V4 0x1
371#define XMIT_CSUM_V6 0x2
372#define XMIT_CSUM_TCP 0x4
373#define XMIT_GSO_V4 0x8
374#define XMIT_GSO_V6 0x10
375
376#define XMIT_CSUM (XMIT_CSUM_V4 | XMIT_CSUM_V6)
377#define XMIT_GSO (XMIT_GSO_V4 | XMIT_GSO_V6)
378
379
217/* stuff added to make the code fit 80Col */ 380/* stuff added to make the code fit 80Col */
218 381
219#define CQE_TYPE(cqe_fp_flags) ((cqe_fp_flags) & ETH_FAST_PATH_RX_CQE_TYPE) 382#define CQE_TYPE(cqe_fp_flags) ((cqe_fp_flags) & ETH_FAST_PATH_RX_CQE_TYPE)
220 383
384#define TPA_TYPE_START ETH_FAST_PATH_RX_CQE_START_FLG
385#define TPA_TYPE_END ETH_FAST_PATH_RX_CQE_END_FLG
386#define TPA_TYPE(cqe_fp_flags) ((cqe_fp_flags) & \
387 (TPA_TYPE_START | TPA_TYPE_END))
388
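[Editor's note] The RX path added later in this patch treats a CQE carrying both START and END flags as a regular, non-TPA completion. A small decode sketch of TPA_TYPE() follows; the flag bit values here are placeholders for illustration, the driver takes them from the ETH_FAST_PATH_RX_CQE_*_FLG HSI definitions.

#include <stdio.h>

#define TPA_TYPE_START  0x40   /* placeholder value */
#define TPA_TYPE_END    0x80   /* placeholder value */
#define TPA_TYPE(f)     ((f) & (TPA_TYPE_START | TPA_TYPE_END))

static const char *decode(unsigned int cqe_fp_flags)
{
	switch (TPA_TYPE(cqe_fp_flags)) {
	case TPA_TYPE_START:                return "TPA start";
	case TPA_TYPE_END:                  return "TPA end";
	case TPA_TYPE_START | TPA_TYPE_END: return "regular (non-TPA) CQE";
	default:                            return "no TPA indication";
	}
}

int main(void)
{
	printf("%s\n", decode(TPA_TYPE_START | TPA_TYPE_END));
	return 0;
}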
389#define BNX2X_RX_SUM_OK(cqe) \
390 (!(cqe->fast_path_cqe.status_flags & \
391 (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG | \
392 ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)))
393
394#define BNX2X_RX_SUM_FIX(cqe) \
395 ((le16_to_cpu(cqe->fast_path_cqe.pars_flags.flags) & \
396 PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) == \
397 (1 << PARSING_FLAGS_OVER_ETHERNET_PROTOCOL_SHIFT))
398
221#define ETH_RX_ERROR_FALGS (ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG | \ 399#define ETH_RX_ERROR_FALGS (ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG | \
222 ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG | \ 400 ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG | \
223 ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG) 401 ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG)
@@ -247,6 +425,9 @@ struct bnx2x_fastpath {
247#define BNX2X_TX_SB_INDEX \ 425#define BNX2X_TX_SB_INDEX \
248 (&fp->status_blk->c_status_block.index_values[C_SB_ETH_TX_CQ_INDEX]) 426 (&fp->status_blk->c_status_block.index_values[C_SB_ETH_TX_CQ_INDEX])
249 427
428
429/* end of fast path */
430
250/* common */ 431/* common */
251 432
252struct bnx2x_common { 433struct bnx2x_common {
@@ -546,7 +727,7 @@ struct bnx2x {
546 struct pci_dev *pdev; 727 struct pci_dev *pdev;
547 728
548 atomic_t intr_sem; 729 atomic_t intr_sem;
549 struct msix_entry msix_table[MAX_CONTEXT+1]; 730 struct msix_entry msix_table[MAX_CONTEXT+1];
550 731
551 int tx_ring_size; 732 int tx_ring_size;
552 733
@@ -604,6 +785,7 @@ struct bnx2x {
604#define USING_DAC_FLAG 0x10 785#define USING_DAC_FLAG 0x10
605#define USING_MSIX_FLAG 0x20 786#define USING_MSIX_FLAG 0x20
606#define ASF_ENABLE_FLAG 0x40 787#define ASF_ENABLE_FLAG 0x40
788#define TPA_ENABLE_FLAG 0x80
607#define NO_MCP_FLAG 0x100 789#define NO_MCP_FLAG 0x100
608#define BP_NOMCP(bp) (bp->flags & NO_MCP_FLAG) 790#define BP_NOMCP(bp) (bp->flags & NO_MCP_FLAG)
609 791
@@ -725,76 +907,6 @@ void bnx2x_write_dmae(struct bnx2x *bp, dma_addr_t dma_addr, u32 dst_addr,
725 u32 len32); 907 u32 len32);
726int bnx2x_set_gpio(struct bnx2x *bp, int gpio_num, u32 mode); 908int bnx2x_set_gpio(struct bnx2x *bp, int gpio_num, u32 mode);
727 909
728
729/* MC hsi */
730#define RX_COPY_THRESH 92
731#define BCM_PAGE_SHIFT 12
732#define BCM_PAGE_SIZE (1 << BCM_PAGE_SHIFT)
733#define BCM_PAGE_MASK (~(BCM_PAGE_SIZE - 1))
734#define BCM_PAGE_ALIGN(addr) (((addr) + BCM_PAGE_SIZE - 1) & BCM_PAGE_MASK)
735
736#define NUM_TX_RINGS 16
737#define TX_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_tx_bd))
738#define MAX_TX_DESC_CNT (TX_DESC_CNT - 1)
739#define NUM_TX_BD (TX_DESC_CNT * NUM_TX_RINGS)
740#define MAX_TX_BD (NUM_TX_BD - 1)
741#define MAX_TX_AVAIL (MAX_TX_DESC_CNT * NUM_TX_RINGS - 2)
742#define NEXT_TX_IDX(x) ((((x) & MAX_TX_DESC_CNT) == \
743 (MAX_TX_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
744#define TX_BD(x) ((x) & MAX_TX_BD)
745#define TX_BD_POFF(x) ((x) & MAX_TX_DESC_CNT)
746
747/* The RX BD ring is special, each bd is 8 bytes but the last one is 16 */
748#define NUM_RX_RINGS 8
749#define RX_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_rx_bd))
750#define MAX_RX_DESC_CNT (RX_DESC_CNT - 2)
751#define RX_DESC_MASK (RX_DESC_CNT - 1)
752#define NUM_RX_BD (RX_DESC_CNT * NUM_RX_RINGS)
753#define MAX_RX_BD (NUM_RX_BD - 1)
754#define MAX_RX_AVAIL (MAX_RX_DESC_CNT * NUM_RX_RINGS - 2)
755#define NEXT_RX_IDX(x) ((((x) & RX_DESC_MASK) == \
756 (MAX_RX_DESC_CNT - 1)) ? (x) + 3 : (x) + 1)
757#define RX_BD(x) ((x) & MAX_RX_BD)
758
759#define NUM_RCQ_RINGS (NUM_RX_RINGS * 2)
760#define RCQ_DESC_CNT (BCM_PAGE_SIZE / sizeof(union eth_rx_cqe))
761#define MAX_RCQ_DESC_CNT (RCQ_DESC_CNT - 1)
762#define NUM_RCQ_BD (RCQ_DESC_CNT * NUM_RCQ_RINGS)
763#define MAX_RCQ_BD (NUM_RCQ_BD - 1)
764#define MAX_RCQ_AVAIL (MAX_RCQ_DESC_CNT * NUM_RCQ_RINGS - 2)
765#define NEXT_RCQ_IDX(x) ((((x) & MAX_RCQ_DESC_CNT) == \
766 (MAX_RCQ_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
767#define RCQ_BD(x) ((x) & MAX_RCQ_BD)
768
769
770/* used on a CID received from the HW */
771#define SW_CID(x) (le32_to_cpu(x) & \
772 (COMMON_RAMROD_ETH_RX_CQE_CID >> 1))
773#define CQE_CMD(x) (le32_to_cpu(x) >> \
774 COMMON_RAMROD_ETH_RX_CQE_CMD_ID_SHIFT)
775
776#define STROM_ASSERT_ARRAY_SIZE 50
777
778
779
780/* must be used on a CID before placing it on a HW ring */
781#define HW_CID(bp, x) ((BP_PORT(bp) << 23) | (BP_E1HVN(bp) << 17) | x)
782
783#define SP_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_spe))
784#define MAX_SP_DESC_CNT (SP_DESC_CNT - 1)
785
786
787#define BNX2X_BTR 3
788#define MAX_SPQ_PENDING 8
789
790
791#define DPM_TRIGER_TYPE 0x40
792#define DOORBELL(bp, cid, val) \
793 do { \
794 writel((u32)val, (bp)->doorbells + (BCM_PAGE_SIZE * cid) + \
795 DPM_TRIGER_TYPE); \
796 } while (0)
797
798static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms, 910static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
799 int wait) 911 int wait)
800{ 912{
@@ -874,14 +986,20 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
874#define BNX2X_LOOPBACK_FAILED (BNX2X_MAC_LOOPBACK_FAILED | \ 986#define BNX2X_LOOPBACK_FAILED (BNX2X_MAC_LOOPBACK_FAILED | \
875 BNX2X_PHY_LOOPBACK_FAILED) 987 BNX2X_PHY_LOOPBACK_FAILED)
876 988
877#define pbd_tcp_flags(skb) (ntohl(tcp_flag_word(tcp_hdr(skb)))>>16 & 0xff) 989
990#define STROM_ASSERT_ARRAY_SIZE 50
991
878 992
879/* must be used on a CID before placing it on a HW ring */ 993/* must be used on a CID before placing it on a HW ring */
994#define HW_CID(bp, x) ((BP_PORT(bp) << 23) | (BP_E1HVN(bp) << 17) | x)
995
996#define SP_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_spe))
997#define MAX_SP_DESC_CNT (SP_DESC_CNT - 1)
998
999
1000#define BNX2X_BTR 3
1001#define MAX_SPQ_PENDING 8
880 1002
881#define BNX2X_RX_SUM_OK(cqe) \
882 (!(cqe->fast_path_cqe.status_flags & \
883 (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG | \
884 ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)))
885 1003
886/* CMNG constants 1004/* CMNG constants
887 derived from lab experiments, and not from system spec calculations !!! */ 1005 derived from lab experiments, and not from system spec calculations !!! */
diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index ccfe33c110b4..fabde5555e32 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -79,6 +79,7 @@ MODULE_VERSION(DRV_MODULE_VERSION);
79static int use_inta; 79static int use_inta;
80static int poll; 80static int poll;
81static int debug; 81static int debug;
82static int disable_tpa;
82static int nomcp; 83static int nomcp;
83static int load_count[3]; /* 0-common, 1-port0, 2-port1 */ 84static int load_count[3]; /* 0-common, 1-port0, 2-port1 */
84static int use_multi; 85static int use_multi;
@@ -86,6 +87,7 @@ static int use_multi;
86module_param(use_inta, int, 0); 87module_param(use_inta, int, 0);
87module_param(poll, int, 0); 88module_param(poll, int, 0);
88module_param(debug, int, 0); 89module_param(debug, int, 0);
90module_param(disable_tpa, int, 0);
89module_param(nomcp, int, 0); 91module_param(nomcp, int, 0);
90MODULE_PARM_DESC(use_inta, "use INT#A instead of MSI-X"); 92MODULE_PARM_DESC(use_inta, "use INT#A instead of MSI-X");
91MODULE_PARM_DESC(poll, "use polling (for debug)"); 93MODULE_PARM_DESC(poll, "use polling (for debug)");
@@ -512,13 +514,16 @@ static void bnx2x_panic_dump(struct bnx2x *bp)
512 i, fp->tx_pkt_prod, fp->tx_pkt_cons, fp->tx_bd_prod, 514 i, fp->tx_pkt_prod, fp->tx_pkt_cons, fp->tx_bd_prod,
513 fp->tx_bd_cons, le16_to_cpu(*fp->tx_cons_sb)); 515 fp->tx_bd_cons, le16_to_cpu(*fp->tx_cons_sb));
514 BNX2X_ERR(" rx_comp_prod(%x) rx_comp_cons(%x)" 516 BNX2X_ERR(" rx_comp_prod(%x) rx_comp_cons(%x)"
515 " *rx_cons_sb(%x)\n", 517 " *rx_cons_sb(%x) *rx_bd_cons_sb(%x)"
518 " rx_sge_prod(%x) last_max_sge(%x)\n",
516 fp->rx_comp_prod, fp->rx_comp_cons, 519 fp->rx_comp_prod, fp->rx_comp_cons,
517 le16_to_cpu(*fp->rx_cons_sb)); 520 le16_to_cpu(*fp->rx_cons_sb),
521 le16_to_cpu(*fp->rx_bd_cons_sb),
522 fp->rx_sge_prod, fp->last_max_sge);
518 BNX2X_ERR(" fp_c_idx(%x) fp_u_idx(%x)" 523 BNX2X_ERR(" fp_c_idx(%x) fp_u_idx(%x)"
519 " bd data(%x,%x)\n", 524 " bd data(%x,%x) rx_alloc_failed(%lx)\n",
520 fp->fp_c_idx, fp->fp_u_idx, hw_prods->packets_prod, 525 fp->fp_c_idx, fp->fp_u_idx, hw_prods->packets_prod,
521 hw_prods->bds_prod); 526 hw_prods->bds_prod, fp->rx_alloc_failed);
522 527
523 start = TX_BD(le16_to_cpu(*fp->tx_cons_sb) - 10); 528 start = TX_BD(le16_to_cpu(*fp->tx_cons_sb) - 10);
524 end = TX_BD(le16_to_cpu(*fp->tx_cons_sb) + 245); 529 end = TX_BD(le16_to_cpu(*fp->tx_cons_sb) + 245);
@@ -548,6 +553,16 @@ static void bnx2x_panic_dump(struct bnx2x *bp)
548 j, rx_bd[1], rx_bd[0], sw_bd->skb); 553 j, rx_bd[1], rx_bd[0], sw_bd->skb);
549 } 554 }
550 555
556 start = 0;
557 end = RX_SGE_CNT*NUM_RX_SGE_PAGES;
558 for (j = start; j < end; j++) {
559 u32 *rx_sge = (u32 *)&fp->rx_sge_ring[j];
560 struct sw_rx_page *sw_page = &fp->rx_page_ring[j];
561
562 BNX2X_ERR("rx_sge[%x]=[%x:%x] sw_page=[%p]\n",
563 j, rx_sge[1], rx_sge[0], sw_page->page);
564 }
565
551 start = RCQ_BD(fp->rx_comp_cons - 10); 566 start = RCQ_BD(fp->rx_comp_cons - 10);
552 end = RCQ_BD(fp->rx_comp_cons + 503); 567 end = RCQ_BD(fp->rx_comp_cons + 503);
553 for (j = start; j < end; j++) { 568 for (j = start; j < end; j++) {
@@ -963,6 +978,62 @@ static void bnx2x_sp_event(struct bnx2x_fastpath *fp,
963 mb(); /* force bnx2x_wait_ramrod() to see the change */ 978 mb(); /* force bnx2x_wait_ramrod() to see the change */
964} 979}
965 980
981static inline void bnx2x_free_rx_sge(struct bnx2x *bp,
982 struct bnx2x_fastpath *fp, u16 index)
983{
984 struct sw_rx_page *sw_buf = &fp->rx_page_ring[index];
985 struct page *page = sw_buf->page;
986 struct eth_rx_sge *sge = &fp->rx_sge_ring[index];
987
988 /* Skip "next page" elements */
989 if (!page)
990 return;
991
992 pci_unmap_page(bp->pdev, pci_unmap_addr(sw_buf, mapping),
993 BCM_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE);
994 __free_pages(page, PAGES_PER_SGE_SHIFT);
995
996 sw_buf->page = NULL;
997 sge->addr_hi = 0;
998 sge->addr_lo = 0;
999}
1000
1001static inline void bnx2x_free_rx_sge_range(struct bnx2x *bp,
1002 struct bnx2x_fastpath *fp, int last)
1003{
1004 int i;
1005
1006 for (i = 0; i < last; i++)
1007 bnx2x_free_rx_sge(bp, fp, i);
1008}
1009
1010static inline int bnx2x_alloc_rx_sge(struct bnx2x *bp,
1011 struct bnx2x_fastpath *fp, u16 index)
1012{
1013 struct page *page = alloc_pages(GFP_ATOMIC, PAGES_PER_SGE_SHIFT);
1014 struct sw_rx_page *sw_buf = &fp->rx_page_ring[index];
1015 struct eth_rx_sge *sge = &fp->rx_sge_ring[index];
1016 dma_addr_t mapping;
1017
1018 if (unlikely(page == NULL))
1019 return -ENOMEM;
1020
1021 mapping = pci_map_page(bp->pdev, page, 0, BCM_PAGE_SIZE*PAGES_PER_SGE,
1022 PCI_DMA_FROMDEVICE);
1023 if (unlikely(dma_mapping_error(mapping))) {
1024 __free_pages(page, PAGES_PER_SGE_SHIFT);
1025 return -ENOMEM;
1026 }
1027
1028 sw_buf->page = page;
1029 pci_unmap_addr_set(sw_buf, mapping, mapping);
1030
1031 sge->addr_hi = cpu_to_le32(U64_HI(mapping));
1032 sge->addr_lo = cpu_to_le32(U64_LO(mapping));
1033
1034 return 0;
1035}
1036
966static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp, 1037static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp,
967 struct bnx2x_fastpath *fp, u16 index) 1038 struct bnx2x_fastpath *fp, u16 index)
968{ 1039{
@@ -1016,12 +1087,310 @@ static void bnx2x_reuse_rx_skb(struct bnx2x_fastpath *fp,
1016 *prod_bd = *cons_bd; 1087 *prod_bd = *cons_bd;
1017} 1088}
1018 1089
1090static inline void bnx2x_update_last_max_sge(struct bnx2x_fastpath *fp,
1091 u16 idx)
1092{
1093 u16 last_max = fp->last_max_sge;
1094
1095 if (SUB_S16(idx, last_max) > 0)
1096 fp->last_max_sge = idx;
1097}
1098
1099static void bnx2x_clear_sge_mask_next_elems(struct bnx2x_fastpath *fp)
1100{
1101 int i, j;
1102
1103 for (i = 1; i <= NUM_RX_SGE_PAGES; i++) {
1104 int idx = RX_SGE_CNT * i - 1;
1105
1106 for (j = 0; j < 2; j++) {
1107 SGE_MASK_CLEAR_BIT(fp, idx);
1108 idx--;
1109 }
1110 }
1111}
1112
1113static void bnx2x_update_sge_prod(struct bnx2x_fastpath *fp,
1114 struct eth_fast_path_rx_cqe *fp_cqe)
1115{
1116 struct bnx2x *bp = fp->bp;
1117 u16 sge_len = BCM_PAGE_ALIGN(le16_to_cpu(fp_cqe->pkt_len) -
1118 le16_to_cpu(fp_cqe->len_on_bd)) >>
1119 BCM_PAGE_SHIFT;
1120 u16 last_max, last_elem, first_elem;
1121 u16 delta = 0;
1122 u16 i;
1123
1124 if (!sge_len)
1125 return;
1126
1127 /* First mark all used pages */
1128 for (i = 0; i < sge_len; i++)
1129 SGE_MASK_CLEAR_BIT(fp, RX_SGE(le16_to_cpu(fp_cqe->sgl[i])));
1130
1131 DP(NETIF_MSG_RX_STATUS, "fp_cqe->sgl[%d] = %d\n",
1132 sge_len - 1, le16_to_cpu(fp_cqe->sgl[sge_len - 1]));
1133
1134 /* Here we assume that the last SGE index is the biggest */
1135 prefetch((void *)(fp->sge_mask));
1136 bnx2x_update_last_max_sge(fp, le16_to_cpu(fp_cqe->sgl[sge_len - 1]));
1137
1138 last_max = RX_SGE(fp->last_max_sge);
1139 last_elem = last_max >> RX_SGE_MASK_ELEM_SHIFT;
1140 first_elem = RX_SGE(fp->rx_sge_prod) >> RX_SGE_MASK_ELEM_SHIFT;
1141
1142 /* If ring is not full */
1143 if (last_elem + 1 != first_elem)
1144 last_elem++;
1145
1146 /* Now update the prod */
1147 for (i = first_elem; i != last_elem; i = NEXT_SGE_MASK_ELEM(i)) {
1148 if (likely(fp->sge_mask[i]))
1149 break;
1150
1151 fp->sge_mask[i] = RX_SGE_MASK_ELEM_ONE_MASK;
1152 delta += RX_SGE_MASK_ELEM_SZ;
1153 }
1154
1155 if (delta > 0) {
1156 fp->rx_sge_prod += delta;
1157 /* clear page-end entries */
1158 bnx2x_clear_sge_mask_next_elems(fp);
1159 }
1160
1161 DP(NETIF_MSG_RX_STATUS,
1162 "fp->last_max_sge = %d fp->rx_sge_prod = %d\n",
1163 fp->last_max_sge, fp->rx_sge_prod);
1164}
1165
1166static inline void bnx2x_init_sge_ring_bit_mask(struct bnx2x_fastpath *fp)
1167{
1168 /* Set the mask to all 1-s: it's faster to compare to 0 than to 0xf-s */
1169 memset(fp->sge_mask, 0xff,
1170 (NUM_RX_SGE >> RX_SGE_MASK_ELEM_SHIFT)*sizeof(u64));
1171
1172 /* Clear the two last indeces in the page to 1:
1173 these are the indeces that correspond to the "next" element,
1174 hence will never be indicated and should be removed from
1175 the calculations. */
1176 bnx2x_clear_sge_mask_next_elems(fp);
1177}
1178
1179static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
1180 struct sk_buff *skb, u16 cons, u16 prod)
1181{
1182 struct bnx2x *bp = fp->bp;
1183 struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
1184 struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
1185 struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
1186 dma_addr_t mapping;
1187
1188 /* move empty skb from pool to prod and map it */
1189 prod_rx_buf->skb = fp->tpa_pool[queue].skb;
1190 mapping = pci_map_single(bp->pdev, fp->tpa_pool[queue].skb->data,
1191 bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
1192 pci_unmap_addr_set(prod_rx_buf, mapping, mapping);
1193
1194 /* move partial skb from cons to pool (don't unmap yet) */
1195 fp->tpa_pool[queue] = *cons_rx_buf;
1196
1197 /* mark bin state as start - print error if current state != stop */
1198 if (fp->tpa_state[queue] != BNX2X_TPA_STOP)
1199 BNX2X_ERR("start of bin not in stop [%d]\n", queue);
1200
1201 fp->tpa_state[queue] = BNX2X_TPA_START;
1202
1203 /* point prod_bd to new skb */
1204 prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
1205 prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
1206
1207#ifdef BNX2X_STOP_ON_ERROR
1208 fp->tpa_queue_used |= (1 << queue);
1209#ifdef __powerpc64__
1210 DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%lx\n",
1211#else
1212 DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%llx\n",
1213#endif
1214 fp->tpa_queue_used);
1215#endif
1216}
1217
1218static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
1219 struct sk_buff *skb,
1220 struct eth_fast_path_rx_cqe *fp_cqe,
1221 u16 cqe_idx)
1222{
1223 struct sw_rx_page *rx_pg, old_rx_pg;
1224 struct page *sge;
1225 u16 len_on_bd = le16_to_cpu(fp_cqe->len_on_bd);
1226 u32 i, frag_len, frag_size, pages;
1227 int err;
1228 int j;
1229
1230 frag_size = le16_to_cpu(fp_cqe->pkt_len) - len_on_bd;
1231 pages = BCM_PAGE_ALIGN(frag_size) >> BCM_PAGE_SHIFT;
1232
1233 /* This is needed in order to enable forwarding support */
1234 if (frag_size)
1235 skb_shinfo(skb)->gso_size = min((u32)BCM_PAGE_SIZE,
1236 max(frag_size, (u32)len_on_bd));
1237
1238#ifdef BNX2X_STOP_ON_ERROR
1239 if (pages > 8*PAGES_PER_SGE) {
1240 BNX2X_ERR("SGL length is too long: %d. CQE index is %d\n",
1241 pages, cqe_idx);
1242 BNX2X_ERR("fp_cqe->pkt_len = %d fp_cqe->len_on_bd = %d\n",
1243 fp_cqe->pkt_len, len_on_bd);
1244 bnx2x_panic();
1245 return -EINVAL;
1246 }
1247#endif
1248
1249 /* Run through the SGL and compose the fragmented skb */
1250 for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) {
1251 u16 sge_idx = RX_SGE(le16_to_cpu(fp_cqe->sgl[j]));
1252
1253 /* FW gives the indices of the SGE as if the ring is an array
1254 (meaning that "next" element will consume 2 indices) */
1255 frag_len = min(frag_size, (u32)(BCM_PAGE_SIZE*PAGES_PER_SGE));
1256 rx_pg = &fp->rx_page_ring[sge_idx];
1257 sge = rx_pg->page;
1258 old_rx_pg = *rx_pg;
1259
1260 /* If we fail to allocate a substitute page, we simply stop
1261 where we are and drop the whole packet */
1262 err = bnx2x_alloc_rx_sge(bp, fp, sge_idx);
1263 if (unlikely(err)) {
1264 fp->rx_alloc_failed++;
1265 return err;
1266 }
1267
1268 /* Unmap the page as we r going to pass it to the stack */
1269 pci_unmap_page(bp->pdev, pci_unmap_addr(&old_rx_pg, mapping),
1270 BCM_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE);
1271
1272 /* Add one frag and update the appropriate fields in the skb */
1273 skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len);
1274
1275 skb->data_len += frag_len;
1276 skb->truesize += frag_len;
1277 skb->len += frag_len;
1278
1279 frag_size -= frag_len;
1280 }
1281
1282 return 0;
1283}
1284
1285static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
1286 u16 queue, int pad, int len, union eth_rx_cqe *cqe,
1287 u16 cqe_idx)
1288{
1289 struct sw_rx_bd *rx_buf = &fp->tpa_pool[queue];
1290 struct sk_buff *skb = rx_buf->skb;
1291 /* alloc new skb */
1292 struct sk_buff *new_skb = netdev_alloc_skb(bp->dev, bp->rx_buf_size);
1293
1294 /* Unmap skb in the pool anyway, as we are going to change
1295 pool entry status to BNX2X_TPA_STOP even if new skb allocation
1296 fails. */
1297 pci_unmap_single(bp->pdev, pci_unmap_addr(rx_buf, mapping),
1298 bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
1299
1300 /* if alloc failed drop the packet and keep the buffer in the bin */
1301 if (likely(new_skb)) {
1302
1303 prefetch(skb);
1304 prefetch(((char *)(skb)) + 128);
1305
1306 /* else fix ip xsum and give it to the stack */
1307 /* (no need to map the new skb) */
1308#ifdef BNX2X_STOP_ON_ERROR
1309 if (pad + len > bp->rx_buf_size) {
1310 BNX2X_ERR("skb_put is about to fail... "
1311 "pad %d len %d rx_buf_size %d\n",
1312 pad, len, bp->rx_buf_size);
1313 bnx2x_panic();
1314 return;
1315 }
1316#endif
1317
1318 skb_reserve(skb, pad);
1319 skb_put(skb, len);
1320
1321 skb->protocol = eth_type_trans(skb, bp->dev);
1322 skb->ip_summed = CHECKSUM_UNNECESSARY;
1323
1324 {
1325 struct iphdr *iph;
1326
1327 iph = (struct iphdr *)skb->data;
1328 iph->check = 0;
1329 iph->check = ip_fast_csum((u8 *)iph, iph->ihl);
1330 }
1331
1332 if (!bnx2x_fill_frag_skb(bp, fp, skb,
1333 &cqe->fast_path_cqe, cqe_idx)) {
1334#ifdef BCM_VLAN
1335 if ((bp->vlgrp != NULL) &&
1336 (le16_to_cpu(cqe->fast_path_cqe.pars_flags.flags) &
1337 PARSING_FLAGS_VLAN))
1338 vlan_hwaccel_receive_skb(skb, bp->vlgrp,
1339 le16_to_cpu(cqe->fast_path_cqe.
1340 vlan_tag));
1341 else
1342#endif
1343 netif_receive_skb(skb);
1344 } else {
1345 DP(NETIF_MSG_RX_STATUS, "Failed to allocate new pages"
1346 " - dropping packet!\n");
1347 dev_kfree_skb(skb);
1348 }
1349
1350 bp->dev->last_rx = jiffies;
1351
1352 /* put new skb in bin */
1353 fp->tpa_pool[queue].skb = new_skb;
1354
1355 } else {
1356 DP(NETIF_MSG_RX_STATUS,
1357 "Failed to allocate new skb - dropping packet!\n");
1358 fp->rx_alloc_failed++;
1359 }
1360
1361 fp->tpa_state[queue] = BNX2X_TPA_STOP;
1362}
1363
1364static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
1365 struct bnx2x_fastpath *fp,
1366 u16 bd_prod, u16 rx_comp_prod,
1367 u16 rx_sge_prod)
1368{
1369 struct tstorm_eth_rx_producers rx_prods = {0};
1370 int i;
1371
1372 /* Update producers */
1373 rx_prods.bd_prod = bd_prod;
1374 rx_prods.cqe_prod = rx_comp_prod;
1375 rx_prods.sge_prod = rx_sge_prod;
1376
1377 for (i = 0; i < sizeof(struct tstorm_eth_rx_producers)/4; i++)
1378 REG_WR(bp, BAR_TSTRORM_INTMEM +
1379 TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)) + i*4,
1380 ((u32 *)&rx_prods)[i]);
1381
1382 DP(NETIF_MSG_RX_STATUS,
1383 "Wrote: bd_prod %u cqe_prod %u sge_prod %u\n",
1384 bd_prod, rx_comp_prod, rx_sge_prod);
1385}
1386
1019static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) 1387static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
1020{ 1388{
1021 struct bnx2x *bp = fp->bp; 1389 struct bnx2x *bp = fp->bp;
1022 u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons; 1390 u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons;
1023 u16 hw_comp_cons, sw_comp_cons, sw_comp_prod; 1391 u16 hw_comp_cons, sw_comp_cons, sw_comp_prod;
1024 int rx_pkt = 0; 1392 int rx_pkt = 0;
1393 u16 queue;
1025 1394
1026#ifdef BNX2X_STOP_ON_ERROR 1395#ifdef BNX2X_STOP_ON_ERROR
1027 if (unlikely(bp->panic)) 1396 if (unlikely(bp->panic))
@@ -1082,6 +1451,49 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
1082 len = le16_to_cpu(cqe->fast_path_cqe.pkt_len); 1451 len = le16_to_cpu(cqe->fast_path_cqe.pkt_len);
1083 pad = cqe->fast_path_cqe.placement_offset; 1452 pad = cqe->fast_path_cqe.placement_offset;
1084 1453
1454 /* If CQE is marked both TPA_START and TPA_END
1455 it is a non-TPA CQE */
1456 if ((!fp->disable_tpa) &&
1457 (TPA_TYPE(cqe_fp_flags) !=
1458 (TPA_TYPE_START | TPA_TYPE_END))) {
1459 queue = cqe->fast_path_cqe.queue_index;
1460
1461 if (TPA_TYPE(cqe_fp_flags) == TPA_TYPE_START) {
1462 DP(NETIF_MSG_RX_STATUS,
1463 "calling tpa_start on queue %d\n",
1464 queue);
1465
1466 bnx2x_tpa_start(fp, queue, skb,
1467 bd_cons, bd_prod);
1468 goto next_rx;
1469 }
1470
1471 if (TPA_TYPE(cqe_fp_flags) == TPA_TYPE_END) {
1472 DP(NETIF_MSG_RX_STATUS,
1473 "calling tpa_stop on queue %d\n",
1474 queue);
1475
1476 if (!BNX2X_RX_SUM_FIX(cqe))
1477 BNX2X_ERR("STOP on none TCP "
1478 "data\n");
1479
1480 /* This is a size of the linear data
1481 on this skb */
1482 len = le16_to_cpu(cqe->fast_path_cqe.
1483 len_on_bd);
1484 bnx2x_tpa_stop(bp, fp, queue, pad,
1485 len, cqe, comp_ring_cons);
1486#ifdef BNX2X_STOP_ON_ERROR
1487 if (bp->panic)
1488 return -EINVAL;
1489#endif
1490
1491 bnx2x_update_sge_prod(fp,
1492 &cqe->fast_path_cqe);
1493 goto next_cqe;
1494 }
1495 }
1496
1085 pci_dma_sync_single_for_device(bp->pdev, 1497 pci_dma_sync_single_for_device(bp->pdev,
1086 pci_unmap_addr(rx_buf, mapping), 1498 pci_unmap_addr(rx_buf, mapping),
1087 pad + RX_COPY_THRESH, 1499 pad + RX_COPY_THRESH,
@@ -1112,7 +1524,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
1112 DP(NETIF_MSG_RX_ERR, 1524 DP(NETIF_MSG_RX_ERR,
1113 "ERROR packet dropped " 1525 "ERROR packet dropped "
1114 "because of alloc failure\n"); 1526 "because of alloc failure\n");
1115 /* TBD count this as a drop? */ 1527 fp->rx_alloc_failed++;
1116 goto reuse_rx; 1528 goto reuse_rx;
1117 } 1529 }
1118 1530
@@ -1138,6 +1550,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
1138 DP(NETIF_MSG_RX_ERR, 1550 DP(NETIF_MSG_RX_ERR,
1139 "ERROR packet dropped because " 1551 "ERROR packet dropped because "
1140 "of alloc failure\n"); 1552 "of alloc failure\n");
1553 fp->rx_alloc_failed++;
1141reuse_rx: 1554reuse_rx:
1142 bnx2x_reuse_rx_skb(fp, skb, bd_cons, bd_prod); 1555 bnx2x_reuse_rx_skb(fp, skb, bd_cons, bd_prod);
1143 goto next_rx; 1556 goto next_rx;
@@ -1184,11 +1597,9 @@ next_cqe:
1184 fp->rx_comp_cons = sw_comp_cons; 1597 fp->rx_comp_cons = sw_comp_cons;
1185 fp->rx_comp_prod = sw_comp_prod; 1598 fp->rx_comp_prod = sw_comp_prod;
1186 1599
1187 REG_WR(bp, BAR_TSTRORM_INTMEM + 1600 /* Update producers */
1188 TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)), 1601 bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod,
1189 sw_comp_prod); 1602 fp->rx_sge_prod);
1190
1191
1192 mmiowb(); /* keep prod updates ordered */ 1603 mmiowb(); /* keep prod updates ordered */
1193 1604
1194 fp->rx_pkt += rx_pkt; 1605 fp->rx_pkt += rx_pkt;
@@ -2745,10 +3156,10 @@ static void bnx2x_stats_pmf_update(struct bnx2x *bp)
2745 dmae->opcode = (opcode | DMAE_CMD_C_DST_PCI); 3156 dmae->opcode = (opcode | DMAE_CMD_C_DST_PCI);
2746 dmae->src_addr_lo = (bp->port.port_stx >> 2) + DMAE_LEN32_RD_MAX; 3157 dmae->src_addr_lo = (bp->port.port_stx >> 2) + DMAE_LEN32_RD_MAX;
2747 dmae->src_addr_hi = 0; 3158 dmae->src_addr_hi = 0;
2748 dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, port_stats) 3159 dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, port_stats) +
2749 + DMAE_LEN32_RD_MAX * 4); 3160 DMAE_LEN32_RD_MAX * 4);
2750 dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, port_stats) 3161 dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, port_stats) +
2751 + DMAE_LEN32_RD_MAX * 4); 3162 DMAE_LEN32_RD_MAX * 4);
2752 dmae->len = (sizeof(struct host_port_stats) >> 2) - DMAE_LEN32_RD_MAX; 3163 dmae->len = (sizeof(struct host_port_stats) >> 2) - DMAE_LEN32_RD_MAX;
2753 dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, stats_comp)); 3164 dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, stats_comp));
2754 dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, stats_comp)); 3165 dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, stats_comp));
@@ -3365,11 +3776,12 @@ static void bnx2x_stats_update(struct bnx2x *bp)
3365 printk(KERN_DEBUG " tx avail (%4x) tx hc idx (%x)" 3776 printk(KERN_DEBUG " tx avail (%4x) tx hc idx (%x)"
3366 " tx pkt (%lx)\n", 3777 " tx pkt (%lx)\n",
3367 bnx2x_tx_avail(bp->fp), 3778 bnx2x_tx_avail(bp->fp),
3368 *bp->fp->tx_cons_sb, nstats->tx_packets); 3779 le16_to_cpu(*bp->fp->tx_cons_sb), nstats->tx_packets);
3369 printk(KERN_DEBUG " rx usage (%4x) rx hc idx (%x)" 3780 printk(KERN_DEBUG " rx usage (%4x) rx hc idx (%x)"
3370 " rx pkt (%lx)\n", 3781 " rx pkt (%lx)\n",
3371 (u16)(*bp->fp->rx_cons_sb - bp->fp->rx_comp_cons), 3782 (u16)(le16_to_cpu(*bp->fp->rx_cons_sb) -
3372 *bp->fp->rx_cons_sb, nstats->rx_packets); 3783 bp->fp->rx_comp_cons),
3784 le16_to_cpu(*bp->fp->rx_cons_sb), nstats->rx_packets);
3373 printk(KERN_DEBUG " %s (Xoff events %u) brb drops %u\n", 3785 printk(KERN_DEBUG " %s (Xoff events %u) brb drops %u\n",
3374 netif_queue_stopped(bp->dev)? "Xoff" : "Xon", 3786 netif_queue_stopped(bp->dev)? "Xoff" : "Xon",
3375 estats->driver_xoff, estats->brb_drop_lo); 3787 estats->driver_xoff, estats->brb_drop_lo);
@@ -3623,6 +4035,8 @@ static void bnx2x_init_sb(struct bnx2x *bp, int sb_id,
3623 REG_WR(bp, BAR_CSTRORM_INTMEM + 4035 REG_WR(bp, BAR_CSTRORM_INTMEM +
3624 ((CSTORM_SB_HOST_SB_ADDR_OFFSET(port, sb_id)) + 4), 4036 ((CSTORM_SB_HOST_SB_ADDR_OFFSET(port, sb_id)) + 4),
3625 U64_HI(section)); 4037 U64_HI(section));
4038 REG_WR8(bp, BAR_CSTRORM_INTMEM + FP_CSB_FUNC_OFF +
4039 CSTORM_SB_HOST_STATUS_BLOCK_OFFSET(port, sb_id), func);
3626 4040
3627 for (index = 0; index < HC_CSTORM_SB_NUM_INDICES; index++) 4041 for (index = 0; index < HC_CSTORM_SB_NUM_INDICES; index++)
3628 REG_WR16(bp, BAR_CSTRORM_INTMEM + 4042 REG_WR16(bp, BAR_CSTRORM_INTMEM +
@@ -3814,22 +4228,94 @@ static void bnx2x_update_coalesce(struct bnx2x *bp)
3814 } 4228 }
3815} 4229}
3816 4230
4231static inline void bnx2x_free_tpa_pool(struct bnx2x *bp,
4232 struct bnx2x_fastpath *fp, int last)
4233{
4234 int i;
4235
4236 for (i = 0; i < last; i++) {
4237 struct sw_rx_bd *rx_buf = &(fp->tpa_pool[i]);
4238 struct sk_buff *skb = rx_buf->skb;
4239
4240 if (skb == NULL) {
4241 DP(NETIF_MSG_IFDOWN, "tpa bin %d empty on free\n", i);
4242 continue;
4243 }
4244
4245 if (fp->tpa_state[i] == BNX2X_TPA_START)
4246 pci_unmap_single(bp->pdev,
4247 pci_unmap_addr(rx_buf, mapping),
4248 bp->rx_buf_use_size,
4249 PCI_DMA_FROMDEVICE);
4250
4251 dev_kfree_skb(skb);
4252 rx_buf->skb = NULL;
4253 }
4254}
4255
3817static void bnx2x_init_rx_rings(struct bnx2x *bp) 4256static void bnx2x_init_rx_rings(struct bnx2x *bp)
3818{ 4257{
3819 u16 ring_prod; 4258 int func = BP_FUNC(bp);
4259 u16 ring_prod, cqe_ring_prod = 0;
3820 int i, j; 4260 int i, j;
3821 4261
3822 bp->rx_buf_use_size = bp->dev->mtu; 4262 bp->rx_buf_use_size = bp->dev->mtu;
3823
3824 bp->rx_buf_use_size += bp->rx_offset + ETH_OVREHEAD; 4263 bp->rx_buf_use_size += bp->rx_offset + ETH_OVREHEAD;
3825 bp->rx_buf_size = bp->rx_buf_use_size + 64; 4264 bp->rx_buf_size = bp->rx_buf_use_size + 64;
3826 4265
4266 if (bp->flags & TPA_ENABLE_FLAG) {
4267 DP(NETIF_MSG_IFUP,
4268 "rx_buf_use_size %d rx_buf_size %d effective_mtu %d\n",
4269 bp->rx_buf_use_size, bp->rx_buf_size,
4270 bp->dev->mtu + ETH_OVREHEAD);
4271
4272 for_each_queue(bp, j) {
4273 for (i = 0; i < ETH_MAX_AGGREGATION_QUEUES_E1H; i++) {
4274 struct bnx2x_fastpath *fp = &bp->fp[j];
4275
4276 fp->tpa_pool[i].skb =
4277 netdev_alloc_skb(bp->dev, bp->rx_buf_size);
4278 if (!fp->tpa_pool[i].skb) {
4279 BNX2X_ERR("Failed to allocate TPA "
4280 "skb pool for queue[%d] - "
4281 "disabling TPA on this "
4282 "queue!\n", j);
4283 bnx2x_free_tpa_pool(bp, fp, i);
4284 fp->disable_tpa = 1;
4285 break;
4286 }
4287 pci_unmap_addr_set((struct sw_rx_bd *)
4288 &bp->fp->tpa_pool[i],
4289 mapping, 0);
4290 fp->tpa_state[i] = BNX2X_TPA_STOP;
4291 }
4292 }
4293 }
4294
3827 for_each_queue(bp, j) { 4295 for_each_queue(bp, j) {
3828 struct bnx2x_fastpath *fp = &bp->fp[j]; 4296 struct bnx2x_fastpath *fp = &bp->fp[j];
3829 4297
3830 fp->rx_bd_cons = 0; 4298 fp->rx_bd_cons = 0;
3831 fp->rx_cons_sb = BNX2X_RX_SB_INDEX; 4299 fp->rx_cons_sb = BNX2X_RX_SB_INDEX;
4300 fp->rx_bd_cons_sb = BNX2X_RX_SB_BD_INDEX;
4301
4302 /* "next page" elements initialization */
4303 /* SGE ring */
4304 for (i = 1; i <= NUM_RX_SGE_PAGES; i++) {
4305 struct eth_rx_sge *sge;
4306
4307 sge = &fp->rx_sge_ring[RX_SGE_CNT * i - 2];
4308 sge->addr_hi =
4309 cpu_to_le32(U64_HI(fp->rx_sge_mapping +
4310 BCM_PAGE_SIZE*(i % NUM_RX_SGE_PAGES)));
4311 sge->addr_lo =
4312 cpu_to_le32(U64_LO(fp->rx_sge_mapping +
4313 BCM_PAGE_SIZE*(i % NUM_RX_SGE_PAGES)));
4314 }
4315
4316 bnx2x_init_sge_ring_bit_mask(fp);
3832 4317
4318 /* RX BD ring */
3833 for (i = 1; i <= NUM_RX_RINGS; i++) { 4319 for (i = 1; i <= NUM_RX_RINGS; i++) {
3834 struct eth_rx_bd *rx_bd; 4320 struct eth_rx_bd *rx_bd;
3835 4321
@@ -3856,35 +4342,61 @@ static void bnx2x_init_rx_rings(struct bnx2x *bp)
3856 BCM_PAGE_SIZE*(i % NUM_RCQ_RINGS))); 4342 BCM_PAGE_SIZE*(i % NUM_RCQ_RINGS)));
3857 } 4343 }
3858 4344
3859 /* rx completion queue */ 4345 /* Allocate SGEs and initialize the ring elements */
3860 fp->rx_comp_cons = ring_prod = 0; 4346 for (i = 0, ring_prod = 0;
4347 i < MAX_RX_SGE_CNT*NUM_RX_SGE_PAGES; i++) {
3861 4348
4349 if (bnx2x_alloc_rx_sge(bp, fp, ring_prod) < 0) {
4350 BNX2X_ERR("was only able to allocate "
4351 "%d rx sges\n", i);
4352 BNX2X_ERR("disabling TPA for queue[%d]\n", j);
4353 /* Cleanup already allocated elements */
4354 bnx2x_free_rx_sge_range(bp, fp, ring_prod);
4355 bnx2x_free_tpa_pool(bp, fp,
4356 ETH_MAX_AGGREGATION_QUEUES_E1H);
4357 fp->disable_tpa = 1;
4358 ring_prod = 0;
4359 break;
4360 }
4361 ring_prod = NEXT_SGE_IDX(ring_prod);
4362 }
4363 fp->rx_sge_prod = ring_prod;
4364
4365 /* Allocate BDs and initialize BD ring */
4366 fp->rx_comp_cons = fp->rx_alloc_failed = 0;
4367 cqe_ring_prod = ring_prod = 0;
3862 for (i = 0; i < bp->rx_ring_size; i++) { 4368 for (i = 0; i < bp->rx_ring_size; i++) {
3863 if (bnx2x_alloc_rx_skb(bp, fp, ring_prod) < 0) { 4369 if (bnx2x_alloc_rx_skb(bp, fp, ring_prod) < 0) {
3864 BNX2X_ERR("was only able to allocate " 4370 BNX2X_ERR("was only able to allocate "
3865 "%d rx skbs\n", i); 4371 "%d rx skbs\n", i);
4372 fp->rx_alloc_failed++;
3866 break; 4373 break;
3867 } 4374 }
3868 ring_prod = NEXT_RX_IDX(ring_prod); 4375 ring_prod = NEXT_RX_IDX(ring_prod);
4376 cqe_ring_prod = NEXT_RCQ_IDX(cqe_ring_prod);
3869 BUG_TRAP(ring_prod > i); 4377 BUG_TRAP(ring_prod > i);
3870 } 4378 }
3871 4379
3872 fp->rx_bd_prod = fp->rx_comp_prod = ring_prod; 4380 fp->rx_bd_prod = ring_prod;
4381 /* must not have more available CQEs than BDs */
4382 fp->rx_comp_prod = min((u16)(NUM_RCQ_RINGS*RCQ_DESC_CNT),
4383 cqe_ring_prod);
3873 fp->rx_pkt = fp->rx_calls = 0; 4384 fp->rx_pkt = fp->rx_calls = 0;
3874 4385
3875 /* Warning! this will generate an interrupt (to the TSTORM) */ 4386 /* Warning!
3876 /* must only be done when chip is initialized */ 4387 * this will generate an interrupt (to the TSTORM)
3877 REG_WR(bp, BAR_TSTRORM_INTMEM + 4388 * must only be done after chip is initialized
3878 TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)), 4389 */
3879 ring_prod); 4390 bnx2x_update_rx_prod(bp, fp, ring_prod, fp->rx_comp_prod,
4391 fp->rx_sge_prod);
3880 if (j != 0) 4392 if (j != 0)
3881 continue; 4393 continue;
3882 4394
3883 REG_WR(bp, BAR_USTRORM_INTMEM + 4395 REG_WR(bp, BAR_USTRORM_INTMEM +
3884 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(BP_PORT(bp)), 4396 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func),
3885 U64_LO(fp->rx_comp_mapping)); 4397 U64_LO(fp->rx_comp_mapping));
3886 REG_WR(bp, BAR_USTRORM_INTMEM + 4398 REG_WR(bp, BAR_USTRORM_INTMEM +
3887 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(BP_PORT(bp)) + 4, 4399 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func) + 4,
3888 U64_HI(fp->rx_comp_mapping)); 4400 U64_HI(fp->rx_comp_mapping));
3889 } 4401 }
3890} 4402}
@@ -3972,6 +4484,18 @@ static void bnx2x_init_context(struct bnx2x *bp)
3972 U64_HI(fp->rx_desc_mapping); 4484 U64_HI(fp->rx_desc_mapping);
3973 context->ustorm_st_context.common.bd_page_base_lo = 4485 context->ustorm_st_context.common.bd_page_base_lo =
3974 U64_LO(fp->rx_desc_mapping); 4486 U64_LO(fp->rx_desc_mapping);
4487 if (!fp->disable_tpa) {
4488 context->ustorm_st_context.common.flags |=
4489 (USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_TPA |
4490 USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_SGE_RING);
4491 context->ustorm_st_context.common.sge_buff_size =
4492 (u16)(BCM_PAGE_SIZE*PAGES_PER_SGE);
4493 context->ustorm_st_context.common.sge_page_base_hi =
4494 U64_HI(fp->rx_sge_mapping);
4495 context->ustorm_st_context.common.sge_page_base_lo =
4496 U64_LO(fp->rx_sge_mapping);
4497 }
4498
3975 context->cstorm_st_context.sb_index_number = 4499 context->cstorm_st_context.sb_index_number =
3976 HC_INDEX_C_ETH_TX_CQ_CONS; 4500 HC_INDEX_C_ETH_TX_CQ_CONS;
3977 context->cstorm_st_context.status_block_id = sb_id; 4501 context->cstorm_st_context.status_block_id = sb_id;
@@ -4022,6 +4546,18 @@ static void bnx2x_set_client_config(struct bnx2x *bp)
4022 } 4546 }
4023#endif 4547#endif
4024 4548
4549 if (bp->flags & TPA_ENABLE_FLAG) {
4550 tstorm_client.max_sges_for_packet =
4551 BCM_PAGE_ALIGN(tstorm_client.mtu) >> BCM_PAGE_SHIFT;
4552 tstorm_client.max_sges_for_packet =
4553 ((tstorm_client.max_sges_for_packet +
4554 PAGES_PER_SGE - 1) & (~(PAGES_PER_SGE - 1))) >>
4555 PAGES_PER_SGE_SHIFT;
4556
4557 tstorm_client.config_flags |=
4558 TSTORM_ETH_CLIENT_CONFIG_ENABLE_SGE_RING;
4559 }
4560
4025 for_each_queue(bp, i) { 4561 for_each_queue(bp, i) {
4026 REG_WR(bp, BAR_TSTRORM_INTMEM + 4562 REG_WR(bp, BAR_TSTRORM_INTMEM +
4027 TSTORM_CLIENT_CONFIG_OFFSET(port, bp->fp[i].cl_id), 4563 TSTORM_CLIENT_CONFIG_OFFSET(port, bp->fp[i].cl_id),
@@ -4136,8 +4672,8 @@ static void bnx2x_init_internal(struct bnx2x *bp)
4136 REG_WR8(bp, BAR_USTRORM_INTMEM + USTORM_FUNCTION_MODE_OFFSET, 4672 REG_WR8(bp, BAR_USTRORM_INTMEM + USTORM_FUNCTION_MODE_OFFSET,
4137 IS_E1HMF(bp)); 4673 IS_E1HMF(bp));
4138 4674
4139 REG_WR16(bp, BAR_XSTRORM_INTMEM + 4675 REG_WR16(bp, BAR_XSTRORM_INTMEM + XSTORM_E1HOV_OFFSET(func),
4140 XSTORM_E1HOV_OFFSET(func), bp->e1hov); 4676 bp->e1hov);
4141 } 4677 }
4142 4678
4143 /* Zero this manualy as its initialization is 4679 /* Zero this manualy as its initialization is
@@ -4145,6 +4681,25 @@ static void bnx2x_init_internal(struct bnx2x *bp)
4145 for (i = 0; i < USTORM_AGG_DATA_SIZE >> 2; i++) 4681 for (i = 0; i < USTORM_AGG_DATA_SIZE >> 2; i++)
4146 REG_WR(bp, BAR_USTRORM_INTMEM + 4682 REG_WR(bp, BAR_USTRORM_INTMEM +
4147 USTORM_AGG_DATA_OFFSET + 4*i, 0); 4683 USTORM_AGG_DATA_OFFSET + 4*i, 0);
4684
4685 for_each_queue(bp, i) {
4686 struct bnx2x_fastpath *fp = &bp->fp[i];
4687 u16 max_agg_size;
4688
4689 REG_WR(bp, BAR_USTRORM_INTMEM +
4690 USTORM_CQE_PAGE_BASE_OFFSET(port, FP_CL_ID(fp)),
4691 U64_LO(fp->rx_comp_mapping));
4692 REG_WR(bp, BAR_USTRORM_INTMEM +
4693 USTORM_CQE_PAGE_BASE_OFFSET(port, FP_CL_ID(fp)) + 4,
4694 U64_HI(fp->rx_comp_mapping));
4695
4696 max_agg_size = min((u32)(bp->rx_buf_use_size +
4697 8*BCM_PAGE_SIZE*PAGES_PER_SGE),
4698 (u32)0xffff);
4699 REG_WR16(bp, BAR_USTRORM_INTMEM +
4700 USTORM_MAX_AGG_SIZE_OFFSET(port, FP_CL_ID(fp)),
4701 max_agg_size);
4702 }
4148} 4703}
4149 4704
4150static void bnx2x_nic_init(struct bnx2x *bp) 4705static void bnx2x_nic_init(struct bnx2x *bp)
@@ -4767,6 +5322,17 @@ static int bnx2x_init_common(struct bnx2x *bp)
4767 5322
4768 enable_blocks_attention(bp); 5323 enable_blocks_attention(bp);
4769 5324
5325 if (bp->flags & TPA_ENABLE_FLAG) {
5326 struct tstorm_eth_tpa_exist tmp = {0};
5327
5328 tmp.tpa_exist = 1;
5329
5330 REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_TPA_EXIST_OFFSET,
5331 ((u32 *)&tmp)[0]);
5332 REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_TPA_EXIST_OFFSET + 4,
5333 ((u32 *)&tmp)[1]);
5334 }
5335
4770 return 0; 5336 return 0;
4771} 5337}
4772 5338
@@ -5145,8 +5711,12 @@ static void bnx2x_free_mem(struct bnx2x *bp)
5145 bnx2x_fp(bp, i, rx_comp_mapping), 5711 bnx2x_fp(bp, i, rx_comp_mapping),
5146 sizeof(struct eth_fast_path_rx_cqe) * 5712 sizeof(struct eth_fast_path_rx_cqe) *
5147 NUM_RCQ_BD); 5713 NUM_RCQ_BD);
5148 }
5149 5714
5715 /* SGE ring */
5716 BNX2X_PCI_FREE(bnx2x_fp(bp, i, rx_sge_ring),
5717 bnx2x_fp(bp, i, rx_sge_mapping),
5718 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
5719 }
5150 /* end of fastpath */ 5720 /* end of fastpath */
5151 5721
5152 BNX2X_PCI_FREE(bp->def_status_blk, bp->def_status_blk_mapping, 5722 BNX2X_PCI_FREE(bp->def_status_blk, bp->def_status_blk_mapping,
@@ -5161,7 +5731,7 @@ static void bnx2x_free_mem(struct bnx2x *bp)
5161 BNX2X_PCI_FREE(bp->timers, bp->timers_mapping, 8*1024); 5731 BNX2X_PCI_FREE(bp->timers, bp->timers_mapping, 8*1024);
5162 BNX2X_PCI_FREE(bp->qm, bp->qm_mapping, 128*1024); 5732 BNX2X_PCI_FREE(bp->qm, bp->qm_mapping, 128*1024);
5163#endif 5733#endif
5164 BNX2X_PCI_FREE(bp->spq, bp->spq_mapping, PAGE_SIZE); 5734 BNX2X_PCI_FREE(bp->spq, bp->spq_mapping, BCM_PAGE_SIZE);
5165 5735
5166#undef BNX2X_PCI_FREE 5736#undef BNX2X_PCI_FREE
5167#undef BNX2X_KFREE 5737#undef BNX2X_KFREE
@@ -5223,6 +5793,12 @@ static int bnx2x_alloc_mem(struct bnx2x *bp)
5223 sizeof(struct eth_fast_path_rx_cqe) * 5793 sizeof(struct eth_fast_path_rx_cqe) *
5224 NUM_RCQ_BD); 5794 NUM_RCQ_BD);
5225 5795
5796 /* SGE ring */
5797 BNX2X_ALLOC(bnx2x_fp(bp, i, rx_page_ring),
5798 sizeof(struct sw_rx_page) * NUM_RX_SGE);
5799 BNX2X_PCI_ALLOC(bnx2x_fp(bp, i, rx_sge_ring),
5800 &bnx2x_fp(bp, i, rx_sge_mapping),
5801 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
5226 } 5802 }
5227 /* end of fastpath */ 5803 /* end of fastpath */
5228 5804
@@ -5313,6 +5889,9 @@ static void bnx2x_free_rx_skbs(struct bnx2x *bp)
5313 rx_buf->skb = NULL; 5889 rx_buf->skb = NULL;
5314 dev_kfree_skb(skb); 5890 dev_kfree_skb(skb);
5315 } 5891 }
5892 if (!fp->disable_tpa)
5893 bnx2x_free_tpa_pool(bp, fp,
5894 ETH_MAX_AGGREGATION_QUEUES_E1H);
5316 } 5895 }
5317} 5896}
5318 5897
@@ -5664,6 +6243,10 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
5664 if (bnx2x_alloc_mem(bp)) 6243 if (bnx2x_alloc_mem(bp))
5665 return -ENOMEM; 6244 return -ENOMEM;
5666 6245
6246 for_each_queue(bp, i)
6247 bnx2x_fp(bp, i, disable_tpa) =
6248 ((bp->flags & TPA_ENABLE_FLAG) == 0);
6249
5667 /* Disable interrupt handling until HW is initialized */ 6250 /* Disable interrupt handling until HW is initialized */
5668 atomic_set(&bp->intr_sem, 1); 6251 atomic_set(&bp->intr_sem, 1);
5669 6252
@@ -5792,6 +6375,11 @@ load_int_disable:
5792 /* Release IRQs */ 6375 /* Release IRQs */
5793 bnx2x_free_irq(bp); 6376 bnx2x_free_irq(bp);
5794 6377
6378 /* Free SKBs, SGEs, TPA pool and driver internals */
6379 bnx2x_free_skbs(bp);
6380 for_each_queue(bp, i)
6381 bnx2x_free_rx_sge_range(bp, bp->fp + i,
6382 RX_SGE_CNT*NUM_RX_SGE_PAGES);
5795load_error: 6383load_error:
5796 bnx2x_free_mem(bp); 6384 bnx2x_free_mem(bp);
5797 6385
@@ -6090,8 +6678,11 @@ unload_error:
6090 if (!BP_NOMCP(bp)) 6678 if (!BP_NOMCP(bp))
6091 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE); 6679 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE);
6092 6680
6093 /* Free SKBs and driver internals */ 6681 /* Free SKBs, SGEs, TPA pool and driver internals */
6094 bnx2x_free_skbs(bp); 6682 bnx2x_free_skbs(bp);
6683 for_each_queue(bp, i)
6684 bnx2x_free_rx_sge_range(bp, bp->fp + i,
6685 RX_SGE_CNT*NUM_RX_SGE_PAGES);
6095 bnx2x_free_mem(bp); 6686 bnx2x_free_mem(bp);
6096 6687
6097 bp->state = BNX2X_STATE_CLOSED; 6688 bp->state = BNX2X_STATE_CLOSED;
@@ -6767,6 +7358,16 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
6767 printk(KERN_ERR PFX 7358 printk(KERN_ERR PFX
6768 "MCP disabled, must load devices in order!\n"); 7359 "MCP disabled, must load devices in order!\n");
6769 7360
7361 /* Set TPA flags */
7362 if (disable_tpa) {
7363 bp->flags &= ~TPA_ENABLE_FLAG;
7364 bp->dev->features &= ~NETIF_F_LRO;
7365 } else {
7366 bp->flags |= TPA_ENABLE_FLAG;
7367 bp->dev->features |= NETIF_F_LRO;
7368 }
7369
7370
6770 bp->tx_ring_size = MAX_TX_AVAIL; 7371 bp->tx_ring_size = MAX_TX_AVAIL;
6771 bp->rx_ring_size = MAX_RX_AVAIL; 7372 bp->rx_ring_size = MAX_RX_AVAIL;
6772 7373
@@ -7556,6 +8157,33 @@ static int bnx2x_set_coalesce(struct net_device *dev,
7556 return 0; 8157 return 0;
7557} 8158}
7558 8159
8160static int bnx2x_set_flags(struct net_device *dev, u32 data)
8161{
8162 struct bnx2x *bp = netdev_priv(dev);
8163 int changed = 0;
8164 int rc = 0;
8165
8166 if (data & ETH_FLAG_LRO) {
8167 if (!(dev->features & NETIF_F_LRO)) {
8168 dev->features |= NETIF_F_LRO;
8169 bp->flags |= TPA_ENABLE_FLAG;
8170 changed = 1;
8171 }
8172
8173 } else if (dev->features & NETIF_F_LRO) {
8174 dev->features &= ~NETIF_F_LRO;
8175 bp->flags &= ~TPA_ENABLE_FLAG;
8176 changed = 1;
8177 }
8178
8179 if (changed && netif_running(dev)) {
8180 bnx2x_nic_unload(bp, UNLOAD_NORMAL);
8181 rc = bnx2x_nic_load(bp, LOAD_NORMAL);
8182 }
8183
8184 return rc;
8185}
8186
7559static void bnx2x_get_ringparam(struct net_device *dev, 8187static void bnx2x_get_ringparam(struct net_device *dev,
7560 struct ethtool_ringparam *ering) 8188 struct ethtool_ringparam *ering)
7561{ 8189{
@@ -7896,35 +8524,37 @@ static int bnx2x_phys_id(struct net_device *dev, u32 data)
7896} 8524}
7897 8525
7898static struct ethtool_ops bnx2x_ethtool_ops = { 8526static struct ethtool_ops bnx2x_ethtool_ops = {
7899 .get_settings = bnx2x_get_settings, 8527 .get_settings = bnx2x_get_settings,
7900 .set_settings = bnx2x_set_settings, 8528 .set_settings = bnx2x_set_settings,
7901 .get_drvinfo = bnx2x_get_drvinfo, 8529 .get_drvinfo = bnx2x_get_drvinfo,
7902 .get_wol = bnx2x_get_wol, 8530 .get_wol = bnx2x_get_wol,
7903 .set_wol = bnx2x_set_wol, 8531 .set_wol = bnx2x_set_wol,
7904 .get_msglevel = bnx2x_get_msglevel, 8532 .get_msglevel = bnx2x_get_msglevel,
7905 .set_msglevel = bnx2x_set_msglevel, 8533 .set_msglevel = bnx2x_set_msglevel,
7906 .nway_reset = bnx2x_nway_reset, 8534 .nway_reset = bnx2x_nway_reset,
7907 .get_link = ethtool_op_get_link, 8535 .get_link = ethtool_op_get_link,
7908 .get_eeprom_len = bnx2x_get_eeprom_len, 8536 .get_eeprom_len = bnx2x_get_eeprom_len,
7909 .get_eeprom = bnx2x_get_eeprom, 8537 .get_eeprom = bnx2x_get_eeprom,
7910 .set_eeprom = bnx2x_set_eeprom, 8538 .set_eeprom = bnx2x_set_eeprom,
7911 .get_coalesce = bnx2x_get_coalesce, 8539 .get_coalesce = bnx2x_get_coalesce,
7912 .set_coalesce = bnx2x_set_coalesce, 8540 .set_coalesce = bnx2x_set_coalesce,
7913 .get_ringparam = bnx2x_get_ringparam, 8541 .get_ringparam = bnx2x_get_ringparam,
7914 .set_ringparam = bnx2x_set_ringparam, 8542 .set_ringparam = bnx2x_set_ringparam,
7915 .get_pauseparam = bnx2x_get_pauseparam, 8543 .get_pauseparam = bnx2x_get_pauseparam,
7916 .set_pauseparam = bnx2x_set_pauseparam, 8544 .set_pauseparam = bnx2x_set_pauseparam,
7917 .get_rx_csum = bnx2x_get_rx_csum, 8545 .get_rx_csum = bnx2x_get_rx_csum,
7918 .set_rx_csum = bnx2x_set_rx_csum, 8546 .set_rx_csum = bnx2x_set_rx_csum,
7919 .get_tx_csum = ethtool_op_get_tx_csum, 8547 .get_tx_csum = ethtool_op_get_tx_csum,
7920 .set_tx_csum = ethtool_op_set_tx_csum, 8548 .set_tx_csum = ethtool_op_set_tx_csum,
7921 .get_sg = ethtool_op_get_sg, 8549 .set_flags = bnx2x_set_flags,
7922 .set_sg = ethtool_op_set_sg, 8550 .get_flags = ethtool_op_get_flags,
8551 .get_sg = ethtool_op_get_sg,
8552 .set_sg = ethtool_op_set_sg,
7923 .get_tso = ethtool_op_get_tso, 8553 .get_tso = ethtool_op_get_tso,
7924 .set_tso = bnx2x_set_tso, 8554 .set_tso = bnx2x_set_tso,
7925 .self_test_count = bnx2x_self_test_count, 8555 .self_test_count = bnx2x_self_test_count,
7926 .self_test = bnx2x_self_test, 8556 .self_test = bnx2x_self_test,
7927 .get_strings = bnx2x_get_strings, 8557 .get_strings = bnx2x_get_strings,
7928 .phys_id = bnx2x_phys_id, 8558 .phys_id = bnx2x_phys_id,
7929 .get_stats_count = bnx2x_get_stats_count, 8559 .get_stats_count = bnx2x_get_stats_count,
7930 .get_ethtool_stats = bnx2x_get_ethtool_stats, 8560 .get_ethtool_stats = bnx2x_get_ethtool_stats,