aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/net/bnx2x.h276
-rw-r--r--drivers/net/bnx2x_main.c744
2 files changed, 884 insertions, 136 deletions
diff --git a/drivers/net/bnx2x.h b/drivers/net/bnx2x.h
index f7d73d6c3981..4bf4f7b205f2 100644
--- a/drivers/net/bnx2x.h
+++ b/drivers/net/bnx2x.h
@@ -132,8 +132,8 @@
132#define is_multi(bp) (bp->num_queues > 1) 132#define is_multi(bp) (bp->num_queues > 1)
133 133
134 134
135/* fast path */
135 136
136#define bnx2x_sp_check(bp, var) ((bp->slowpath) ? (&bp->slowpath->var) : NULL)
137struct sw_rx_bd { 137struct sw_rx_bd {
138 struct sk_buff *skb; 138 struct sk_buff *skb;
139 DECLARE_PCI_UNMAP_ADDR(mapping) 139 DECLARE_PCI_UNMAP_ADDR(mapping)
@@ -144,6 +144,52 @@ struct sw_tx_bd {
144 u16 first_bd; 144 u16 first_bd;
145}; 145};
146 146
147struct sw_rx_page {
148 struct page *page;
149 DECLARE_PCI_UNMAP_ADDR(mapping)
150};
151
152
153/* MC hsi */
154#define BCM_PAGE_SHIFT 12
155#define BCM_PAGE_SIZE (1 << BCM_PAGE_SHIFT)
156#define BCM_PAGE_MASK (~(BCM_PAGE_SIZE - 1))
157#define BCM_PAGE_ALIGN(addr) (((addr) + BCM_PAGE_SIZE - 1) & BCM_PAGE_MASK)
158
159#define PAGES_PER_SGE_SHIFT 0
160#define PAGES_PER_SGE (1 << PAGES_PER_SGE_SHIFT)
161
162/* SGE ring related macros */
163#define NUM_RX_SGE_PAGES 2
164#define RX_SGE_CNT (BCM_PAGE_SIZE / sizeof(struct eth_rx_sge))
165#define MAX_RX_SGE_CNT (RX_SGE_CNT - 2)
166/* RX_SGE_CNT is promised to be a power of 2 */
167#define RX_SGE_MASK (RX_SGE_CNT - 1)
168#define NUM_RX_SGE (RX_SGE_CNT * NUM_RX_SGE_PAGES)
169#define MAX_RX_SGE (NUM_RX_SGE - 1)
170#define NEXT_SGE_IDX(x) ((((x) & RX_SGE_MASK) == \
171 (MAX_RX_SGE_CNT - 1)) ? (x) + 3 : (x) + 1)
172#define RX_SGE(x) ((x) & MAX_RX_SGE)
173
174/* SGE producer mask related macros */
175/* Number of bits in one sge_mask array element */
176#define RX_SGE_MASK_ELEM_SZ 64
177#define RX_SGE_MASK_ELEM_SHIFT 6
178#define RX_SGE_MASK_ELEM_MASK ((u64)RX_SGE_MASK_ELEM_SZ - 1)
179
180/* Creates a bitmask of all ones in less significant bits.
181 idx - index of the most significant bit in the created mask */
182#define RX_SGE_ONES_MASK(idx) \
183 (((u64)0x1 << (((idx) & RX_SGE_MASK_ELEM_MASK) + 1)) - 1)
184#define RX_SGE_MASK_ELEM_ONE_MASK ((u64)(~0))
185
186/* Number of u64 elements in SGE mask array */
187#define RX_SGE_MASK_LEN ((NUM_RX_SGE_PAGES * RX_SGE_CNT) / \
188 RX_SGE_MASK_ELEM_SZ)
189#define RX_SGE_MASK_LEN_MASK (RX_SGE_MASK_LEN - 1)
190#define NEXT_SGE_MASK_ELEM(el) (((el) + 1) & RX_SGE_MASK_LEN_MASK)
191
192
147struct bnx2x_fastpath { 193struct bnx2x_fastpath {
148 194
149 struct napi_struct napi; 195 struct napi_struct napi;
@@ -159,7 +205,8 @@ struct bnx2x_fastpath {
159 struct eth_tx_bd *tx_desc_ring; 205 struct eth_tx_bd *tx_desc_ring;
160 dma_addr_t tx_desc_mapping; 206 dma_addr_t tx_desc_mapping;
161 207
162 struct sw_rx_bd *rx_buf_ring; 208 struct sw_rx_bd *rx_buf_ring; /* BDs mappings ring */
209 struct sw_rx_page *rx_page_ring; /* SGE pages mappings ring */
163 210
164 struct eth_rx_bd *rx_desc_ring; 211 struct eth_rx_bd *rx_desc_ring;
165 dma_addr_t rx_desc_mapping; 212 dma_addr_t rx_desc_mapping;
@@ -167,6 +214,12 @@ struct bnx2x_fastpath {
167 union eth_rx_cqe *rx_comp_ring; 214 union eth_rx_cqe *rx_comp_ring;
168 dma_addr_t rx_comp_mapping; 215 dma_addr_t rx_comp_mapping;
169 216
217 /* SGE ring */
218 struct eth_rx_sge *rx_sge_ring;
219 dma_addr_t rx_sge_mapping;
220
221 u64 sge_mask[RX_SGE_MASK_LEN];
222
170 int state; 223 int state;
171#define BNX2X_FP_STATE_CLOSED 0 224#define BNX2X_FP_STATE_CLOSED 0
172#define BNX2X_FP_STATE_IRQ 0x80000 225#define BNX2X_FP_STATE_IRQ 0x80000
@@ -197,27 +250,152 @@ struct bnx2x_fastpath {
197 u16 rx_bd_cons; 250 u16 rx_bd_cons;
198 u16 rx_comp_prod; 251 u16 rx_comp_prod;
199 u16 rx_comp_cons; 252 u16 rx_comp_cons;
253 u16 rx_sge_prod;
254 /* The last maximal completed SGE */
255 u16 last_max_sge;
200 u16 *rx_cons_sb; 256 u16 *rx_cons_sb;
257 u16 *rx_bd_cons_sb;
201 258
202 unsigned long tx_pkt, 259 unsigned long tx_pkt,
203 rx_pkt, 260 rx_pkt,
204 rx_calls; 261 rx_calls,
262 rx_alloc_failed;
263 /* TPA related */
264 struct sw_rx_bd tpa_pool[ETH_MAX_AGGREGATION_QUEUES_E1H];
265 u8 tpa_state[ETH_MAX_AGGREGATION_QUEUES_E1H];
266#define BNX2X_TPA_START 1
267#define BNX2X_TPA_STOP 2
268 u8 disable_tpa;
269#ifdef BNX2X_STOP_ON_ERROR
270 u64 tpa_queue_used;
271#endif
205 272
206 struct bnx2x *bp; /* parent */ 273 struct bnx2x *bp; /* parent */
207}; 274};
208 275
209#define bnx2x_fp(bp, nr, var) (bp->fp[nr].var) 276#define bnx2x_fp(bp, nr, var) (bp->fp[nr].var)
277
278
279/* MC hsi */
280#define MAX_FETCH_BD 13 /* HW max BDs per packet */
281#define RX_COPY_THRESH 92
282
283#define NUM_TX_RINGS 16
284#define TX_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_tx_bd))
285#define MAX_TX_DESC_CNT (TX_DESC_CNT - 1)
286#define NUM_TX_BD (TX_DESC_CNT * NUM_TX_RINGS)
287#define MAX_TX_BD (NUM_TX_BD - 1)
288#define MAX_TX_AVAIL (MAX_TX_DESC_CNT * NUM_TX_RINGS - 2)
289#define NEXT_TX_IDX(x) ((((x) & MAX_TX_DESC_CNT) == \
290 (MAX_TX_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
291#define TX_BD(x) ((x) & MAX_TX_BD)
292#define TX_BD_POFF(x) ((x) & MAX_TX_DESC_CNT)
293
294/* The RX BD ring is special, each bd is 8 bytes but the last one is 16 */
295#define NUM_RX_RINGS 8
296#define RX_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_rx_bd))
297#define MAX_RX_DESC_CNT (RX_DESC_CNT - 2)
298#define RX_DESC_MASK (RX_DESC_CNT - 1)
299#define NUM_RX_BD (RX_DESC_CNT * NUM_RX_RINGS)
300#define MAX_RX_BD (NUM_RX_BD - 1)
301#define MAX_RX_AVAIL (MAX_RX_DESC_CNT * NUM_RX_RINGS - 2)
302#define NEXT_RX_IDX(x) ((((x) & RX_DESC_MASK) == \
303 (MAX_RX_DESC_CNT - 1)) ? (x) + 3 : (x) + 1)
304#define RX_BD(x) ((x) & MAX_RX_BD)
305
306/* As long as CQE is 4 times bigger than BD entry we have to allocate
307 4 times more pages for CQ ring in order to keep it balanced with
308 BD ring */
309#define NUM_RCQ_RINGS (NUM_RX_RINGS * 4)
310#define RCQ_DESC_CNT (BCM_PAGE_SIZE / sizeof(union eth_rx_cqe))
311#define MAX_RCQ_DESC_CNT (RCQ_DESC_CNT - 1)
312#define NUM_RCQ_BD (RCQ_DESC_CNT * NUM_RCQ_RINGS)
313#define MAX_RCQ_BD (NUM_RCQ_BD - 1)
314#define MAX_RCQ_AVAIL (MAX_RCQ_DESC_CNT * NUM_RCQ_RINGS - 2)
315#define NEXT_RCQ_IDX(x) ((((x) & MAX_RCQ_DESC_CNT) == \
316 (MAX_RCQ_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
317#define RCQ_BD(x) ((x) & MAX_RCQ_BD)
318
319
210/* This is needed for determining of last_max */ 320/* This is needed for determining of last_max */
211#define SUB_S16(a, b) (s16)((s16)(a) - (s16)(b)) 321#define SUB_S16(a, b) (s16)((s16)(a) - (s16)(b))
212 322
/*
 * Bit helpers for the SGE producer bitmap (an array of u64 elements).
 *
 * __SGE_MASK_SET_BIT / __SGE_MASK_CLEAR_BIT operate on a single element:
 *   el  - lvalue naming one u64 element of the bitmap
 *   bit - bit position inside that element
 *
 * All macro arguments are parenthesized so that callers may pass
 * arbitrary expressions (e.g. "ring + 1" for fp).
 */
#define __SGE_MASK_SET_BIT(el, bit) \
	do { \
		(el) = ((el) | ((u64)0x1 << (bit))); \
	} while (0)

#define __SGE_MASK_CLEAR_BIT(el, bit) \
	do { \
		(el) = ((el) & (~((u64)0x1 << (bit)))); \
	} while (0)

/*
 * Set/clear the bitmap bit that corresponds to SGE index idx:
 * the element is selected by idx >> RX_SGE_MASK_ELEM_SHIFT and the bit
 * inside it by idx & RX_SGE_MASK_ELEM_MASK.
 *   fp  - pointer to an object that has an sge_mask[] array
 *   idx - SGE index (evaluated twice - avoid side effects)
 */
#define SGE_MASK_SET_BIT(fp, idx) \
	__SGE_MASK_SET_BIT((fp)->sge_mask[(idx) >> RX_SGE_MASK_ELEM_SHIFT], \
			   ((idx) & RX_SGE_MASK_ELEM_MASK))

#define SGE_MASK_CLEAR_BIT(fp, idx) \
	__SGE_MASK_CLEAR_BIT((fp)->sge_mask[(idx) >> RX_SGE_MASK_ELEM_SHIFT], \
			     ((idx) & RX_SGE_MASK_ELEM_MASK))
340
341
342/* used on a CID received from the HW */
343#define SW_CID(x) (le32_to_cpu(x) & \
344 (COMMON_RAMROD_ETH_RX_CQE_CID >> 7))
345#define CQE_CMD(x) (le32_to_cpu(x) >> \
346 COMMON_RAMROD_ETH_RX_CQE_CMD_ID_SHIFT)
347
213#define BD_UNMAP_ADDR(bd) HILO_U64(le32_to_cpu((bd)->addr_hi), \ 348#define BD_UNMAP_ADDR(bd) HILO_U64(le32_to_cpu((bd)->addr_hi), \
214 le32_to_cpu((bd)->addr_lo)) 349 le32_to_cpu((bd)->addr_lo))
215#define BD_UNMAP_LEN(bd) (le16_to_cpu((bd)->nbytes)) 350#define BD_UNMAP_LEN(bd) (le16_to_cpu((bd)->nbytes))
216 351
352
353#define DPM_TRIGER_TYPE 0x40
/*
 * Write val (cast to u32) to the doorbell of connection cid.
 * The target address is bp->doorbells + BCM_PAGE_SIZE * cid +
 * DPM_TRIGER_TYPE.  cid and val are parenthesized so that expression
 * arguments (e.g. "base + i") expand with the intended precedence.
 */
#define DOORBELL(bp, cid, val) \
	do { \
		writel((u32)(val), (bp)->doorbells + (BCM_PAGE_SIZE * (cid)) + \
		       DPM_TRIGER_TYPE); \
	} while (0)
359
360
/* TX CSUM helpers */

/* Distance between the TCP checksum field offset and skb->csum_offset */
#define SKB_CS_OFF(skb)		(offsetof(struct tcphdr, check) - \
				 (skb)->csum_offset)
/* 16-bit value located csum_offset bytes past the transport header */
#define SKB_CS(skb)		(*(u16 *)(skb_transport_header(skb) + \
				  (skb)->csum_offset))
366
367#define pbd_tcp_flags(skb) (ntohl(tcp_flag_word(tcp_hdr(skb)))>>16 & 0xff)
368
369#define XMIT_PLAIN 0
370#define XMIT_CSUM_V4 0x1
371#define XMIT_CSUM_V6 0x2
372#define XMIT_CSUM_TCP 0x4
373#define XMIT_GSO_V4 0x8
374#define XMIT_GSO_V6 0x10
375
376#define XMIT_CSUM (XMIT_CSUM_V4 | XMIT_CSUM_V6)
377#define XMIT_GSO (XMIT_GSO_V4 | XMIT_GSO_V6)
378
379
217/* stuff added to make the code fit 80Col */ 380/* stuff added to make the code fit 80Col */
218 381
219#define CQE_TYPE(cqe_fp_flags) ((cqe_fp_flags) & ETH_FAST_PATH_RX_CQE_TYPE) 382#define CQE_TYPE(cqe_fp_flags) ((cqe_fp_flags) & ETH_FAST_PATH_RX_CQE_TYPE)
220 383
384#define TPA_TYPE_START ETH_FAST_PATH_RX_CQE_START_FLG
385#define TPA_TYPE_END ETH_FAST_PATH_RX_CQE_END_FLG
386#define TPA_TYPE(cqe_fp_flags) ((cqe_fp_flags) & \
387 (TPA_TYPE_START | TPA_TYPE_END))
388
/*
 * True when neither the IP nor the L4 "checksum not validated" flag is set
 * in the fast-path CQE status flags.
 *   cqe - pointer to the CQE (parenthesized: may be any pointer expression)
 */
#define BNX2X_RX_SUM_OK(cqe) \
			(!((cqe)->fast_path_cqe.status_flags & \
			 (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG | \
			  ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)))

/*
 * True when the "over ethernet protocol" field of the CQE parsing flags
 * equals 1.
 */
#define BNX2X_RX_SUM_FIX(cqe) \
			((le16_to_cpu((cqe)->fast_path_cqe.pars_flags.flags) & \
			  PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) == \
			 (1 << PARSING_FLAGS_OVER_ETHERNET_PROTOCOL_SHIFT))
398
221#define ETH_RX_ERROR_FALGS (ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG | \ 399#define ETH_RX_ERROR_FALGS (ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG | \
222 ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG | \ 400 ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG | \
223 ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG) 401 ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG)
@@ -247,6 +425,9 @@ struct bnx2x_fastpath {
247#define BNX2X_TX_SB_INDEX \ 425#define BNX2X_TX_SB_INDEX \
248 (&fp->status_blk->c_status_block.index_values[C_SB_ETH_TX_CQ_INDEX]) 426 (&fp->status_blk->c_status_block.index_values[C_SB_ETH_TX_CQ_INDEX])
249 427
428
429/* end of fast path */
430
250/* common */ 431/* common */
251 432
252struct bnx2x_common { 433struct bnx2x_common {
@@ -546,7 +727,7 @@ struct bnx2x {
546 struct pci_dev *pdev; 727 struct pci_dev *pdev;
547 728
548 atomic_t intr_sem; 729 atomic_t intr_sem;
549 struct msix_entry msix_table[MAX_CONTEXT+1]; 730 struct msix_entry msix_table[MAX_CONTEXT+1];
550 731
551 int tx_ring_size; 732 int tx_ring_size;
552 733
@@ -604,6 +785,7 @@ struct bnx2x {
604#define USING_DAC_FLAG 0x10 785#define USING_DAC_FLAG 0x10
605#define USING_MSIX_FLAG 0x20 786#define USING_MSIX_FLAG 0x20
606#define ASF_ENABLE_FLAG 0x40 787#define ASF_ENABLE_FLAG 0x40
788#define TPA_ENABLE_FLAG 0x80
607#define NO_MCP_FLAG 0x100 789#define NO_MCP_FLAG 0x100
608#define BP_NOMCP(bp) (bp->flags & NO_MCP_FLAG) 790#define BP_NOMCP(bp) (bp->flags & NO_MCP_FLAG)
609 791
@@ -725,76 +907,6 @@ void bnx2x_write_dmae(struct bnx2x *bp, dma_addr_t dma_addr, u32 dst_addr,
725 u32 len32); 907 u32 len32);
726int bnx2x_set_gpio(struct bnx2x *bp, int gpio_num, u32 mode); 908int bnx2x_set_gpio(struct bnx2x *bp, int gpio_num, u32 mode);
727 909
728
729/* MC hsi */
730#define RX_COPY_THRESH 92
731#define BCM_PAGE_SHIFT 12
732#define BCM_PAGE_SIZE (1 << BCM_PAGE_SHIFT)
733#define BCM_PAGE_MASK (~(BCM_PAGE_SIZE - 1))
734#define BCM_PAGE_ALIGN(addr) (((addr) + BCM_PAGE_SIZE - 1) & BCM_PAGE_MASK)
735
736#define NUM_TX_RINGS 16
737#define TX_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_tx_bd))
738#define MAX_TX_DESC_CNT (TX_DESC_CNT - 1)
739#define NUM_TX_BD (TX_DESC_CNT * NUM_TX_RINGS)
740#define MAX_TX_BD (NUM_TX_BD - 1)
741#define MAX_TX_AVAIL (MAX_TX_DESC_CNT * NUM_TX_RINGS - 2)
742#define NEXT_TX_IDX(x) ((((x) & MAX_TX_DESC_CNT) == \
743 (MAX_TX_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
744#define TX_BD(x) ((x) & MAX_TX_BD)
745#define TX_BD_POFF(x) ((x) & MAX_TX_DESC_CNT)
746
747/* The RX BD ring is special, each bd is 8 bytes but the last one is 16 */
748#define NUM_RX_RINGS 8
749#define RX_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_rx_bd))
750#define MAX_RX_DESC_CNT (RX_DESC_CNT - 2)
751#define RX_DESC_MASK (RX_DESC_CNT - 1)
752#define NUM_RX_BD (RX_DESC_CNT * NUM_RX_RINGS)
753#define MAX_RX_BD (NUM_RX_BD - 1)
754#define MAX_RX_AVAIL (MAX_RX_DESC_CNT * NUM_RX_RINGS - 2)
755#define NEXT_RX_IDX(x) ((((x) & RX_DESC_MASK) == \
756 (MAX_RX_DESC_CNT - 1)) ? (x) + 3 : (x) + 1)
757#define RX_BD(x) ((x) & MAX_RX_BD)
758
759#define NUM_RCQ_RINGS (NUM_RX_RINGS * 2)
760#define RCQ_DESC_CNT (BCM_PAGE_SIZE / sizeof(union eth_rx_cqe))
761#define MAX_RCQ_DESC_CNT (RCQ_DESC_CNT - 1)
762#define NUM_RCQ_BD (RCQ_DESC_CNT * NUM_RCQ_RINGS)
763#define MAX_RCQ_BD (NUM_RCQ_BD - 1)
764#define MAX_RCQ_AVAIL (MAX_RCQ_DESC_CNT * NUM_RCQ_RINGS - 2)
765#define NEXT_RCQ_IDX(x) ((((x) & MAX_RCQ_DESC_CNT) == \
766 (MAX_RCQ_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
767#define RCQ_BD(x) ((x) & MAX_RCQ_BD)
768
769
770/* used on a CID received from the HW */
771#define SW_CID(x) (le32_to_cpu(x) & \
772 (COMMON_RAMROD_ETH_RX_CQE_CID >> 1))
773#define CQE_CMD(x) (le32_to_cpu(x) >> \
774 COMMON_RAMROD_ETH_RX_CQE_CMD_ID_SHIFT)
775
776#define STROM_ASSERT_ARRAY_SIZE 50
777
778
779
780/* must be used on a CID before placing it on a HW ring */
781#define HW_CID(bp, x) ((BP_PORT(bp) << 23) | (BP_E1HVN(bp) << 17) | x)
782
783#define SP_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_spe))
784#define MAX_SP_DESC_CNT (SP_DESC_CNT - 1)
785
786
787#define BNX2X_BTR 3
788#define MAX_SPQ_PENDING 8
789
790
791#define DPM_TRIGER_TYPE 0x40
792#define DOORBELL(bp, cid, val) \
793 do { \
794 writel((u32)val, (bp)->doorbells + (BCM_PAGE_SIZE * cid) + \
795 DPM_TRIGER_TYPE); \
796 } while (0)
797
798static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms, 910static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
799 int wait) 911 int wait)
800{ 912{
@@ -874,14 +986,20 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
874#define BNX2X_LOOPBACK_FAILED (BNX2X_MAC_LOOPBACK_FAILED | \ 986#define BNX2X_LOOPBACK_FAILED (BNX2X_MAC_LOOPBACK_FAILED | \
875 BNX2X_PHY_LOOPBACK_FAILED) 987 BNX2X_PHY_LOOPBACK_FAILED)
876 988
877#define pbd_tcp_flags(skb) (ntohl(tcp_flag_word(tcp_hdr(skb)))>>16 & 0xff) 989
990#define STROM_ASSERT_ARRAY_SIZE 50
991
878 992
879/* must be used on a CID before placing it on a HW ring */ 993/* must be used on a CID before placing it on a HW ring */
/*
 * Combine a CID with the port number (bit 23 and up) and the E1H VN
 * (bit 17 and up).  x is parenthesized so that expression arguments are
 * OR-ed in as a whole.
 */
#define HW_CID(bp, x)		((BP_PORT(bp) << 23) | (BP_E1HVN(bp) << 17) | (x))
995
996#define SP_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_spe))
997#define MAX_SP_DESC_CNT (SP_DESC_CNT - 1)
998
999
1000#define BNX2X_BTR 3
1001#define MAX_SPQ_PENDING 8
880 1002
881#define BNX2X_RX_SUM_OK(cqe) \
882 (!(cqe->fast_path_cqe.status_flags & \
883 (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG | \
884 ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)))
885 1003
886/* CMNG constants 1004/* CMNG constants
887 derived from lab experiments, and not from system spec calculations !!! */ 1005 derived from lab experiments, and not from system spec calculations !!! */
diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index ccfe33c110b4..fabde5555e32 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -79,6 +79,7 @@ MODULE_VERSION(DRV_MODULE_VERSION);
79static int use_inta; 79static int use_inta;
80static int poll; 80static int poll;
81static int debug; 81static int debug;
82static int disable_tpa;
82static int nomcp; 83static int nomcp;
83static int load_count[3]; /* 0-common, 1-port0, 2-port1 */ 84static int load_count[3]; /* 0-common, 1-port0, 2-port1 */
84static int use_multi; 85static int use_multi;
@@ -86,6 +87,7 @@ static int use_multi;
86module_param(use_inta, int, 0); 87module_param(use_inta, int, 0);
87module_param(poll, int, 0); 88module_param(poll, int, 0);
88module_param(debug, int, 0); 89module_param(debug, int, 0);
90module_param(disable_tpa, int, 0);
89module_param(nomcp, int, 0); 91module_param(nomcp, int, 0);
90MODULE_PARM_DESC(use_inta, "use INT#A instead of MSI-X"); 92MODULE_PARM_DESC(use_inta, "use INT#A instead of MSI-X");
91MODULE_PARM_DESC(poll, "use polling (for debug)"); 93MODULE_PARM_DESC(poll, "use polling (for debug)");
@@ -512,13 +514,16 @@ static void bnx2x_panic_dump(struct bnx2x *bp)
512 i, fp->tx_pkt_prod, fp->tx_pkt_cons, fp->tx_bd_prod, 514 i, fp->tx_pkt_prod, fp->tx_pkt_cons, fp->tx_bd_prod,
513 fp->tx_bd_cons, le16_to_cpu(*fp->tx_cons_sb)); 515 fp->tx_bd_cons, le16_to_cpu(*fp->tx_cons_sb));
514 BNX2X_ERR(" rx_comp_prod(%x) rx_comp_cons(%x)" 516 BNX2X_ERR(" rx_comp_prod(%x) rx_comp_cons(%x)"
515 " *rx_cons_sb(%x)\n", 517 " *rx_cons_sb(%x) *rx_bd_cons_sb(%x)"
518 " rx_sge_prod(%x) last_max_sge(%x)\n",
516 fp->rx_comp_prod, fp->rx_comp_cons, 519 fp->rx_comp_prod, fp->rx_comp_cons,
517 le16_to_cpu(*fp->rx_cons_sb)); 520 le16_to_cpu(*fp->rx_cons_sb),
521 le16_to_cpu(*fp->rx_bd_cons_sb),
522 fp->rx_sge_prod, fp->last_max_sge);
518 BNX2X_ERR(" fp_c_idx(%x) fp_u_idx(%x)" 523 BNX2X_ERR(" fp_c_idx(%x) fp_u_idx(%x)"
519 " bd data(%x,%x)\n", 524 " bd data(%x,%x) rx_alloc_failed(%lx)\n",
520 fp->fp_c_idx, fp->fp_u_idx, hw_prods->packets_prod, 525 fp->fp_c_idx, fp->fp_u_idx, hw_prods->packets_prod,
521 hw_prods->bds_prod); 526 hw_prods->bds_prod, fp->rx_alloc_failed);
522 527
523 start = TX_BD(le16_to_cpu(*fp->tx_cons_sb) - 10); 528 start = TX_BD(le16_to_cpu(*fp->tx_cons_sb) - 10);
524 end = TX_BD(le16_to_cpu(*fp->tx_cons_sb) + 245); 529 end = TX_BD(le16_to_cpu(*fp->tx_cons_sb) + 245);
@@ -548,6 +553,16 @@ static void bnx2x_panic_dump(struct bnx2x *bp)
548 j, rx_bd[1], rx_bd[0], sw_bd->skb); 553 j, rx_bd[1], rx_bd[0], sw_bd->skb);
549 } 554 }
550 555
556 start = 0;
557 end = RX_SGE_CNT*NUM_RX_SGE_PAGES;
558 for (j = start; j < end; j++) {
559 u32 *rx_sge = (u32 *)&fp->rx_sge_ring[j];
560 struct sw_rx_page *sw_page = &fp->rx_page_ring[j];
561
562 BNX2X_ERR("rx_sge[%x]=[%x:%x] sw_page=[%p]\n",
563 j, rx_sge[1], rx_sge[0], sw_page->page);
564 }
565
551 start = RCQ_BD(fp->rx_comp_cons - 10); 566 start = RCQ_BD(fp->rx_comp_cons - 10);
552 end = RCQ_BD(fp->rx_comp_cons + 503); 567 end = RCQ_BD(fp->rx_comp_cons + 503);
553 for (j = start; j < end; j++) { 568 for (j = start; j < end; j++) {
@@ -963,6 +978,62 @@ static void bnx2x_sp_event(struct bnx2x_fastpath *fp,
963 mb(); /* force bnx2x_wait_ramrod() to see the change */ 978 mb(); /* force bnx2x_wait_ramrod() to see the change */
964} 979}
965 980
981static inline void bnx2x_free_rx_sge(struct bnx2x *bp,
982 struct bnx2x_fastpath *fp, u16 index)
983{
984 struct sw_rx_page *sw_buf = &fp->rx_page_ring[index];
985 struct page *page = sw_buf->page;
986 struct eth_rx_sge *sge = &fp->rx_sge_ring[index];
987
988 /* Skip "next page" elements */
989 if (!page)
990 return;
991
992 pci_unmap_page(bp->pdev, pci_unmap_addr(sw_buf, mapping),
993 BCM_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE);
994 __free_pages(page, PAGES_PER_SGE_SHIFT);
995
996 sw_buf->page = NULL;
997 sge->addr_hi = 0;
998 sge->addr_lo = 0;
999}
1000
/*
 * Free the pages of SGE ring entries 0 .. last-1 (entry "last" itself is
 * not touched).
 */
static inline void bnx2x_free_rx_sge_range(struct bnx2x *bp,
					   struct bnx2x_fastpath *fp, int last)
{
	int idx = 0;

	while (idx < last) {
		bnx2x_free_rx_sge(bp, fp, idx);
		idx++;
	}
}
1009
1010static inline int bnx2x_alloc_rx_sge(struct bnx2x *bp,
1011 struct bnx2x_fastpath *fp, u16 index)
1012{
1013 struct page *page = alloc_pages(GFP_ATOMIC, PAGES_PER_SGE_SHIFT);
1014 struct sw_rx_page *sw_buf = &fp->rx_page_ring[index];
1015 struct eth_rx_sge *sge = &fp->rx_sge_ring[index];
1016 dma_addr_t mapping;
1017
1018 if (unlikely(page == NULL))
1019 return -ENOMEM;
1020
1021 mapping = pci_map_page(bp->pdev, page, 0, BCM_PAGE_SIZE*PAGES_PER_SGE,
1022 PCI_DMA_FROMDEVICE);
1023 if (unlikely(dma_mapping_error(mapping))) {
1024 __free_pages(page, PAGES_PER_SGE_SHIFT);
1025 return -ENOMEM;
1026 }
1027
1028 sw_buf->page = page;
1029 pci_unmap_addr_set(sw_buf, mapping, mapping);
1030
1031 sge->addr_hi = cpu_to_le32(U64_HI(mapping));
1032 sge->addr_lo = cpu_to_le32(U64_LO(mapping));
1033
1034 return 0;
1035}
1036
966static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp, 1037static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp,
967 struct bnx2x_fastpath *fp, u16 index) 1038 struct bnx2x_fastpath *fp, u16 index)
968{ 1039{
@@ -1016,12 +1087,310 @@ static void bnx2x_reuse_rx_skb(struct bnx2x_fastpath *fp,
1016 *prod_bd = *cons_bd; 1087 *prod_bd = *cons_bd;
1017} 1088}
1018 1089
1090static inline void bnx2x_update_last_max_sge(struct bnx2x_fastpath *fp,
1091 u16 idx)
1092{
1093 u16 last_max = fp->last_max_sge;
1094
1095 if (SUB_S16(idx, last_max) > 0)
1096 fp->last_max_sge = idx;
1097}
1098
1099static void bnx2x_clear_sge_mask_next_elems(struct bnx2x_fastpath *fp)
1100{
1101 int i, j;
1102
1103 for (i = 1; i <= NUM_RX_SGE_PAGES; i++) {
1104 int idx = RX_SGE_CNT * i - 1;
1105
1106 for (j = 0; j < 2; j++) {
1107 SGE_MASK_CLEAR_BIT(fp, idx);
1108 idx--;
1109 }
1110 }
1111}
1112
1113static void bnx2x_update_sge_prod(struct bnx2x_fastpath *fp,
1114 struct eth_fast_path_rx_cqe *fp_cqe)
1115{
1116 struct bnx2x *bp = fp->bp;
1117 u16 sge_len = BCM_PAGE_ALIGN(le16_to_cpu(fp_cqe->pkt_len) -
1118 le16_to_cpu(fp_cqe->len_on_bd)) >>
1119 BCM_PAGE_SHIFT;
1120 u16 last_max, last_elem, first_elem;
1121 u16 delta = 0;
1122 u16 i;
1123
1124 if (!sge_len)
1125 return;
1126
1127 /* First mark all used pages */
1128 for (i = 0; i < sge_len; i++)
1129 SGE_MASK_CLEAR_BIT(fp, RX_SGE(le16_to_cpu(fp_cqe->sgl[i])));
1130
1131 DP(NETIF_MSG_RX_STATUS, "fp_cqe->sgl[%d] = %d\n",
1132 sge_len - 1, le16_to_cpu(fp_cqe->sgl[sge_len - 1]));
1133
1134 /* Here we assume that the last SGE index is the biggest */
1135 prefetch((void *)(fp->sge_mask));
1136 bnx2x_update_last_max_sge(fp, le16_to_cpu(fp_cqe->sgl[sge_len - 1]));
1137
1138 last_max = RX_SGE(fp->last_max_sge);
1139 last_elem = last_max >> RX_SGE_MASK_ELEM_SHIFT;
1140 first_elem = RX_SGE(fp->rx_sge_prod) >> RX_SGE_MASK_ELEM_SHIFT;
1141
1142 /* If ring is not full */
1143 if (last_elem + 1 != first_elem)
1144 last_elem++;
1145
1146 /* Now update the prod */
1147 for (i = first_elem; i != last_elem; i = NEXT_SGE_MASK_ELEM(i)) {
1148 if (likely(fp->sge_mask[i]))
1149 break;
1150
1151 fp->sge_mask[i] = RX_SGE_MASK_ELEM_ONE_MASK;
1152 delta += RX_SGE_MASK_ELEM_SZ;
1153 }
1154
1155 if (delta > 0) {
1156 fp->rx_sge_prod += delta;
1157 /* clear page-end entries */
1158 bnx2x_clear_sge_mask_next_elems(fp);
1159 }
1160
1161 DP(NETIF_MSG_RX_STATUS,
1162 "fp->last_max_sge = %d fp->rx_sge_prod = %d\n",
1163 fp->last_max_sge, fp->rx_sge_prod);
1164}
1165
1166static inline void bnx2x_init_sge_ring_bit_mask(struct bnx2x_fastpath *fp)
1167{
1168 /* Set the mask to all 1-s: it's faster to compare to 0 than to 0xf-s */
1169 memset(fp->sge_mask, 0xff,
1170 (NUM_RX_SGE >> RX_SGE_MASK_ELEM_SHIFT)*sizeof(u64));
1171
1172 /* Clear the two last indeces in the page to 1:
1173 these are the indeces that correspond to the "next" element,
1174 hence will never be indicated and should be removed from
1175 the calculations. */
1176 bnx2x_clear_sge_mask_next_elems(fp);
1177}
1178
1179static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
1180 struct sk_buff *skb, u16 cons, u16 prod)
1181{
1182 struct bnx2x *bp = fp->bp;
1183 struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
1184 struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
1185 struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
1186 dma_addr_t mapping;
1187
1188 /* move empty skb from pool to prod and map it */
1189 prod_rx_buf->skb = fp->tpa_pool[queue].skb;
1190 mapping = pci_map_single(bp->pdev, fp->tpa_pool[queue].skb->data,
1191 bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
1192 pci_unmap_addr_set(prod_rx_buf, mapping, mapping);
1193
1194 /* move partial skb from cons to pool (don't unmap yet) */
1195 fp->tpa_pool[queue] = *cons_rx_buf;
1196
1197 /* mark bin state as start - print error if current state != stop */
1198 if (fp->tpa_state[queue] != BNX2X_TPA_STOP)
1199 BNX2X_ERR("start of bin not in stop [%d]\n", queue);
1200
1201 fp->tpa_state[queue] = BNX2X_TPA_START;
1202
1203 /* point prod_bd to new skb */
1204 prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
1205 prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
1206
1207#ifdef BNX2X_STOP_ON_ERROR
1208 fp->tpa_queue_used |= (1 << queue);
1209#ifdef __powerpc64__
1210 DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%lx\n",
1211#else
1212 DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%llx\n",
1213#endif
1214 fp->tpa_queue_used);
1215#endif
1216}
1217
1218static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
1219 struct sk_buff *skb,
1220 struct eth_fast_path_rx_cqe *fp_cqe,
1221 u16 cqe_idx)
1222{
1223 struct sw_rx_page *rx_pg, old_rx_pg;
1224 struct page *sge;
1225 u16 len_on_bd = le16_to_cpu(fp_cqe->len_on_bd);
1226 u32 i, frag_len, frag_size, pages;
1227 int err;
1228 int j;
1229
1230 frag_size = le16_to_cpu(fp_cqe->pkt_len) - len_on_bd;
1231 pages = BCM_PAGE_ALIGN(frag_size) >> BCM_PAGE_SHIFT;
1232
1233 /* This is needed in order to enable forwarding support */
1234 if (frag_size)
1235 skb_shinfo(skb)->gso_size = min((u32)BCM_PAGE_SIZE,
1236 max(frag_size, (u32)len_on_bd));
1237
1238#ifdef BNX2X_STOP_ON_ERROR
1239 if (pages > 8*PAGES_PER_SGE) {
1240 BNX2X_ERR("SGL length is too long: %d. CQE index is %d\n",
1241 pages, cqe_idx);
1242 BNX2X_ERR("fp_cqe->pkt_len = %d fp_cqe->len_on_bd = %d\n",
1243 fp_cqe->pkt_len, len_on_bd);
1244 bnx2x_panic();
1245 return -EINVAL;
1246 }
1247#endif
1248
1249 /* Run through the SGL and compose the fragmented skb */
1250 for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) {
1251 u16 sge_idx = RX_SGE(le16_to_cpu(fp_cqe->sgl[j]));
1252
1253 /* FW gives the indices of the SGE as if the ring is an array
1254 (meaning that "next" element will consume 2 indices) */
1255 frag_len = min(frag_size, (u32)(BCM_PAGE_SIZE*PAGES_PER_SGE));
1256 rx_pg = &fp->rx_page_ring[sge_idx];
1257 sge = rx_pg->page;
1258 old_rx_pg = *rx_pg;
1259
1260 /* If we fail to allocate a substitute page, we simply stop
1261 where we are and drop the whole packet */
1262 err = bnx2x_alloc_rx_sge(bp, fp, sge_idx);
1263 if (unlikely(err)) {
1264 fp->rx_alloc_failed++;
1265 return err;
1266 }
1267
1268 /* Unmap the page as we r going to pass it to the stack */
1269 pci_unmap_page(bp->pdev, pci_unmap_addr(&old_rx_pg, mapping),
1270 BCM_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE);
1271
1272 /* Add one frag and update the appropriate fields in the skb */
1273 skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len);
1274
1275 skb->data_len += frag_len;
1276 skb->truesize += frag_len;
1277 skb->len += frag_len;
1278
1279 frag_size -= frag_len;
1280 }
1281
1282 return 0;
1283}
1284
1285static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
1286 u16 queue, int pad, int len, union eth_rx_cqe *cqe,
1287 u16 cqe_idx)
1288{
1289 struct sw_rx_bd *rx_buf = &fp->tpa_pool[queue];
1290 struct sk_buff *skb = rx_buf->skb;
1291 /* alloc new skb */
1292 struct sk_buff *new_skb = netdev_alloc_skb(bp->dev, bp->rx_buf_size);
1293
1294 /* Unmap skb in the pool anyway, as we are going to change
1295 pool entry status to BNX2X_TPA_STOP even if new skb allocation
1296 fails. */
1297 pci_unmap_single(bp->pdev, pci_unmap_addr(rx_buf, mapping),
1298 bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
1299
1300 /* if alloc failed drop the packet and keep the buffer in the bin */
1301 if (likely(new_skb)) {
1302
1303 prefetch(skb);
1304 prefetch(((char *)(skb)) + 128);
1305
1306 /* else fix ip xsum and give it to the stack */
1307 /* (no need to map the new skb) */
1308#ifdef BNX2X_STOP_ON_ERROR
1309 if (pad + len > bp->rx_buf_size) {
1310 BNX2X_ERR("skb_put is about to fail... "
1311 "pad %d len %d rx_buf_size %d\n",
1312 pad, len, bp->rx_buf_size);
1313 bnx2x_panic();
1314 return;
1315 }
1316#endif
1317
1318 skb_reserve(skb, pad);
1319 skb_put(skb, len);
1320
1321 skb->protocol = eth_type_trans(skb, bp->dev);
1322 skb->ip_summed = CHECKSUM_UNNECESSARY;
1323
1324 {
1325 struct iphdr *iph;
1326
1327 iph = (struct iphdr *)skb->data;
1328 iph->check = 0;
1329 iph->check = ip_fast_csum((u8 *)iph, iph->ihl);
1330 }
1331
1332 if (!bnx2x_fill_frag_skb(bp, fp, skb,
1333 &cqe->fast_path_cqe, cqe_idx)) {
1334#ifdef BCM_VLAN
1335 if ((bp->vlgrp != NULL) &&
1336 (le16_to_cpu(cqe->fast_path_cqe.pars_flags.flags) &
1337 PARSING_FLAGS_VLAN))
1338 vlan_hwaccel_receive_skb(skb, bp->vlgrp,
1339 le16_to_cpu(cqe->fast_path_cqe.
1340 vlan_tag));
1341 else
1342#endif
1343 netif_receive_skb(skb);
1344 } else {
1345 DP(NETIF_MSG_RX_STATUS, "Failed to allocate new pages"
1346 " - dropping packet!\n");
1347 dev_kfree_skb(skb);
1348 }
1349
1350 bp->dev->last_rx = jiffies;
1351
1352 /* put new skb in bin */
1353 fp->tpa_pool[queue].skb = new_skb;
1354
1355 } else {
1356 DP(NETIF_MSG_RX_STATUS,
1357 "Failed to allocate new skb - dropping packet!\n");
1358 fp->rx_alloc_failed++;
1359 }
1360
1361 fp->tpa_state[queue] = BNX2X_TPA_STOP;
1362}
1363
1364static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
1365 struct bnx2x_fastpath *fp,
1366 u16 bd_prod, u16 rx_comp_prod,
1367 u16 rx_sge_prod)
1368{
1369 struct tstorm_eth_rx_producers rx_prods = {0};
1370 int i;
1371
1372 /* Update producers */
1373 rx_prods.bd_prod = bd_prod;
1374 rx_prods.cqe_prod = rx_comp_prod;
1375 rx_prods.sge_prod = rx_sge_prod;
1376
1377 for (i = 0; i < sizeof(struct tstorm_eth_rx_producers)/4; i++)
1378 REG_WR(bp, BAR_TSTRORM_INTMEM +
1379 TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)) + i*4,
1380 ((u32 *)&rx_prods)[i]);
1381
1382 DP(NETIF_MSG_RX_STATUS,
1383 "Wrote: bd_prod %u cqe_prod %u sge_prod %u\n",
1384 bd_prod, rx_comp_prod, rx_sge_prod);
1385}
1386
1019static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) 1387static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
1020{ 1388{
1021 struct bnx2x *bp = fp->bp; 1389 struct bnx2x *bp = fp->bp;
1022 u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons; 1390 u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons;
1023 u16 hw_comp_cons, sw_comp_cons, sw_comp_prod; 1391 u16 hw_comp_cons, sw_comp_cons, sw_comp_prod;
1024 int rx_pkt = 0; 1392 int rx_pkt = 0;
1393 u16 queue;
1025 1394
1026#ifdef BNX2X_STOP_ON_ERROR 1395#ifdef BNX2X_STOP_ON_ERROR
1027 if (unlikely(bp->panic)) 1396 if (unlikely(bp->panic))
@@ -1082,6 +1451,49 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
1082 len = le16_to_cpu(cqe->fast_path_cqe.pkt_len); 1451 len = le16_to_cpu(cqe->fast_path_cqe.pkt_len);
1083 pad = cqe->fast_path_cqe.placement_offset; 1452 pad = cqe->fast_path_cqe.placement_offset;
1084 1453
1454 /* If CQE is marked both TPA_START and TPA_END
1455 it is a non-TPA CQE */
1456 if ((!fp->disable_tpa) &&
1457 (TPA_TYPE(cqe_fp_flags) !=
1458 (TPA_TYPE_START | TPA_TYPE_END))) {
1459 queue = cqe->fast_path_cqe.queue_index;
1460
1461 if (TPA_TYPE(cqe_fp_flags) == TPA_TYPE_START) {
1462 DP(NETIF_MSG_RX_STATUS,
1463 "calling tpa_start on queue %d\n",
1464 queue);
1465
1466 bnx2x_tpa_start(fp, queue, skb,
1467 bd_cons, bd_prod);
1468 goto next_rx;
1469 }
1470
1471 if (TPA_TYPE(cqe_fp_flags) == TPA_TYPE_END) {
1472 DP(NETIF_MSG_RX_STATUS,
1473 "calling tpa_stop on queue %d\n",
1474 queue);
1475
1476 if (!BNX2X_RX_SUM_FIX(cqe))
1477 BNX2X_ERR("STOP on none TCP "
1478 "data\n");
1479
1480 /* This is a size of the linear data
1481 on this skb */
1482 len = le16_to_cpu(cqe->fast_path_cqe.
1483 len_on_bd);
1484 bnx2x_tpa_stop(bp, fp, queue, pad,
1485 len, cqe, comp_ring_cons);
1486#ifdef BNX2X_STOP_ON_ERROR
1487 if (bp->panic)
1488 return -EINVAL;
1489#endif
1490
1491 bnx2x_update_sge_prod(fp,
1492 &cqe->fast_path_cqe);
1493 goto next_cqe;
1494 }
1495 }
1496
1085 pci_dma_sync_single_for_device(bp->pdev, 1497 pci_dma_sync_single_for_device(bp->pdev,
1086 pci_unmap_addr(rx_buf, mapping), 1498 pci_unmap_addr(rx_buf, mapping),
1087 pad + RX_COPY_THRESH, 1499 pad + RX_COPY_THRESH,
@@ -1112,7 +1524,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
1112 DP(NETIF_MSG_RX_ERR, 1524 DP(NETIF_MSG_RX_ERR,
1113 "ERROR packet dropped " 1525 "ERROR packet dropped "
1114 "because of alloc failure\n"); 1526 "because of alloc failure\n");
1115 /* TBD count this as a drop? */ 1527 fp->rx_alloc_failed++;
1116 goto reuse_rx; 1528 goto reuse_rx;
1117 } 1529 }
1118 1530
@@ -1138,6 +1550,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
1138 DP(NETIF_MSG_RX_ERR, 1550 DP(NETIF_MSG_RX_ERR,
1139 "ERROR packet dropped because " 1551 "ERROR packet dropped because "
1140 "of alloc failure\n"); 1552 "of alloc failure\n");
1553 fp->rx_alloc_failed++;
1141reuse_rx: 1554reuse_rx:
1142 bnx2x_reuse_rx_skb(fp, skb, bd_cons, bd_prod); 1555 bnx2x_reuse_rx_skb(fp, skb, bd_cons, bd_prod);
1143 goto next_rx; 1556 goto next_rx;
@@ -1184,11 +1597,9 @@ next_cqe:
1184 fp->rx_comp_cons = sw_comp_cons; 1597 fp->rx_comp_cons = sw_comp_cons;
1185 fp->rx_comp_prod = sw_comp_prod; 1598 fp->rx_comp_prod = sw_comp_prod;
1186 1599
1187 REG_WR(bp, BAR_TSTRORM_INTMEM + 1600 /* Update producers */
1188 TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)), 1601 bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod,
1189 sw_comp_prod); 1602 fp->rx_sge_prod);
1190
1191
1192 mmiowb(); /* keep prod updates ordered */ 1603 mmiowb(); /* keep prod updates ordered */
1193 1604
1194 fp->rx_pkt += rx_pkt; 1605 fp->rx_pkt += rx_pkt;
@@ -2745,10 +3156,10 @@ static void bnx2x_stats_pmf_update(struct bnx2x *bp)
2745 dmae->opcode = (opcode | DMAE_CMD_C_DST_PCI); 3156 dmae->opcode = (opcode | DMAE_CMD_C_DST_PCI);
2746 dmae->src_addr_lo = (bp->port.port_stx >> 2) + DMAE_LEN32_RD_MAX; 3157 dmae->src_addr_lo = (bp->port.port_stx >> 2) + DMAE_LEN32_RD_MAX;
2747 dmae->src_addr_hi = 0; 3158 dmae->src_addr_hi = 0;
2748 dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, port_stats) 3159 dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, port_stats) +
2749 + DMAE_LEN32_RD_MAX * 4); 3160 DMAE_LEN32_RD_MAX * 4);
2750 dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, port_stats) 3161 dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, port_stats) +
2751 + DMAE_LEN32_RD_MAX * 4); 3162 DMAE_LEN32_RD_MAX * 4);
2752 dmae->len = (sizeof(struct host_port_stats) >> 2) - DMAE_LEN32_RD_MAX; 3163 dmae->len = (sizeof(struct host_port_stats) >> 2) - DMAE_LEN32_RD_MAX;
2753 dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, stats_comp)); 3164 dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, stats_comp));
2754 dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, stats_comp)); 3165 dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, stats_comp));
@@ -3365,11 +3776,12 @@ static void bnx2x_stats_update(struct bnx2x *bp)
3365 printk(KERN_DEBUG " tx avail (%4x) tx hc idx (%x)" 3776 printk(KERN_DEBUG " tx avail (%4x) tx hc idx (%x)"
3366 " tx pkt (%lx)\n", 3777 " tx pkt (%lx)\n",
3367 bnx2x_tx_avail(bp->fp), 3778 bnx2x_tx_avail(bp->fp),
3368 *bp->fp->tx_cons_sb, nstats->tx_packets); 3779 le16_to_cpu(*bp->fp->tx_cons_sb), nstats->tx_packets);
3369 printk(KERN_DEBUG " rx usage (%4x) rx hc idx (%x)" 3780 printk(KERN_DEBUG " rx usage (%4x) rx hc idx (%x)"
3370 " rx pkt (%lx)\n", 3781 " rx pkt (%lx)\n",
3371 (u16)(*bp->fp->rx_cons_sb - bp->fp->rx_comp_cons), 3782 (u16)(le16_to_cpu(*bp->fp->rx_cons_sb) -
3372 *bp->fp->rx_cons_sb, nstats->rx_packets); 3783 bp->fp->rx_comp_cons),
3784 le16_to_cpu(*bp->fp->rx_cons_sb), nstats->rx_packets);
3373 printk(KERN_DEBUG " %s (Xoff events %u) brb drops %u\n", 3785 printk(KERN_DEBUG " %s (Xoff events %u) brb drops %u\n",
3374 netif_queue_stopped(bp->dev)? "Xoff" : "Xon", 3786 netif_queue_stopped(bp->dev)? "Xoff" : "Xon",
3375 estats->driver_xoff, estats->brb_drop_lo); 3787 estats->driver_xoff, estats->brb_drop_lo);
@@ -3623,6 +4035,8 @@ static void bnx2x_init_sb(struct bnx2x *bp, int sb_id,
3623 REG_WR(bp, BAR_CSTRORM_INTMEM + 4035 REG_WR(bp, BAR_CSTRORM_INTMEM +
3624 ((CSTORM_SB_HOST_SB_ADDR_OFFSET(port, sb_id)) + 4), 4036 ((CSTORM_SB_HOST_SB_ADDR_OFFSET(port, sb_id)) + 4),
3625 U64_HI(section)); 4037 U64_HI(section));
4038 REG_WR8(bp, BAR_CSTRORM_INTMEM + FP_CSB_FUNC_OFF +
4039 CSTORM_SB_HOST_STATUS_BLOCK_OFFSET(port, sb_id), func);
3626 4040
3627 for (index = 0; index < HC_CSTORM_SB_NUM_INDICES; index++) 4041 for (index = 0; index < HC_CSTORM_SB_NUM_INDICES; index++)
3628 REG_WR16(bp, BAR_CSTRORM_INTMEM + 4042 REG_WR16(bp, BAR_CSTRORM_INTMEM +
@@ -3814,22 +4228,94 @@ static void bnx2x_update_coalesce(struct bnx2x *bp)
3814 } 4228 }
3815} 4229}
3816 4230
4231static inline void bnx2x_free_tpa_pool(struct bnx2x *bp,
4232 struct bnx2x_fastpath *fp, int last)
4233{
4234 int i;
4235
4236 for (i = 0; i < last; i++) {
4237 struct sw_rx_bd *rx_buf = &(fp->tpa_pool[i]);
4238 struct sk_buff *skb = rx_buf->skb;
4239
4240 if (skb == NULL) {
4241 DP(NETIF_MSG_IFDOWN, "tpa bin %d empty on free\n", i);
4242 continue;
4243 }
4244
4245 if (fp->tpa_state[i] == BNX2X_TPA_START)
4246 pci_unmap_single(bp->pdev,
4247 pci_unmap_addr(rx_buf, mapping),
4248 bp->rx_buf_use_size,
4249 PCI_DMA_FROMDEVICE);
4250
4251 dev_kfree_skb(skb);
4252 rx_buf->skb = NULL;
4253 }
4254}
4255
3817static void bnx2x_init_rx_rings(struct bnx2x *bp) 4256static void bnx2x_init_rx_rings(struct bnx2x *bp)
3818{ 4257{
3819 u16 ring_prod; 4258 int func = BP_FUNC(bp);
4259 u16 ring_prod, cqe_ring_prod = 0;
3820 int i, j; 4260 int i, j;
3821 4261
3822 bp->rx_buf_use_size = bp->dev->mtu; 4262 bp->rx_buf_use_size = bp->dev->mtu;
3823
3824 bp->rx_buf_use_size += bp->rx_offset + ETH_OVREHEAD; 4263 bp->rx_buf_use_size += bp->rx_offset + ETH_OVREHEAD;
3825 bp->rx_buf_size = bp->rx_buf_use_size + 64; 4264 bp->rx_buf_size = bp->rx_buf_use_size + 64;
3826 4265
4266 if (bp->flags & TPA_ENABLE_FLAG) {
4267 DP(NETIF_MSG_IFUP,
4268 "rx_buf_use_size %d rx_buf_size %d effective_mtu %d\n",
4269 bp->rx_buf_use_size, bp->rx_buf_size,
4270 bp->dev->mtu + ETH_OVREHEAD);
4271
4272 for_each_queue(bp, j) {
4273 for (i = 0; i < ETH_MAX_AGGREGATION_QUEUES_E1H; i++) {
4274 struct bnx2x_fastpath *fp = &bp->fp[j];
4275
4276 fp->tpa_pool[i].skb =
4277 netdev_alloc_skb(bp->dev, bp->rx_buf_size);
4278 if (!fp->tpa_pool[i].skb) {
4279 BNX2X_ERR("Failed to allocate TPA "
4280 "skb pool for queue[%d] - "
4281 "disabling TPA on this "
4282 "queue!\n", j);
4283 bnx2x_free_tpa_pool(bp, fp, i);
4284 fp->disable_tpa = 1;
4285 break;
4286 }
4287 pci_unmap_addr_set((struct sw_rx_bd *)
4288 &bp->fp->tpa_pool[i],
4289 mapping, 0);
4290 fp->tpa_state[i] = BNX2X_TPA_STOP;
4291 }
4292 }
4293 }
4294
3827 for_each_queue(bp, j) { 4295 for_each_queue(bp, j) {
3828 struct bnx2x_fastpath *fp = &bp->fp[j]; 4296 struct bnx2x_fastpath *fp = &bp->fp[j];
3829 4297
3830 fp->rx_bd_cons = 0; 4298 fp->rx_bd_cons = 0;
3831 fp->rx_cons_sb = BNX2X_RX_SB_INDEX; 4299 fp->rx_cons_sb = BNX2X_RX_SB_INDEX;
4300 fp->rx_bd_cons_sb = BNX2X_RX_SB_BD_INDEX;
4301
4302 /* "next page" elements initialization */
4303 /* SGE ring */
4304 for (i = 1; i <= NUM_RX_SGE_PAGES; i++) {
4305 struct eth_rx_sge *sge;
4306
4307 sge = &fp->rx_sge_ring[RX_SGE_CNT * i - 2];
4308 sge->addr_hi =
4309 cpu_to_le32(U64_HI(fp->rx_sge_mapping +
4310 BCM_PAGE_SIZE*(i % NUM_RX_SGE_PAGES)));
4311 sge->addr_lo =
4312 cpu_to_le32(U64_LO(fp->rx_sge_mapping +
4313 BCM_PAGE_SIZE*(i % NUM_RX_SGE_PAGES)));
4314 }
4315
4316 bnx2x_init_sge_ring_bit_mask(fp);
3832 4317
4318 /* RX BD ring */
3833 for (i = 1; i <= NUM_RX_RINGS; i++) { 4319 for (i = 1; i <= NUM_RX_RINGS; i++) {
3834 struct eth_rx_bd *rx_bd; 4320 struct eth_rx_bd *rx_bd;
3835 4321
@@ -3856,35 +4342,61 @@ static void bnx2x_init_rx_rings(struct bnx2x *bp)
3856 BCM_PAGE_SIZE*(i % NUM_RCQ_RINGS))); 4342 BCM_PAGE_SIZE*(i % NUM_RCQ_RINGS)));
3857 } 4343 }
3858 4344
3859 /* rx completion queue */ 4345 /* Allocate SGEs and initialize the ring elements */
3860 fp->rx_comp_cons = ring_prod = 0; 4346 for (i = 0, ring_prod = 0;
4347 i < MAX_RX_SGE_CNT*NUM_RX_SGE_PAGES; i++) {
3861 4348
4349 if (bnx2x_alloc_rx_sge(bp, fp, ring_prod) < 0) {
4350 BNX2X_ERR("was only able to allocate "
4351 "%d rx sges\n", i);
4352 BNX2X_ERR("disabling TPA for queue[%d]\n", j);
4353 /* Cleanup already allocated elements */
4354 bnx2x_free_rx_sge_range(bp, fp, ring_prod);
4355 bnx2x_free_tpa_pool(bp, fp,
4356 ETH_MAX_AGGREGATION_QUEUES_E1H);
4357 fp->disable_tpa = 1;
4358 ring_prod = 0;
4359 break;
4360 }
4361 ring_prod = NEXT_SGE_IDX(ring_prod);
4362 }
4363 fp->rx_sge_prod = ring_prod;
4364
4365 /* Allocate BDs and initialize BD ring */
4366 fp->rx_comp_cons = fp->rx_alloc_failed = 0;
4367 cqe_ring_prod = ring_prod = 0;
3862 for (i = 0; i < bp->rx_ring_size; i++) { 4368 for (i = 0; i < bp->rx_ring_size; i++) {
3863 if (bnx2x_alloc_rx_skb(bp, fp, ring_prod) < 0) { 4369 if (bnx2x_alloc_rx_skb(bp, fp, ring_prod) < 0) {
3864 BNX2X_ERR("was only able to allocate " 4370 BNX2X_ERR("was only able to allocate "
3865 "%d rx skbs\n", i); 4371 "%d rx skbs\n", i);
4372 fp->rx_alloc_failed++;
3866 break; 4373 break;
3867 } 4374 }
3868 ring_prod = NEXT_RX_IDX(ring_prod); 4375 ring_prod = NEXT_RX_IDX(ring_prod);
4376 cqe_ring_prod = NEXT_RCQ_IDX(cqe_ring_prod);
3869 BUG_TRAP(ring_prod > i); 4377 BUG_TRAP(ring_prod > i);
3870 } 4378 }
3871 4379
3872 fp->rx_bd_prod = fp->rx_comp_prod = ring_prod; 4380 fp->rx_bd_prod = ring_prod;
4381 /* must not have more available CQEs than BDs */
4382 fp->rx_comp_prod = min((u16)(NUM_RCQ_RINGS*RCQ_DESC_CNT),
4383 cqe_ring_prod);
3873 fp->rx_pkt = fp->rx_calls = 0; 4384 fp->rx_pkt = fp->rx_calls = 0;
3874 4385
3875 /* Warning! this will generate an interrupt (to the TSTORM) */ 4386 /* Warning!
3876 /* must only be done when chip is initialized */ 4387 * this will generate an interrupt (to the TSTORM)
3877 REG_WR(bp, BAR_TSTRORM_INTMEM + 4388 * must only be done after chip is initialized
3878 TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)), 4389 */
3879 ring_prod); 4390 bnx2x_update_rx_prod(bp, fp, ring_prod, fp->rx_comp_prod,
4391 fp->rx_sge_prod);
3880 if (j != 0) 4392 if (j != 0)
3881 continue; 4393 continue;
3882 4394
3883 REG_WR(bp, BAR_USTRORM_INTMEM + 4395 REG_WR(bp, BAR_USTRORM_INTMEM +
3884 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(BP_PORT(bp)), 4396 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func),
3885 U64_LO(fp->rx_comp_mapping)); 4397 U64_LO(fp->rx_comp_mapping));
3886 REG_WR(bp, BAR_USTRORM_INTMEM + 4398 REG_WR(bp, BAR_USTRORM_INTMEM +
3887 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(BP_PORT(bp)) + 4, 4399 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func) + 4,
3888 U64_HI(fp->rx_comp_mapping)); 4400 U64_HI(fp->rx_comp_mapping));
3889 } 4401 }
3890} 4402}
@@ -3972,6 +4484,18 @@ static void bnx2x_init_context(struct bnx2x *bp)
3972 U64_HI(fp->rx_desc_mapping); 4484 U64_HI(fp->rx_desc_mapping);
3973 context->ustorm_st_context.common.bd_page_base_lo = 4485 context->ustorm_st_context.common.bd_page_base_lo =
3974 U64_LO(fp->rx_desc_mapping); 4486 U64_LO(fp->rx_desc_mapping);
4487 if (!fp->disable_tpa) {
4488 context->ustorm_st_context.common.flags |=
4489 (USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_TPA |
4490 USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_SGE_RING);
4491 context->ustorm_st_context.common.sge_buff_size =
4492 (u16)(BCM_PAGE_SIZE*PAGES_PER_SGE);
4493 context->ustorm_st_context.common.sge_page_base_hi =
4494 U64_HI(fp->rx_sge_mapping);
4495 context->ustorm_st_context.common.sge_page_base_lo =
4496 U64_LO(fp->rx_sge_mapping);
4497 }
4498
3975 context->cstorm_st_context.sb_index_number = 4499 context->cstorm_st_context.sb_index_number =
3976 HC_INDEX_C_ETH_TX_CQ_CONS; 4500 HC_INDEX_C_ETH_TX_CQ_CONS;
3977 context->cstorm_st_context.status_block_id = sb_id; 4501 context->cstorm_st_context.status_block_id = sb_id;
@@ -4022,6 +4546,18 @@ static void bnx2x_set_client_config(struct bnx2x *bp)
4022 } 4546 }
4023#endif 4547#endif
4024 4548
4549 if (bp->flags & TPA_ENABLE_FLAG) {
4550 tstorm_client.max_sges_for_packet =
4551 BCM_PAGE_ALIGN(tstorm_client.mtu) >> BCM_PAGE_SHIFT;
4552 tstorm_client.max_sges_for_packet =
4553 ((tstorm_client.max_sges_for_packet +
4554 PAGES_PER_SGE - 1) & (~(PAGES_PER_SGE - 1))) >>
4555 PAGES_PER_SGE_SHIFT;
4556
4557 tstorm_client.config_flags |=
4558 TSTORM_ETH_CLIENT_CONFIG_ENABLE_SGE_RING;
4559 }
4560
4025 for_each_queue(bp, i) { 4561 for_each_queue(bp, i) {
4026 REG_WR(bp, BAR_TSTRORM_INTMEM + 4562 REG_WR(bp, BAR_TSTRORM_INTMEM +
4027 TSTORM_CLIENT_CONFIG_OFFSET(port, bp->fp[i].cl_id), 4563 TSTORM_CLIENT_CONFIG_OFFSET(port, bp->fp[i].cl_id),
@@ -4136,8 +4672,8 @@ static void bnx2x_init_internal(struct bnx2x *bp)
4136 REG_WR8(bp, BAR_USTRORM_INTMEM + USTORM_FUNCTION_MODE_OFFSET, 4672 REG_WR8(bp, BAR_USTRORM_INTMEM + USTORM_FUNCTION_MODE_OFFSET,
4137 IS_E1HMF(bp)); 4673 IS_E1HMF(bp));
4138 4674
4139 REG_WR16(bp, BAR_XSTRORM_INTMEM + 4675 REG_WR16(bp, BAR_XSTRORM_INTMEM + XSTORM_E1HOV_OFFSET(func),
4140 XSTORM_E1HOV_OFFSET(func), bp->e1hov); 4676 bp->e1hov);
4141 } 4677 }
4142 4678
4143 /* Zero this manualy as its initialization is 4679 /* Zero this manualy as its initialization is
@@ -4145,6 +4681,25 @@ static void bnx2x_init_internal(struct bnx2x *bp)
4145 for (i = 0; i < USTORM_AGG_DATA_SIZE >> 2; i++) 4681 for (i = 0; i < USTORM_AGG_DATA_SIZE >> 2; i++)
4146 REG_WR(bp, BAR_USTRORM_INTMEM + 4682 REG_WR(bp, BAR_USTRORM_INTMEM +
4147 USTORM_AGG_DATA_OFFSET + 4*i, 0); 4683 USTORM_AGG_DATA_OFFSET + 4*i, 0);
4684
4685 for_each_queue(bp, i) {
4686 struct bnx2x_fastpath *fp = &bp->fp[i];
4687 u16 max_agg_size;
4688
4689 REG_WR(bp, BAR_USTRORM_INTMEM +
4690 USTORM_CQE_PAGE_BASE_OFFSET(port, FP_CL_ID(fp)),
4691 U64_LO(fp->rx_comp_mapping));
4692 REG_WR(bp, BAR_USTRORM_INTMEM +
4693 USTORM_CQE_PAGE_BASE_OFFSET(port, FP_CL_ID(fp)) + 4,
4694 U64_HI(fp->rx_comp_mapping));
4695
4696 max_agg_size = min((u32)(bp->rx_buf_use_size +
4697 8*BCM_PAGE_SIZE*PAGES_PER_SGE),
4698 (u32)0xffff);
4699 REG_WR16(bp, BAR_USTRORM_INTMEM +
4700 USTORM_MAX_AGG_SIZE_OFFSET(port, FP_CL_ID(fp)),
4701 max_agg_size);
4702 }
4148} 4703}
4149 4704
4150static void bnx2x_nic_init(struct bnx2x *bp) 4705static void bnx2x_nic_init(struct bnx2x *bp)
@@ -4767,6 +5322,17 @@ static int bnx2x_init_common(struct bnx2x *bp)
4767 5322
4768 enable_blocks_attention(bp); 5323 enable_blocks_attention(bp);
4769 5324
5325 if (bp->flags & TPA_ENABLE_FLAG) {
5326 struct tstorm_eth_tpa_exist tmp = {0};
5327
5328 tmp.tpa_exist = 1;
5329
5330 REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_TPA_EXIST_OFFSET,
5331 ((u32 *)&tmp)[0]);
5332 REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_TPA_EXIST_OFFSET + 4,
5333 ((u32 *)&tmp)[1]);
5334 }
5335
4770 return 0; 5336 return 0;
4771} 5337}
4772 5338
@@ -5145,8 +5711,12 @@ static void bnx2x_free_mem(struct bnx2x *bp)
5145 bnx2x_fp(bp, i, rx_comp_mapping), 5711 bnx2x_fp(bp, i, rx_comp_mapping),
5146 sizeof(struct eth_fast_path_rx_cqe) * 5712 sizeof(struct eth_fast_path_rx_cqe) *
5147 NUM_RCQ_BD); 5713 NUM_RCQ_BD);
5148 }
5149 5714
5715 /* SGE ring */
5716 BNX2X_PCI_FREE(bnx2x_fp(bp, i, rx_sge_ring),
5717 bnx2x_fp(bp, i, rx_sge_mapping),
5718 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
5719 }
5150 /* end of fastpath */ 5720 /* end of fastpath */
5151 5721
5152 BNX2X_PCI_FREE(bp->def_status_blk, bp->def_status_blk_mapping, 5722 BNX2X_PCI_FREE(bp->def_status_blk, bp->def_status_blk_mapping,
@@ -5161,7 +5731,7 @@ static void bnx2x_free_mem(struct bnx2x *bp)
5161 BNX2X_PCI_FREE(bp->timers, bp->timers_mapping, 8*1024); 5731 BNX2X_PCI_FREE(bp->timers, bp->timers_mapping, 8*1024);
5162 BNX2X_PCI_FREE(bp->qm, bp->qm_mapping, 128*1024); 5732 BNX2X_PCI_FREE(bp->qm, bp->qm_mapping, 128*1024);
5163#endif 5733#endif
5164 BNX2X_PCI_FREE(bp->spq, bp->spq_mapping, PAGE_SIZE); 5734 BNX2X_PCI_FREE(bp->spq, bp->spq_mapping, BCM_PAGE_SIZE);
5165 5735
5166#undef BNX2X_PCI_FREE 5736#undef BNX2X_PCI_FREE
5167#undef BNX2X_KFREE 5737#undef BNX2X_KFREE
@@ -5223,6 +5793,12 @@ static int bnx2x_alloc_mem(struct bnx2x *bp)
5223 sizeof(struct eth_fast_path_rx_cqe) * 5793 sizeof(struct eth_fast_path_rx_cqe) *
5224 NUM_RCQ_BD); 5794 NUM_RCQ_BD);
5225 5795
5796 /* SGE ring */
5797 BNX2X_ALLOC(bnx2x_fp(bp, i, rx_page_ring),
5798 sizeof(struct sw_rx_page) * NUM_RX_SGE);
5799 BNX2X_PCI_ALLOC(bnx2x_fp(bp, i, rx_sge_ring),
5800 &bnx2x_fp(bp, i, rx_sge_mapping),
5801 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
5226 } 5802 }
5227 /* end of fastpath */ 5803 /* end of fastpath */
5228 5804
@@ -5313,6 +5889,9 @@ static void bnx2x_free_rx_skbs(struct bnx2x *bp)
5313 rx_buf->skb = NULL; 5889 rx_buf->skb = NULL;
5314 dev_kfree_skb(skb); 5890 dev_kfree_skb(skb);
5315 } 5891 }
5892 if (!fp->disable_tpa)
5893 bnx2x_free_tpa_pool(bp, fp,
5894 ETH_MAX_AGGREGATION_QUEUES_E1H);
5316 } 5895 }
5317} 5896}
5318 5897
@@ -5664,6 +6243,10 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
5664 if (bnx2x_alloc_mem(bp)) 6243 if (bnx2x_alloc_mem(bp))
5665 return -ENOMEM; 6244 return -ENOMEM;
5666 6245
6246 for_each_queue(bp, i)
6247 bnx2x_fp(bp, i, disable_tpa) =
6248 ((bp->flags & TPA_ENABLE_FLAG) == 0);
6249
5667 /* Disable interrupt handling until HW is initialized */ 6250 /* Disable interrupt handling until HW is initialized */
5668 atomic_set(&bp->intr_sem, 1); 6251 atomic_set(&bp->intr_sem, 1);
5669 6252
@@ -5792,6 +6375,11 @@ load_int_disable:
5792 /* Release IRQs */ 6375 /* Release IRQs */
5793 bnx2x_free_irq(bp); 6376 bnx2x_free_irq(bp);
5794 6377
6378 /* Free SKBs, SGEs, TPA pool and driver internals */
6379 bnx2x_free_skbs(bp);
6380 for_each_queue(bp, i)
6381 bnx2x_free_rx_sge_range(bp, bp->fp + i,
6382 RX_SGE_CNT*NUM_RX_SGE_PAGES);
5795load_error: 6383load_error:
5796 bnx2x_free_mem(bp); 6384 bnx2x_free_mem(bp);
5797 6385
@@ -6090,8 +6678,11 @@ unload_error:
6090 if (!BP_NOMCP(bp)) 6678 if (!BP_NOMCP(bp))
6091 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE); 6679 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE);
6092 6680
6093 /* Free SKBs and driver internals */ 6681 /* Free SKBs, SGEs, TPA pool and driver internals */
6094 bnx2x_free_skbs(bp); 6682 bnx2x_free_skbs(bp);
6683 for_each_queue(bp, i)
6684 bnx2x_free_rx_sge_range(bp, bp->fp + i,
6685 RX_SGE_CNT*NUM_RX_SGE_PAGES);
6095 bnx2x_free_mem(bp); 6686 bnx2x_free_mem(bp);
6096 6687
6097 bp->state = BNX2X_STATE_CLOSED; 6688 bp->state = BNX2X_STATE_CLOSED;
@@ -6767,6 +7358,16 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
6767 printk(KERN_ERR PFX 7358 printk(KERN_ERR PFX
6768 "MCP disabled, must load devices in order!\n"); 7359 "MCP disabled, must load devices in order!\n");
6769 7360
7361 /* Set TPA flags */
7362 if (disable_tpa) {
7363 bp->flags &= ~TPA_ENABLE_FLAG;
7364 bp->dev->features &= ~NETIF_F_LRO;
7365 } else {
7366 bp->flags |= TPA_ENABLE_FLAG;
7367 bp->dev->features |= NETIF_F_LRO;
7368 }
7369
7370
6770 bp->tx_ring_size = MAX_TX_AVAIL; 7371 bp->tx_ring_size = MAX_TX_AVAIL;
6771 bp->rx_ring_size = MAX_RX_AVAIL; 7372 bp->rx_ring_size = MAX_RX_AVAIL;
6772 7373
@@ -7556,6 +8157,33 @@ static int bnx2x_set_coalesce(struct net_device *dev,
7556 return 0; 8157 return 0;
7557} 8158}
7558 8159
8160static int bnx2x_set_flags(struct net_device *dev, u32 data)
8161{
8162 struct bnx2x *bp = netdev_priv(dev);
8163 int changed = 0;
8164 int rc = 0;
8165
8166 if (data & ETH_FLAG_LRO) {
8167 if (!(dev->features & NETIF_F_LRO)) {
8168 dev->features |= NETIF_F_LRO;
8169 bp->flags |= TPA_ENABLE_FLAG;
8170 changed = 1;
8171 }
8172
8173 } else if (dev->features & NETIF_F_LRO) {
8174 dev->features &= ~NETIF_F_LRO;
8175 bp->flags &= ~TPA_ENABLE_FLAG;
8176 changed = 1;
8177 }
8178
8179 if (changed && netif_running(dev)) {
8180 bnx2x_nic_unload(bp, UNLOAD_NORMAL);
8181 rc = bnx2x_nic_load(bp, LOAD_NORMAL);
8182 }
8183
8184 return rc;
8185}
8186
7559static void bnx2x_get_ringparam(struct net_device *dev, 8187static void bnx2x_get_ringparam(struct net_device *dev,
7560 struct ethtool_ringparam *ering) 8188 struct ethtool_ringparam *ering)
7561{ 8189{
@@ -7896,35 +8524,37 @@ static int bnx2x_phys_id(struct net_device *dev, u32 data)
7896} 8524}
7897 8525
7898static struct ethtool_ops bnx2x_ethtool_ops = { 8526static struct ethtool_ops bnx2x_ethtool_ops = {
7899 .get_settings = bnx2x_get_settings, 8527 .get_settings = bnx2x_get_settings,
7900 .set_settings = bnx2x_set_settings, 8528 .set_settings = bnx2x_set_settings,
7901 .get_drvinfo = bnx2x_get_drvinfo, 8529 .get_drvinfo = bnx2x_get_drvinfo,
7902 .get_wol = bnx2x_get_wol, 8530 .get_wol = bnx2x_get_wol,
7903 .set_wol = bnx2x_set_wol, 8531 .set_wol = bnx2x_set_wol,
7904 .get_msglevel = bnx2x_get_msglevel, 8532 .get_msglevel = bnx2x_get_msglevel,
7905 .set_msglevel = bnx2x_set_msglevel, 8533 .set_msglevel = bnx2x_set_msglevel,
7906 .nway_reset = bnx2x_nway_reset, 8534 .nway_reset = bnx2x_nway_reset,
7907 .get_link = ethtool_op_get_link, 8535 .get_link = ethtool_op_get_link,
7908 .get_eeprom_len = bnx2x_get_eeprom_len, 8536 .get_eeprom_len = bnx2x_get_eeprom_len,
7909 .get_eeprom = bnx2x_get_eeprom, 8537 .get_eeprom = bnx2x_get_eeprom,
7910 .set_eeprom = bnx2x_set_eeprom, 8538 .set_eeprom = bnx2x_set_eeprom,
7911 .get_coalesce = bnx2x_get_coalesce, 8539 .get_coalesce = bnx2x_get_coalesce,
7912 .set_coalesce = bnx2x_set_coalesce, 8540 .set_coalesce = bnx2x_set_coalesce,
7913 .get_ringparam = bnx2x_get_ringparam, 8541 .get_ringparam = bnx2x_get_ringparam,
7914 .set_ringparam = bnx2x_set_ringparam, 8542 .set_ringparam = bnx2x_set_ringparam,
7915 .get_pauseparam = bnx2x_get_pauseparam, 8543 .get_pauseparam = bnx2x_get_pauseparam,
7916 .set_pauseparam = bnx2x_set_pauseparam, 8544 .set_pauseparam = bnx2x_set_pauseparam,
7917 .get_rx_csum = bnx2x_get_rx_csum, 8545 .get_rx_csum = bnx2x_get_rx_csum,
7918 .set_rx_csum = bnx2x_set_rx_csum, 8546 .set_rx_csum = bnx2x_set_rx_csum,
7919 .get_tx_csum = ethtool_op_get_tx_csum, 8547 .get_tx_csum = ethtool_op_get_tx_csum,
7920 .set_tx_csum = ethtool_op_set_tx_csum, 8548 .set_tx_csum = ethtool_op_set_tx_csum,
7921 .get_sg = ethtool_op_get_sg, 8549 .set_flags = bnx2x_set_flags,
7922 .set_sg = ethtool_op_set_sg, 8550 .get_flags = ethtool_op_get_flags,
8551 .get_sg = ethtool_op_get_sg,
8552 .set_sg = ethtool_op_set_sg,
7923 .get_tso = ethtool_op_get_tso, 8553 .get_tso = ethtool_op_get_tso,
7924 .set_tso = bnx2x_set_tso, 8554 .set_tso = bnx2x_set_tso,
7925 .self_test_count = bnx2x_self_test_count, 8555 .self_test_count = bnx2x_self_test_count,
7926 .self_test = bnx2x_self_test, 8556 .self_test = bnx2x_self_test,
7927 .get_strings = bnx2x_get_strings, 8557 .get_strings = bnx2x_get_strings,
7928 .phys_id = bnx2x_phys_id, 8558 .phys_id = bnx2x_phys_id,
7929 .get_stats_count = bnx2x_get_stats_count, 8559 .get_stats_count = bnx2x_get_stats_count,
7930 .get_ethtool_stats = bnx2x_get_ethtool_stats, 8560 .get_ethtool_stats = bnx2x_get_ethtool_stats,