path: root/drivers/net/ethernet/intel/igc
author     Sasha Neftin <sasha.neftin@intel.com>        2018-10-11 03:17:22 -0400
committer  Jeff Kirsher <jeffrey.t.kirsher@intel.com>   2018-10-17 16:46:51 -0400
commit     0507ef8a0372b80c30555bbeec7215f2cf874ecd (patch)
tree       89c60a0b221b615b602a1c2d2ddfd512f6a59106 /drivers/net/ethernet/intel/igc
parent     13b5b7fd6a4a96dffe604f25e7b64cfbd9520924 (diff)
igc: Add transmit and receive fastpath and interrupt handlers
This patch adds support for allocating, configuring, and freeing Tx/Rx ring
resources. With these changes in place the descriptor queues are in a state
where they are ready to transmit or receive if provided buffers. This also
adds the transmit and receive fastpath and interrupt handlers. With this code
in place the network device is now able to send and receive frames over the
network interface using a single queue.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Diffstat (limited to 'drivers/net/ethernet/intel/igc')
-rw-r--r--  drivers/net/ethernet/intel/igc/igc.h           66
-rw-r--r--  drivers/net/ethernet/intel/igc/igc_base.h      15
-rw-r--r--  drivers/net/ethernet/intel/igc/igc_defines.h   45
-rw-r--r--  drivers/net/ethernet/intel/igc/igc_main.c    1123
4 files changed, 1205 insertions, 44 deletions
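
For reference, a minimal standalone sketch (not part of the patch) of the worst-case Tx descriptor accounting the fastpath below relies on; the constants mirror the IGC_MAX_DATA_PER_TXD and TXD_USE_COUNT definitions added to igc.h, while igc_txd_budget() and the example frame layout are illustrative only:

/* Sketch only: mirrors the descriptor-budget math from igc.h / igc_main.c. */
#include <stdio.h>

#define IGC_MAX_TXD_PWR      15
#define IGC_MAX_DATA_PER_TXD (1u << IGC_MAX_TXD_PWR)   /* 32 KB per data descriptor */
#define DIV_ROUND_UP(n, d)   (((n) + (d) - 1) / (d))
#define TXD_USE_COUNT(S)     DIV_ROUND_UP((S), IGC_MAX_DATA_PER_TXD)

/* Free descriptors igc_xmit_frame_ring() would require before queueing a
 * frame: one count per 32 KB of the linear area and of each page fragment,
 * plus the "+ 3" cushion (2 descriptor gap + 1 context descriptor).
 */
static unsigned int igc_txd_budget(unsigned int headlen,
				   const unsigned int *frag_len, int nr_frags)
{
	unsigned int count = TXD_USE_COUNT(headlen);
	int f;

	for (f = 0; f < nr_frags; f++)
		count += TXD_USE_COUNT(frag_len[f]);

	return count + 3;
}

int main(void)
{
	unsigned int frags[] = { 3072, 3072 };

	/* 48 KB linear area -> 2 descriptors, two 3 KB frags -> 1 each, + 3 */
	printf("budget: %u\n", igc_txd_budget(49152, frags, 2)); /* prints 7 */
	return 0;
}

If fewer descriptors than this budget are free, igc_maybe_stop_tx() stops the subqueue and the frame is retried later, which is the NETDEV_TX_BUSY path in igc_xmit_frame_ring() below.
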
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 7bb19328b899..88ee451e36fd 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -32,13 +32,31 @@ extern char igc_driver_version[];
32#define IGC_START_ITR 648 /* ~6000 ints/sec */ 32#define IGC_START_ITR 648 /* ~6000 ints/sec */
33#define IGC_FLAG_HAS_MSI BIT(0) 33#define IGC_FLAG_HAS_MSI BIT(0)
34#define IGC_FLAG_QUEUE_PAIRS BIT(4) 34#define IGC_FLAG_QUEUE_PAIRS BIT(4)
35#define IGC_FLAG_NEED_LINK_UPDATE BIT(9)
35#define IGC_FLAG_HAS_MSIX BIT(13) 36#define IGC_FLAG_HAS_MSIX BIT(13)
37#define IGC_FLAG_VLAN_PROMISC BIT(15)
36 38
37#define IGC_START_ITR 648 /* ~6000 ints/sec */ 39#define IGC_START_ITR 648 /* ~6000 ints/sec */
38#define IGC_4K_ITR 980 40#define IGC_4K_ITR 980
39#define IGC_20K_ITR 196 41#define IGC_20K_ITR 196
40#define IGC_70K_ITR 56 42#define IGC_70K_ITR 56
41 43
44#define IGC_DEFAULT_ITR 3 /* dynamic */
45#define IGC_MAX_ITR_USECS 10000
46#define IGC_MIN_ITR_USECS 10
47#define NON_Q_VECTORS 1
48#define MAX_MSIX_ENTRIES 10
49
50/* TX/RX descriptor defines */
51#define IGC_DEFAULT_TXD 256
52#define IGC_DEFAULT_TX_WORK 128
53#define IGC_MIN_TXD 80
54#define IGC_MAX_TXD 4096
55
56#define IGC_DEFAULT_RXD 256
57#define IGC_MIN_RXD 80
58#define IGC_MAX_RXD 4096
59
42/* Transmit and receive queues */ 60/* Transmit and receive queues */
43#define IGC_MAX_RX_QUEUES 4 61#define IGC_MAX_RX_QUEUES 4
44#define IGC_MAX_TX_QUEUES 4 62#define IGC_MAX_TX_QUEUES 4
@@ -85,6 +103,16 @@ extern char igc_driver_version[];
85#define IGC_MAX_FRAME_BUILD_SKB (IGC_RXBUFFER_2048 - IGC_TS_HDR_LEN) 103#define IGC_MAX_FRAME_BUILD_SKB (IGC_RXBUFFER_2048 - IGC_TS_HDR_LEN)
86#endif 104#endif
87 105
106/* How many Rx Buffers do we bundle into one write to the hardware? */
107#define IGC_RX_BUFFER_WRITE 16 /* Must be power of 2 */
108
109/* igc_test_staterr - tests bits within Rx descriptor status and error fields */
110static inline __le32 igc_test_staterr(union igc_adv_rx_desc *rx_desc,
111 const u32 stat_err_bits)
112{
113 return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
114}
115
88enum igc_state_t { 116enum igc_state_t {
89 __IGC_TESTING, 117 __IGC_TESTING,
90 __IGC_RESETTING, 118 __IGC_RESETTING,
@@ -92,6 +120,27 @@ enum igc_state_t {
92 __IGC_PTP_TX_IN_PROGRESS, 120 __IGC_PTP_TX_IN_PROGRESS,
93}; 121};
94 122
123enum igc_tx_flags {
124 /* cmd_type flags */
125 IGC_TX_FLAGS_VLAN = 0x01,
126 IGC_TX_FLAGS_TSO = 0x02,
127 IGC_TX_FLAGS_TSTAMP = 0x04,
128
129 /* olinfo flags */
130 IGC_TX_FLAGS_IPV4 = 0x10,
131 IGC_TX_FLAGS_CSUM = 0x20,
132};
133
134/* The largest size we can write to the descriptor is 65535. In order to
135 * maintain a power of two alignment we have to limit ourselves to 32K.
136 */
137#define IGC_MAX_TXD_PWR 15
138#define IGC_MAX_DATA_PER_TXD BIT(IGC_MAX_TXD_PWR)
139
140/* Tx Descriptors needed, worst case */
141#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGC_MAX_DATA_PER_TXD)
142#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
143
95/* wrapper around a pointer to a socket buffer, 144/* wrapper around a pointer to a socket buffer,
96 * so a DMA handle can be stored along with the buffer 145 * so a DMA handle can be stored along with the buffer
97 */ 146 */
@@ -123,6 +172,7 @@ struct igc_tx_queue_stats {
123 u64 packets; 172 u64 packets;
124 u64 bytes; 173 u64 bytes;
125 u64 restart_queue; 174 u64 restart_queue;
175 u64 restart_queue2;
126}; 176};
127 177
128struct igc_rx_queue_stats { 178struct igc_rx_queue_stats {
@@ -181,11 +231,14 @@ struct igc_ring {
181 /* TX */ 231 /* TX */
182 struct { 232 struct {
183 struct igc_tx_queue_stats tx_stats; 233 struct igc_tx_queue_stats tx_stats;
234 struct u64_stats_sync tx_syncp;
235 struct u64_stats_sync tx_syncp2;
184 }; 236 };
185 /* RX */ 237 /* RX */
186 struct { 238 struct {
187 struct igc_rx_queue_stats rx_stats; 239 struct igc_rx_queue_stats rx_stats;
188 struct igc_rx_packet_stats pkt_stats; 240 struct igc_rx_packet_stats pkt_stats;
241 struct u64_stats_sync rx_syncp;
189 struct sk_buff *skb; 242 struct sk_buff *skb;
190 }; 243 };
191 }; 244 };
@@ -258,11 +311,17 @@ struct igc_adapter {
258 struct work_struct watchdog_task; 311 struct work_struct watchdog_task;
259 struct work_struct dma_err_task; 312 struct work_struct dma_err_task;
260 313
314 u8 tx_timeout_factor;
315
261 int msg_enable; 316 int msg_enable;
262 u32 max_frame_size; 317 u32 max_frame_size;
318 u32 min_frame_size;
263 319
264 /* OS defined structs */ 320 /* OS defined structs */
265 struct pci_dev *pdev; 321 struct pci_dev *pdev;
322 /* lock for statistics */
323 spinlock_t stats64_lock;
324 struct rtnl_link_stats64 stats64;
266 325
267 /* structs defined in igc_hw.h */ 326 /* structs defined in igc_hw.h */
268 struct igc_hw hw; 327 struct igc_hw hw;
@@ -275,8 +334,13 @@ struct igc_adapter {
275 u16 tx_ring_count; 334 u16 tx_ring_count;
276 u16 rx_ring_count; 335 u16 rx_ring_count;
277 336
337 u32 *shadow_vfta;
338
278 u32 rss_queues; 339 u32 rss_queues;
279 340
341 /* lock for RX network flow classification filter */
342 spinlock_t nfc_lock;
343
280 struct igc_mac_addr *mac_table; 344 struct igc_mac_addr *mac_table;
281}; 345};
282 346
@@ -332,6 +396,8 @@ static inline unsigned int igc_rx_pg_order(struct igc_ring *ring)
332 396
333#define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) 397#define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))
334 398
399#define IGC_TXD_DCMD (IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS)
400
335#define IGC_RX_DESC(R, i) \ 401#define IGC_RX_DESC(R, i) \
336 (&(((union igc_adv_rx_desc *)((R)->desc))[i])) 402 (&(((union igc_adv_rx_desc *)((R)->desc))[i]))
337#define IGC_TX_DESC(R, i) \ 403#define IGC_TX_DESC(R, i) \
diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h
index 4bdb4ecf3bc8..3078a18f70a9 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.h
+++ b/drivers/net/ethernet/intel/igc/igc_base.h
@@ -21,6 +21,18 @@ union igc_adv_tx_desc {
21 } wb; 21 } wb;
22}; 22};
23 23
24/* Adv Transmit Descriptor Config Masks */
25#define IGC_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp packet */
26#define IGC_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */
27#define IGC_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */
28#define IGC_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */
29#define IGC_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
30#define IGC_ADVTXD_DCMD_RS 0x08000000 /* Report Status */
31#define IGC_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */
32#define IGC_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */
33#define IGC_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */
34#define IGC_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
35
24struct igc_adv_data_desc { 36struct igc_adv_data_desc {
25 __le64 buffer_addr; /* Address of the descriptor's data buffer */ 37 __le64 buffer_addr; /* Address of the descriptor's data buffer */
26 union { 38 union {
@@ -75,6 +87,9 @@ union igc_adv_rx_desc {
75 } wb; /* writeback */ 87 } wb; /* writeback */
76}; 88};
77 89
90/* Adv Transmit Descriptor Config Masks */
91#define IGC_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
92
78/* Additional Transmit Descriptor Control definitions */ 93/* Additional Transmit Descriptor Control definitions */
79#define IGC_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */ 94#define IGC_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */
80 95
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index dbc30dead461..c8a321358cf6 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -84,6 +84,29 @@
84#define IGC_GPIE_EIAME 0x40000000 84#define IGC_GPIE_EIAME 0x40000000
85#define IGC_GPIE_PBA 0x80000000 85#define IGC_GPIE_PBA 0x80000000
86 86
87/* Transmit Descriptor bit definitions */
88#define IGC_TXD_DTYP_D 0x00100000 /* Data Descriptor */
89#define IGC_TXD_DTYP_C 0x00000000 /* Context Descriptor */
90#define IGC_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */
91#define IGC_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */
92#define IGC_TXD_CMD_EOP 0x01000000 /* End of Packet */
93#define IGC_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
94#define IGC_TXD_CMD_IC 0x04000000 /* Insert Checksum */
95#define IGC_TXD_CMD_RS 0x08000000 /* Report Status */
96#define IGC_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */
97#define IGC_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */
98#define IGC_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */
99#define IGC_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */
100#define IGC_TXD_STAT_DD 0x00000001 /* Descriptor Done */
101#define IGC_TXD_STAT_EC 0x00000002 /* Excess Collisions */
102#define IGC_TXD_STAT_LC 0x00000004 /* Late Collisions */
103#define IGC_TXD_STAT_TU 0x00000008 /* Transmit underrun */
104#define IGC_TXD_CMD_TCP 0x01000000 /* TCP packet */
105#define IGC_TXD_CMD_IP 0x02000000 /* IP packet */
106#define IGC_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */
107#define IGC_TXD_STAT_TC 0x00000004 /* Tx Underrun */
108#define IGC_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */
109
87/* Transmit Control */ 110/* Transmit Control */
88#define IGC_TCTL_EN 0x00000002 /* enable Tx */ 111#define IGC_TCTL_EN 0x00000002 /* enable Tx */
89#define IGC_TCTL_PSP 0x00000008 /* pad short packets */ 112#define IGC_TCTL_PSP 0x00000008 /* pad short packets */
@@ -111,6 +134,25 @@
111#define IGC_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */ 134#define IGC_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */
112#define IGC_RCTL_BAM 0x00008000 /* broadcast enable */ 135#define IGC_RCTL_BAM 0x00008000 /* broadcast enable */
113 136
137/* Receive Descriptor bit definitions */
138#define IGC_RXD_STAT_EOP 0x02 /* End of Packet */
139
140#define IGC_RXDEXT_STATERR_CE 0x01000000
141#define IGC_RXDEXT_STATERR_SE 0x02000000
142#define IGC_RXDEXT_STATERR_SEQ 0x04000000
143#define IGC_RXDEXT_STATERR_CXE 0x10000000
144#define IGC_RXDEXT_STATERR_TCPE 0x20000000
145#define IGC_RXDEXT_STATERR_IPE 0x40000000
146#define IGC_RXDEXT_STATERR_RXE 0x80000000
147
148/* Same mask, but for extended and packet split descriptors */
149#define IGC_RXDEXT_ERR_FRAME_ERR_MASK ( \
150 IGC_RXDEXT_STATERR_CE | \
151 IGC_RXDEXT_STATERR_SE | \
152 IGC_RXDEXT_STATERR_SEQ | \
153 IGC_RXDEXT_STATERR_CXE | \
154 IGC_RXDEXT_STATERR_RXE)
155
114/* Header split receive */ 156/* Header split receive */
115#define IGC_RFCTL_IPV6_EX_DIS 0x00010000 157#define IGC_RFCTL_IPV6_EX_DIS 0x00010000
116#define IGC_RFCTL_LEF 0x00040000 158#define IGC_RFCTL_LEF 0x00040000
@@ -123,6 +165,9 @@
123#define IGC_RCTL_PMCF 0x00800000 /* pass MAC control frames */ 165#define IGC_RCTL_PMCF 0x00800000 /* pass MAC control frames */
124#define IGC_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ 166#define IGC_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */
125 167
168#define I225_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */
169#define I225_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */
170
126#define IGC_N0_QUEUE -1 171#define IGC_N0_QUEUE -1
127 172
128#endif /* _IGC_DEFINES_H_ */ 173#endif /* _IGC_DEFINES_H_ */
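
A quick aside before the igc_main.c changes: igc_tx_olinfo_status() below sets the POPTS checksum bits with branchless arithmetic, relying on each IGC_TX_FLAGS_* value dividing evenly into the corresponding IGC_TXD_POPTS_* bit once shifted into place, so (tx_flags & FLAG) * (BIT / FLAG) is either BIT or 0. A small standalone check (values copied from igc.h and igc_defines.h; the olinfo_flags() helper is illustrative only):

/* Sketch only: verifies the branchless flag math used by igc_tx_olinfo_status(). */
#include <assert.h>
#include <stdint.h>

#define IGC_TX_FLAGS_IPV4   0x10	/* from igc.h (enum igc_tx_flags) */
#define IGC_TX_FLAGS_CSUM   0x20
#define IGC_TXD_POPTS_IXSM  0x01	/* from igc_defines.h */
#define IGC_TXD_POPTS_TXSM  0x02

static uint32_t olinfo_flags(uint32_t tx_flags)
{
	uint32_t olinfo_status = 0;

	/* (tx_flags & FLAG) is FLAG or 0; multiplying by (BIT / FLAG) gives BIT or 0 */
	olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
			 ((IGC_TXD_POPTS_TXSM << 8) / IGC_TX_FLAGS_CSUM);
	olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
			 ((IGC_TXD_POPTS_IXSM << 8) / IGC_TX_FLAGS_IPV4);
	return olinfo_status;
}

int main(void)
{
	assert(olinfo_flags(0) == 0);
	assert(olinfo_flags(IGC_TX_FLAGS_CSUM) == (IGC_TXD_POPTS_TXSM << 8));
	assert(olinfo_flags(IGC_TX_FLAGS_CSUM | IGC_TX_FLAGS_IPV4) ==
	       ((IGC_TXD_POPTS_TXSM << 8) | (IGC_TXD_POPTS_IXSM << 8)));
	return 0;
}

In the driver these bits land in the olinfo_status field of the advanced data descriptor, as set up by igc_tx_olinfo_status() in the hunk below.
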
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 373ccea86fb0..db7b6820e0f0 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -52,6 +52,8 @@ static void igc_free_q_vectors(struct igc_adapter *adapter);
52static void igc_irq_disable(struct igc_adapter *adapter); 52static void igc_irq_disable(struct igc_adapter *adapter);
53static void igc_irq_enable(struct igc_adapter *adapter); 53static void igc_irq_enable(struct igc_adapter *adapter);
54static void igc_configure_msix(struct igc_adapter *adapter); 54static void igc_configure_msix(struct igc_adapter *adapter);
55static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
56 struct igc_rx_buffer *bi);
55 57
56enum latency_range { 58enum latency_range {
57 lowest_latency = 0, 59 lowest_latency = 0,
@@ -219,6 +221,19 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
219} 221}
220 222
221/** 223/**
224 * igc_clean_all_tx_rings - Free Tx Buffers for all queues
225 * @adapter: board private structure
226 */
227static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
228{
229 int i;
230
231 for (i = 0; i < adapter->num_tx_queues; i++)
232 if (adapter->tx_ring[i])
233 igc_clean_tx_ring(adapter->tx_ring[i]);
234}
235
236/**
222 * igc_setup_tx_resources - allocate Tx resources (Descriptors) 237 * igc_setup_tx_resources - allocate Tx resources (Descriptors)
223 * @tx_ring: tx descriptor ring (for a specific queue) to setup 238 * @tx_ring: tx descriptor ring (for a specific queue) to setup
224 * 239 *
@@ -326,6 +341,19 @@ static void igc_clean_rx_ring(struct igc_ring *rx_ring)
326} 341}
327 342
328/** 343/**
344 * igc_clean_all_rx_rings - Free Rx Buffers for all queues
345 * @adapter: board private structure
346 */
347static void igc_clean_all_rx_rings(struct igc_adapter *adapter)
348{
349 int i;
350
351 for (i = 0; i < adapter->num_rx_queues; i++)
352 if (adapter->rx_ring[i])
353 igc_clean_rx_ring(adapter->rx_ring[i]);
354}
355
356/**
329 * igc_free_rx_resources - Free Rx Resources 357 * igc_free_rx_resources - Free Rx Resources
330 * @rx_ring: ring to clean the resources from 358 * @rx_ring: ring to clean the resources from
331 * 359 *
@@ -666,60 +694,613 @@ static int igc_set_mac(struct net_device *netdev, void *p)
666 return 0; 694 return 0;
667} 695}
668 696
697static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
698{
699}
700
701static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
702{
703 struct net_device *netdev = tx_ring->netdev;
704
705 netif_stop_subqueue(netdev, tx_ring->queue_index);
706
707 /* memory barrier: make the queue stop visible before rechecking for room */
708 smp_mb();
709
710 /* We need to check again in case another CPU has just
711 * made room available.
712 */
713 if (igc_desc_unused(tx_ring) < size)
714 return -EBUSY;
715
716 /* A reprieve! */
717 netif_wake_subqueue(netdev, tx_ring->queue_index);
718
719 u64_stats_update_begin(&tx_ring->tx_syncp2);
720 tx_ring->tx_stats.restart_queue2++;
721 u64_stats_update_end(&tx_ring->tx_syncp2);
722
723 return 0;
724}
725
726static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
727{
728 if (igc_desc_unused(tx_ring) >= size)
729 return 0;
730 return __igc_maybe_stop_tx(tx_ring, size);
731}
732
733static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
734{
735 /* set type for advanced descriptor with frame checksum insertion */
736 u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
737 IGC_ADVTXD_DCMD_DEXT |
738 IGC_ADVTXD_DCMD_IFCS;
739
740 return cmd_type;
741}
742
743static void igc_tx_olinfo_status(struct igc_ring *tx_ring,
744 union igc_adv_tx_desc *tx_desc,
745 u32 tx_flags, unsigned int paylen)
746{
747 u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT;
748
749 /* insert L4 checksum */
750 olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
751 ((IGC_TXD_POPTS_TXSM << 8) /
752 IGC_TX_FLAGS_CSUM);
753
754 /* insert IPv4 checksum */
755 olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
756 (((IGC_TXD_POPTS_IXSM << 8)) /
757 IGC_TX_FLAGS_IPV4);
758
759 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
760}
761
762static int igc_tx_map(struct igc_ring *tx_ring,
763 struct igc_tx_buffer *first,
764 const u8 hdr_len)
765{
766 struct sk_buff *skb = first->skb;
767 struct igc_tx_buffer *tx_buffer;
768 union igc_adv_tx_desc *tx_desc;
769 u32 tx_flags = first->tx_flags;
770 struct skb_frag_struct *frag;
771 u16 i = tx_ring->next_to_use;
772 unsigned int data_len, size;
773 dma_addr_t dma;
774 u32 cmd_type = igc_tx_cmd_type(skb, tx_flags);
775
776 tx_desc = IGC_TX_DESC(tx_ring, i);
777
778 igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
779
780 size = skb_headlen(skb);
781 data_len = skb->data_len;
782
783 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
784
785 tx_buffer = first;
786
787 for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
788 if (dma_mapping_error(tx_ring->dev, dma))
789 goto dma_error;
790
791 /* record length, and DMA address */
792 dma_unmap_len_set(tx_buffer, len, size);
793 dma_unmap_addr_set(tx_buffer, dma, dma);
794
795 tx_desc->read.buffer_addr = cpu_to_le64(dma);
796
797 while (unlikely(size > IGC_MAX_DATA_PER_TXD)) {
798 tx_desc->read.cmd_type_len =
799 cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD);
800
801 i++;
802 tx_desc++;
803 if (i == tx_ring->count) {
804 tx_desc = IGC_TX_DESC(tx_ring, 0);
805 i = 0;
806 }
807 tx_desc->read.olinfo_status = 0;
808
809 dma += IGC_MAX_DATA_PER_TXD;
810 size -= IGC_MAX_DATA_PER_TXD;
811
812 tx_desc->read.buffer_addr = cpu_to_le64(dma);
813 }
814
815 if (likely(!data_len))
816 break;
817
818 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
819
820 i++;
821 tx_desc++;
822 if (i == tx_ring->count) {
823 tx_desc = IGC_TX_DESC(tx_ring, 0);
824 i = 0;
825 }
826 tx_desc->read.olinfo_status = 0;
827
828 size = skb_frag_size(frag);
829 data_len -= size;
830
831 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
832 size, DMA_TO_DEVICE);
833
834 tx_buffer = &tx_ring->tx_buffer_info[i];
835 }
836
837 /* write last descriptor with RS and EOP bits */
838 cmd_type |= size | IGC_TXD_DCMD;
839 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
840
841 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
842
843 /* set the timestamp */
844 first->time_stamp = jiffies;
845
846 /* Force memory writes to complete before letting h/w know there
847 * are new descriptors to fetch. (Only applicable for weak-ordered
848 * memory model archs, such as IA-64).
849 *
850 * We also need this memory barrier to make certain all of the
851 * status bits have been updated before next_to_watch is written.
852 */
853 wmb();
854
855 /* set next_to_watch value indicating a packet is present */
856 first->next_to_watch = tx_desc;
857
858 i++;
859 if (i == tx_ring->count)
860 i = 0;
861
862 tx_ring->next_to_use = i;
863
864 /* Make sure there is space in the ring for the next send. */
865 igc_maybe_stop_tx(tx_ring, DESC_NEEDED);
866
867 if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
868 writel(i, tx_ring->tail);
869
870 /* we need this if more than one processor can write to our tail
871 * at a time, it synchronizes IO on IA64/Altix systems
872 */
873 mmiowb();
874 }
875
876 return 0;
877dma_error:
878 dev_err(tx_ring->dev, "TX DMA map failed\n");
879 tx_buffer = &tx_ring->tx_buffer_info[i];
880
881 /* clear dma mappings for failed tx_buffer_info map */
882 while (tx_buffer != first) {
883 if (dma_unmap_len(tx_buffer, len))
884 dma_unmap_page(tx_ring->dev,
885 dma_unmap_addr(tx_buffer, dma),
886 dma_unmap_len(tx_buffer, len),
887 DMA_TO_DEVICE);
888 dma_unmap_len_set(tx_buffer, len, 0);
889
890 if (i-- == 0)
891 i += tx_ring->count;
892 tx_buffer = &tx_ring->tx_buffer_info[i];
893 }
894
895 if (dma_unmap_len(tx_buffer, len))
896 dma_unmap_single(tx_ring->dev,
897 dma_unmap_addr(tx_buffer, dma),
898 dma_unmap_len(tx_buffer, len),
899 DMA_TO_DEVICE);
900 dma_unmap_len_set(tx_buffer, len, 0);
901
902 dev_kfree_skb_any(tx_buffer->skb);
903 tx_buffer->skb = NULL;
904
905 tx_ring->next_to_use = i;
906
907 return -1;
908}
909
910static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
911 struct igc_ring *tx_ring)
912{
913 u16 count = TXD_USE_COUNT(skb_headlen(skb));
914 __be16 protocol = vlan_get_protocol(skb);
915 struct igc_tx_buffer *first;
916 u32 tx_flags = 0;
917 unsigned short f;
918 u8 hdr_len = 0;
919
920 /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD,
921 * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD,
922 * + 2 desc gap to keep tail from touching head,
923 * + 1 desc for context descriptor,
924 * otherwise try next time
925 */
926 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
927 count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
928
929 if (igc_maybe_stop_tx(tx_ring, count + 3)) {
930 /* this is a hard error */
931 return NETDEV_TX_BUSY;
932 }
933
934 /* record the location of the first descriptor for this packet */
935 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
936 first->skb = skb;
937 first->bytecount = skb->len;
938 first->gso_segs = 1;
939
940 skb_tx_timestamp(skb);
941
942 /* record initial flags and protocol */
943 first->tx_flags = tx_flags;
944 first->protocol = protocol;
945
946 igc_tx_csum(tx_ring, first);
947
948 igc_tx_map(tx_ring, first, hdr_len);
949
950 return NETDEV_TX_OK;
951}
952
953static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter,
954 struct sk_buff *skb)
955{
956 unsigned int r_idx = skb->queue_mapping;
957
958 if (r_idx >= adapter->num_tx_queues)
959 r_idx = r_idx % adapter->num_tx_queues;
960
961 return adapter->tx_ring[r_idx];
962}
963
669static netdev_tx_t igc_xmit_frame(struct sk_buff *skb, 964static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
670 struct net_device *netdev) 965 struct net_device *netdev)
671{ 966{
672 dev_kfree_skb_any(skb); 967 struct igc_adapter *adapter = netdev_priv(netdev);
673 return NETDEV_TX_OK; 968
969 /* The minimum packet size with TCTL.PSP set is 17 so pad the skb
970 * in order to meet this minimum size requirement.
971 */
972 if (skb->len < 17) {
973 if (skb_padto(skb, 17))
974 return NETDEV_TX_OK;
975 skb->len = 17;
976 }
977
978 return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
674} 979}
675 980
676static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) 981static inline void igc_rx_hash(struct igc_ring *ring,
982 union igc_adv_rx_desc *rx_desc,
983 struct sk_buff *skb)
677{ 984{
678 return ring_uses_build_skb(rx_ring) ? IGC_SKB_PAD : 0; 985 if (ring->netdev->features & NETIF_F_RXHASH)
986 skb_set_hash(skb,
987 le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
988 PKT_HASH_TYPE_L3);
679} 989}
680 990
681static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, 991/**
682 struct igc_rx_buffer *bi) 992 * igc_process_skb_fields - Populate skb header fields from Rx descriptor
993 * @rx_ring: rx descriptor ring packet is being transacted on
994 * @rx_desc: pointer to the EOP Rx descriptor
995 * @skb: pointer to current skb being populated
996 *
997 * This function checks the ring, descriptor, and packet information in
998 * order to populate the hash, checksum, VLAN, timestamp, protocol, and
999 * other fields within the skb.
1000 */
1001static void igc_process_skb_fields(struct igc_ring *rx_ring,
1002 union igc_adv_rx_desc *rx_desc,
1003 struct sk_buff *skb)
683{ 1004{
684 struct page *page = bi->page; 1005 igc_rx_hash(rx_ring, rx_desc, skb);
685 dma_addr_t dma;
686 1006
687 /* since we are recycling buffers we should seldom need to alloc */ 1007 skb_record_rx_queue(skb, rx_ring->queue_index);
688 if (likely(page))
689 return true;
690 1008
691 /* alloc new page for storage */ 1009 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
692 page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); 1010}
693 if (unlikely(!page)) { 1011
694 rx_ring->rx_stats.alloc_failed++; 1012static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
695 return false; 1013 const unsigned int size)
1014{
1015 struct igc_rx_buffer *rx_buffer;
1016
1017 rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
1018 prefetchw(rx_buffer->page);
1019
1020 /* we are reusing so sync this buffer for CPU use */
1021 dma_sync_single_range_for_cpu(rx_ring->dev,
1022 rx_buffer->dma,
1023 rx_buffer->page_offset,
1024 size,
1025 DMA_FROM_DEVICE);
1026
1027 rx_buffer->pagecnt_bias--;
1028
1029 return rx_buffer;
1030}
1031
1032/**
1033 * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
1034 * @rx_ring: rx descriptor ring to transact packets on
1035 * @rx_buffer: buffer containing page to add
1036 * @skb: sk_buff to place the data into
1037 * @size: size of buffer to be added
1038 *
1039 * This function will add the data contained in rx_buffer->page to the skb.
1040 */
1041static void igc_add_rx_frag(struct igc_ring *rx_ring,
1042 struct igc_rx_buffer *rx_buffer,
1043 struct sk_buff *skb,
1044 unsigned int size)
1045{
1046#if (PAGE_SIZE < 8192)
1047 unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
1048
1049 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
1050 rx_buffer->page_offset, size, truesize);
1051 rx_buffer->page_offset ^= truesize;
1052#else
1053 unsigned int truesize = ring_uses_build_skb(rx_ring) ?
1054 SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
1055 SKB_DATA_ALIGN(size);
1056 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
1057 rx_buffer->page_offset, size, truesize);
1058 rx_buffer->page_offset += truesize;
1059#endif
1060}
1061
1062static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
1063 struct igc_rx_buffer *rx_buffer,
1064 union igc_adv_rx_desc *rx_desc,
1065 unsigned int size)
1066{
1067 void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
1068#if (PAGE_SIZE < 8192)
1069 unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
1070#else
1071 unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
1072 SKB_DATA_ALIGN(IGC_SKB_PAD + size);
1073#endif
1074 struct sk_buff *skb;
1075
1076 /* prefetch first cache line of first page */
1077 prefetch(va);
1078#if L1_CACHE_BYTES < 128
1079 prefetch(va + L1_CACHE_BYTES);
1080#endif
1081
1082 /* build an skb around the page buffer */
1083 skb = build_skb(va - IGC_SKB_PAD, truesize);
1084 if (unlikely(!skb))
1085 return NULL;
1086
1087 /* update pointers within the skb to store the data */
1088 skb_reserve(skb, IGC_SKB_PAD);
1089 __skb_put(skb, size);
1090
1091 /* update buffer offset */
1092#if (PAGE_SIZE < 8192)
1093 rx_buffer->page_offset ^= truesize;
1094#else
1095 rx_buffer->page_offset += truesize;
1096#endif
1097
1098 return skb;
1099}
1100
1101static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
1102 struct igc_rx_buffer *rx_buffer,
1103 union igc_adv_rx_desc *rx_desc,
1104 unsigned int size)
1105{
1106 void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
1107#if (PAGE_SIZE < 8192)
1108 unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
1109#else
1110 unsigned int truesize = SKB_DATA_ALIGN(size);
1111#endif
1112 unsigned int headlen;
1113 struct sk_buff *skb;
1114
1115 /* prefetch first cache line of first page */
1116 prefetch(va);
1117#if L1_CACHE_BYTES < 128
1118 prefetch(va + L1_CACHE_BYTES);
1119#endif
1120
1121 /* allocate a skb to store the frags */
1122 skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN);
1123 if (unlikely(!skb))
1124 return NULL;
1125
1126 /* Determine available headroom for copy */
1127 headlen = size;
1128 if (headlen > IGC_RX_HDR_LEN)
1129 headlen = eth_get_headlen(va, IGC_RX_HDR_LEN);
1130
1131 /* align pull length to size of long to optimize memcpy performance */
1132 memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
1133
1134 /* update all of the pointers */
1135 size -= headlen;
1136 if (size) {
1137 skb_add_rx_frag(skb, 0, rx_buffer->page,
1138 (va + headlen) - page_address(rx_buffer->page),
1139 size, truesize);
1140#if (PAGE_SIZE < 8192)
1141 rx_buffer->page_offset ^= truesize;
1142#else
1143 rx_buffer->page_offset += truesize;
1144#endif
1145 } else {
1146 rx_buffer->pagecnt_bias++;
696 } 1147 }
697 1148
698 /* map page for use */ 1149 return skb;
699 dma = dma_map_page_attrs(rx_ring->dev, page, 0, 1150}
700 igc_rx_pg_size(rx_ring),
701 DMA_FROM_DEVICE,
702 IGC_RX_DMA_ATTR);
703 1151
704 /* if mapping failed free memory back to system since 1152/**
705 * there isn't much point in holding memory we can't use 1153 * igc_reuse_rx_page - page flip buffer and store it back on the ring
1154 * @rx_ring: rx descriptor ring to store buffers on
1155 * @old_buff: donor buffer to have page reused
1156 *
1157 * Synchronizes page for reuse by the adapter
1158 */
1159static void igc_reuse_rx_page(struct igc_ring *rx_ring,
1160 struct igc_rx_buffer *old_buff)
1161{
1162 u16 nta = rx_ring->next_to_alloc;
1163 struct igc_rx_buffer *new_buff;
1164
1165 new_buff = &rx_ring->rx_buffer_info[nta];
1166
1167 /* update, and store next to alloc */
1168 nta++;
1169 rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
1170
1171 /* Transfer page from old buffer to new buffer.
1172 * Move each member individually to avoid possible store
1173 * forwarding stalls.
706 */ 1174 */
707 if (dma_mapping_error(rx_ring->dev, dma)) { 1175 new_buff->dma = old_buff->dma;
708 __free_page(page); 1176 new_buff->page = old_buff->page;
1177 new_buff->page_offset = old_buff->page_offset;
1178 new_buff->pagecnt_bias = old_buff->pagecnt_bias;
1179}
709 1180
710 rx_ring->rx_stats.alloc_failed++; 1181static inline bool igc_page_is_reserved(struct page *page)
1182{
1183 return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
1184}
1185
1186static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer)
1187{
1188 unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
1189 struct page *page = rx_buffer->page;
1190
1191 /* avoid re-using remote pages */
1192 if (unlikely(igc_page_is_reserved(page)))
1193 return false;
1194
1195#if (PAGE_SIZE < 8192)
1196 /* if we are only owner of page we can reuse it */
1197 if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
711 return false; 1198 return false;
1199#else
1200#define IGC_LAST_OFFSET \
1201 (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048)
1202
1203 if (rx_buffer->page_offset > IGC_LAST_OFFSET)
1204 return false;
1205#endif
1206
1207 /* If we have drained the page fragment pool we need to update
1208 * the pagecnt_bias and page count so that we fully restock the
1209 * number of references the driver holds.
1210 */
1211 if (unlikely(!pagecnt_bias)) {
1212 page_ref_add(page, USHRT_MAX);
1213 rx_buffer->pagecnt_bias = USHRT_MAX;
712 } 1214 }
713 1215
714 bi->dma = dma; 1216 return true;
715 bi->page = page; 1217}
716 bi->page_offset = igc_rx_offset(rx_ring); 1218
717 bi->pagecnt_bias = 1; 1219/**
1220 * igc_is_non_eop - process handling of non-EOP buffers
1221 * @rx_ring: Rx ring being processed
1222 * @rx_desc: Rx descriptor for current buffer
1223 *
1224 * This function updates next_to_clean and prefetches the next
1225 * descriptor. If the current buffer is an EOP buffer this function
1226 * exits returning false, otherwise it returns true to indicate that
1227 * the frame is not yet complete and continues in the following
1228 * buffer.
1229 */
1230static bool igc_is_non_eop(struct igc_ring *rx_ring,
1231 union igc_adv_rx_desc *rx_desc)
1232{
1233 u32 ntc = rx_ring->next_to_clean + 1;
1234
1235 /* fetch, update, and store next to clean */
1236 ntc = (ntc < rx_ring->count) ? ntc : 0;
1237 rx_ring->next_to_clean = ntc;
1238
1239 prefetch(IGC_RX_DESC(rx_ring, ntc));
1240
1241 if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP)))
1242 return false;
718 1243
719 return true; 1244 return true;
720} 1245}
721 1246
722/** 1247/**
1248 * igc_cleanup_headers - Correct corrupted or empty headers
1249 * @rx_ring: rx descriptor ring packet is being transacted on
1250 * @rx_desc: pointer to the EOP Rx descriptor
1251 * @skb: pointer to current skb being fixed
1252 *
1253 * Address the case where we are pulling data in on pages only
1254 * and as such no data is present in the skb header.
1255 *
1256 * In addition if skb is not at least 60 bytes we need to pad it so that
1257 * it is large enough to qualify as a valid Ethernet frame.
1258 *
1259 * Returns true if an error was encountered and skb was freed.
1260 */
1261static bool igc_cleanup_headers(struct igc_ring *rx_ring,
1262 union igc_adv_rx_desc *rx_desc,
1263 struct sk_buff *skb)
1264{
1265 if (unlikely((igc_test_staterr(rx_desc,
1266 IGC_RXDEXT_ERR_FRAME_ERR_MASK)))) {
1267 struct net_device *netdev = rx_ring->netdev;
1268
1269 if (!(netdev->features & NETIF_F_RXALL)) {
1270 dev_kfree_skb_any(skb);
1271 return true;
1272 }
1273 }
1274
1275 /* if eth_skb_pad returns an error the skb was freed */
1276 if (eth_skb_pad(skb))
1277 return true;
1278
1279 return false;
1280}
1281
1282static void igc_put_rx_buffer(struct igc_ring *rx_ring,
1283 struct igc_rx_buffer *rx_buffer)
1284{
1285 if (igc_can_reuse_rx_page(rx_buffer)) {
1286 /* hand second half of page back to the ring */
1287 igc_reuse_rx_page(rx_ring, rx_buffer);
1288 } else {
1289 /* We are not reusing the buffer so unmap it and free
1290 * any references we are holding to it
1291 */
1292 dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
1293 igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
1294 IGC_RX_DMA_ATTR);
1295 __page_frag_cache_drain(rx_buffer->page,
1296 rx_buffer->pagecnt_bias);
1297 }
1298
1299 /* clear contents of rx_buffer */
1300 rx_buffer->page = NULL;
1301}
1302
1303/**
723 * igc_alloc_rx_buffers - Replace used receive buffers; packet split 1304 * igc_alloc_rx_buffers - Replace used receive buffers; packet split
724 * @adapter: address of board private structure 1305 * @adapter: address of board private structure
725 */ 1306 */
@@ -788,6 +1369,314 @@ static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
788 } 1369 }
789} 1370}
790 1371
1372static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
1373{
1374 unsigned int total_bytes = 0, total_packets = 0;
1375 struct igc_ring *rx_ring = q_vector->rx.ring;
1376 struct sk_buff *skb = rx_ring->skb;
1377 u16 cleaned_count = igc_desc_unused(rx_ring);
1378
1379 while (likely(total_packets < budget)) {
1380 union igc_adv_rx_desc *rx_desc;
1381 struct igc_rx_buffer *rx_buffer;
1382 unsigned int size;
1383
1384 /* return some buffers to hardware, one at a time is too slow */
1385 if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
1386 igc_alloc_rx_buffers(rx_ring, cleaned_count);
1387 cleaned_count = 0;
1388 }
1389
1390 rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean);
1391 size = le16_to_cpu(rx_desc->wb.upper.length);
1392 if (!size)
1393 break;
1394
1395 /* This memory barrier is needed to keep us from reading
1396 * any other fields out of the rx_desc until we know the
1397 * descriptor has been written back
1398 */
1399 dma_rmb();
1400
1401 rx_buffer = igc_get_rx_buffer(rx_ring, size);
1402
1403 /* retrieve a buffer from the ring */
1404 if (skb)
1405 igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
1406 else if (ring_uses_build_skb(rx_ring))
1407 skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size);
1408 else
1409 skb = igc_construct_skb(rx_ring, rx_buffer,
1410 rx_desc, size);
1411
1412 /* exit if we failed to retrieve a buffer */
1413 if (!skb) {
1414 rx_ring->rx_stats.alloc_failed++;
1415 rx_buffer->pagecnt_bias++;
1416 break;
1417 }
1418
1419 igc_put_rx_buffer(rx_ring, rx_buffer);
1420 cleaned_count++;
1421
1422 /* fetch next buffer in frame if non-eop */
1423 if (igc_is_non_eop(rx_ring, rx_desc))
1424 continue;
1425
1426 /* verify the packet layout is correct */
1427 if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
1428 skb = NULL;
1429 continue;
1430 }
1431
1432 /* probably a little skewed due to removing CRC */
1433 total_bytes += skb->len;
1434
1435 /* populate checksum, timestamp, VLAN, and protocol */
1436 igc_process_skb_fields(rx_ring, rx_desc, skb);
1437
1438 napi_gro_receive(&q_vector->napi, skb);
1439
1440 /* reset skb pointer */
1441 skb = NULL;
1442
1443 /* update budget accounting */
1444 total_packets++;
1445 }
1446
1447 /* place incomplete frames back on ring for completion */
1448 rx_ring->skb = skb;
1449
1450 u64_stats_update_begin(&rx_ring->rx_syncp);
1451 rx_ring->rx_stats.packets += total_packets;
1452 rx_ring->rx_stats.bytes += total_bytes;
1453 u64_stats_update_end(&rx_ring->rx_syncp);
1454 q_vector->rx.total_packets += total_packets;
1455 q_vector->rx.total_bytes += total_bytes;
1456
1457 if (cleaned_count)
1458 igc_alloc_rx_buffers(rx_ring, cleaned_count);
1459
1460 return total_packets;
1461}
1462
1463static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
1464{
1465 return ring_uses_build_skb(rx_ring) ? IGC_SKB_PAD : 0;
1466}
1467
1468static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
1469 struct igc_rx_buffer *bi)
1470{
1471 struct page *page = bi->page;
1472 dma_addr_t dma;
1473
1474 /* since we are recycling buffers we should seldom need to alloc */
1475 if (likely(page))
1476 return true;
1477
1478 /* alloc new page for storage */
1479 page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
1480 if (unlikely(!page)) {
1481 rx_ring->rx_stats.alloc_failed++;
1482 return false;
1483 }
1484
1485 /* map page for use */
1486 dma = dma_map_page_attrs(rx_ring->dev, page, 0,
1487 igc_rx_pg_size(rx_ring),
1488 DMA_FROM_DEVICE,
1489 IGC_RX_DMA_ATTR);
1490
1491 /* if mapping failed free memory back to system since
1492 * there isn't much point in holding memory we can't use
1493 */
1494 if (dma_mapping_error(rx_ring->dev, dma)) {
1495 __free_page(page);
1496
1497 rx_ring->rx_stats.alloc_failed++;
1498 return false;
1499 }
1500
1501 bi->dma = dma;
1502 bi->page = page;
1503 bi->page_offset = igc_rx_offset(rx_ring);
1504 bi->pagecnt_bias = 1;
1505
1506 return true;
1507}
1508
1509/**
1510 * igc_clean_tx_irq - Reclaim resources after transmit completes
1511 * @q_vector: pointer to q_vector containing needed info
1512 * @napi_budget: Used to determine if we are in netpoll
1513 *
1514 * returns true if ring is completely cleaned
1515 */
1516static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
1517{
1518 struct igc_adapter *adapter = q_vector->adapter;
1519 unsigned int total_bytes = 0, total_packets = 0;
1520 unsigned int budget = q_vector->tx.work_limit;
1521 struct igc_ring *tx_ring = q_vector->tx.ring;
1522 unsigned int i = tx_ring->next_to_clean;
1523 struct igc_tx_buffer *tx_buffer;
1524 union igc_adv_tx_desc *tx_desc;
1525
1526 if (test_bit(__IGC_DOWN, &adapter->state))
1527 return true;
1528
1529 tx_buffer = &tx_ring->tx_buffer_info[i];
1530 tx_desc = IGC_TX_DESC(tx_ring, i);
1531 i -= tx_ring->count;
1532
1533 do {
1534 union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
1535
1536 /* if next_to_watch is not set then there is no work pending */
1537 if (!eop_desc)
1538 break;
1539
1540 /* prevent any other reads prior to eop_desc */
1541 smp_rmb();
1542
1543 /* if DD is not set pending work has not been completed */
1544 if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD)))
1545 break;
1546
1547 /* clear next_to_watch to prevent false hangs */
1548 tx_buffer->next_to_watch = NULL;
1549
1550 /* update the statistics for this packet */
1551 total_bytes += tx_buffer->bytecount;
1552 total_packets += tx_buffer->gso_segs;
1553
1554 /* free the skb */
1555 napi_consume_skb(tx_buffer->skb, napi_budget);
1556
1557 /* unmap skb header data */
1558 dma_unmap_single(tx_ring->dev,
1559 dma_unmap_addr(tx_buffer, dma),
1560 dma_unmap_len(tx_buffer, len),
1561 DMA_TO_DEVICE);
1562
1563 /* clear tx_buffer data */
1564 dma_unmap_len_set(tx_buffer, len, 0);
1565
1566 /* clear last DMA location and unmap remaining buffers */
1567 while (tx_desc != eop_desc) {
1568 tx_buffer++;
1569 tx_desc++;
1570 i++;
1571 if (unlikely(!i)) {
1572 i -= tx_ring->count;
1573 tx_buffer = tx_ring->tx_buffer_info;
1574 tx_desc = IGC_TX_DESC(tx_ring, 0);
1575 }
1576
1577 /* unmap any remaining paged data */
1578 if (dma_unmap_len(tx_buffer, len)) {
1579 dma_unmap_page(tx_ring->dev,
1580 dma_unmap_addr(tx_buffer, dma),
1581 dma_unmap_len(tx_buffer, len),
1582 DMA_TO_DEVICE);
1583 dma_unmap_len_set(tx_buffer, len, 0);
1584 }
1585 }
1586
1587 /* move us one more past the eop_desc for start of next pkt */
1588 tx_buffer++;
1589 tx_desc++;
1590 i++;
1591 if (unlikely(!i)) {
1592 i -= tx_ring->count;
1593 tx_buffer = tx_ring->tx_buffer_info;
1594 tx_desc = IGC_TX_DESC(tx_ring, 0);
1595 }
1596
1597 /* issue prefetch for next Tx descriptor */
1598 prefetch(tx_desc);
1599
1600 /* update budget accounting */
1601 budget--;
1602 } while (likely(budget));
1603
1604 netdev_tx_completed_queue(txring_txq(tx_ring),
1605 total_packets, total_bytes);
1606
1607 i += tx_ring->count;
1608 tx_ring->next_to_clean = i;
1609 u64_stats_update_begin(&tx_ring->tx_syncp);
1610 tx_ring->tx_stats.bytes += total_bytes;
1611 tx_ring->tx_stats.packets += total_packets;
1612 u64_stats_update_end(&tx_ring->tx_syncp);
1613 q_vector->tx.total_bytes += total_bytes;
1614 q_vector->tx.total_packets += total_packets;
1615
1616 if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
1617 struct igc_hw *hw = &adapter->hw;
1618
1619 /* Detect a transmit hang in hardware; this serializes the
1620 * check with the clearing of time_stamp and movement of i
1621 */
1622 clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
1623 if (tx_buffer->next_to_watch &&
1624 time_after(jiffies, tx_buffer->time_stamp +
1625 (adapter->tx_timeout_factor * HZ)) &&
1626 !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) {
1627 /* detected Tx unit hang */
1628 dev_err(tx_ring->dev,
1629 "Detected Tx Unit Hang\n"
1630 " Tx Queue <%d>\n"
1631 " TDH <%x>\n"
1632 " TDT <%x>\n"
1633 " next_to_use <%x>\n"
1634 " next_to_clean <%x>\n"
1635 "buffer_info[next_to_clean]\n"
1636 " time_stamp <%lx>\n"
1637 " next_to_watch <%p>\n"
1638 " jiffies <%lx>\n"
1639 " desc.status <%x>\n",
1640 tx_ring->queue_index,
1641 rd32(IGC_TDH(tx_ring->reg_idx)),
1642 readl(tx_ring->tail),
1643 tx_ring->next_to_use,
1644 tx_ring->next_to_clean,
1645 tx_buffer->time_stamp,
1646 tx_buffer->next_to_watch,
1647 jiffies,
1648 tx_buffer->next_to_watch->wb.status);
1649 netif_stop_subqueue(tx_ring->netdev,
1650 tx_ring->queue_index);
1651
1652 /* we are about to reset, no point in enabling stuff */
1653 return true;
1654 }
1655 }
1656
1657#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
1658 if (unlikely(total_packets &&
1659 netif_carrier_ok(tx_ring->netdev) &&
1660 igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
1661 /* Make sure that anybody stopping the queue after this
1662 * sees the new next_to_clean.
1663 */
1664 smp_mb();
1665 if (__netif_subqueue_stopped(tx_ring->netdev,
1666 tx_ring->queue_index) &&
1667 !(test_bit(__IGC_DOWN, &adapter->state))) {
1668 netif_wake_subqueue(tx_ring->netdev,
1669 tx_ring->queue_index);
1670
1671 u64_stats_update_begin(&tx_ring->tx_syncp);
1672 tx_ring->tx_stats.restart_queue++;
1673 u64_stats_update_end(&tx_ring->tx_syncp);
1674 }
1675 }
1676
1677 return !!budget;
1678}
1679
791/** 1680/**
792 * igc_ioctl - I/O control method 1681 * igc_ioctl - I/O control method
793 * @netdev: network interface device structure 1682 * @netdev: network interface device structure
@@ -842,6 +1731,10 @@ static void igc_update_stats(struct igc_adapter *adapter)
842{ 1731{
843} 1732}
844 1733
1734static void igc_nfc_filter_exit(struct igc_adapter *adapter)
1735{
1736}
1737
845/** 1738/**
846 * igc_down - Close the interface 1739 * igc_down - Close the interface
847 * @adapter: board private structure 1740 * @adapter: board private structure
@@ -849,21 +1742,83 @@ static void igc_update_stats(struct igc_adapter *adapter)
849static void igc_down(struct igc_adapter *adapter) 1742static void igc_down(struct igc_adapter *adapter)
850{ 1743{
851 struct net_device *netdev = adapter->netdev; 1744 struct net_device *netdev = adapter->netdev;
1745 struct igc_hw *hw = &adapter->hw;
1746 u32 tctl, rctl;
852 int i = 0; 1747 int i = 0;
853 1748
854 set_bit(__IGC_DOWN, &adapter->state); 1749 set_bit(__IGC_DOWN, &adapter->state);
855 1750
1751 /* disable receives in the hardware */
1752 rctl = rd32(IGC_RCTL);
1753 wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
1754 /* flush and sleep below */
1755
1756 igc_nfc_filter_exit(adapter);
1757
856 /* set trans_start so we don't get spurious watchdogs during reset */ 1758 /* set trans_start so we don't get spurious watchdogs during reset */
857 netif_trans_update(netdev); 1759 netif_trans_update(netdev);
858 1760
859 netif_carrier_off(netdev); 1761 netif_carrier_off(netdev);
860 netif_tx_stop_all_queues(netdev); 1762 netif_tx_stop_all_queues(netdev);
861 1763
862 for (i = 0; i < adapter->num_q_vectors; i++) 1764 /* disable transmits in the hardware */
863 napi_disable(&adapter->q_vector[i]->napi); 1765 tctl = rd32(IGC_TCTL);
1766 tctl &= ~IGC_TCTL_EN;
1767 wr32(IGC_TCTL, tctl);
1768 /* flush both disables and wait for them to finish */
1769 wrfl();
1770 usleep_range(10000, 20000);
1771
1772 igc_irq_disable(adapter);
1773
1774 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
1775
1776 for (i = 0; i < adapter->num_q_vectors; i++) {
1777 if (adapter->q_vector[i]) {
1778 napi_synchronize(&adapter->q_vector[i]->napi);
1779 napi_disable(&adapter->q_vector[i]->napi);
1780 }
1781 }
1782
1783 del_timer_sync(&adapter->watchdog_timer);
1784 del_timer_sync(&adapter->phy_info_timer);
1785
1786 /* record the stats before reset */
1787 spin_lock(&adapter->stats64_lock);
1788 igc_update_stats(adapter);
1789 spin_unlock(&adapter->stats64_lock);
864 1790
865 adapter->link_speed = 0; 1791 adapter->link_speed = 0;
866 adapter->link_duplex = 0; 1792 adapter->link_duplex = 0;
1793
1794 if (!pci_channel_offline(adapter->pdev))
1795 igc_reset(adapter);
1796
1797 /* clear VLAN promisc flag so VFTA will be updated if necessary */
1798 adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
1799
1800 igc_clean_all_tx_rings(adapter);
1801 igc_clean_all_rx_rings(adapter);
1802}
1803
1804static void igc_reinit_locked(struct igc_adapter *adapter)
1805{
1806 WARN_ON(in_interrupt());
1807 while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
1808 usleep_range(1000, 2000);
1809 igc_down(adapter);
1810 igc_up(adapter);
1811 clear_bit(__IGC_RESETTING, &adapter->state);
1812}
1813
1814static void igc_reset_task(struct work_struct *work)
1815{
1816 struct igc_adapter *adapter;
1817
1818 adapter = container_of(work, struct igc_adapter, reset_task);
1819
1820 netdev_err(adapter->netdev, "Reset adapter\n");
1821 igc_reinit_locked(adapter);
867} 1822}
868 1823
869/** 1824/**
@@ -1321,6 +2276,15 @@ static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
1321} 2276}
1322 2277
1323/** 2278/**
2279 * igc_watchdog - Timer Call-back
2280 * @data: pointer to adapter cast into an unsigned long
2281 */
2282static void igc_watchdog(struct timer_list *t)
2283{
2284 struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
2285}
2286
2287/**
1324 * igc_update_ring_itr - update the dynamic ITR value based on packet size 2288 * igc_update_ring_itr - update the dynamic ITR value based on packet size
1325 * @q_vector: pointer to q_vector 2289 * @q_vector: pointer to q_vector
1326 * 2290 *
@@ -1637,9 +2601,13 @@ static int igc_poll(struct napi_struct *napi, int budget)
1637 napi); 2601 napi);
1638 bool clean_complete = true; 2602 bool clean_complete = true;
1639 int work_done = 0; 2603 int work_done = 0;
1640 int cleaned = 0; 2604
2605 if (q_vector->tx.ring)
2606 clean_complete = igc_clean_tx_irq(q_vector, budget);
1641 2607
1642 if (q_vector->rx.ring) { 2608 if (q_vector->rx.ring) {
2609 int cleaned = igc_clean_rx_irq(q_vector, budget);
2610
1643 work_done += cleaned; 2611 work_done += cleaned;
1644 if (cleaned >= budget) 2612 if (cleaned >= budget)
1645 clean_complete = false; 2613 clean_complete = false;
@@ -2403,6 +3371,14 @@ static int igc_probe(struct pci_dev *pdev,
2403 netdev->min_mtu = ETH_MIN_MTU; 3371 netdev->min_mtu = ETH_MIN_MTU;
2404 netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; 3372 netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
2405 3373
3374 /* configure RXPBSIZE and TXPBSIZE */
3375 wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT);
3376 wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
3377
3378 timer_setup(&adapter->watchdog_timer, igc_watchdog, 0);
3379
3380 INIT_WORK(&adapter->reset_task, igc_reset_task);
3381
2406 /* reset the hardware with the new settings */ 3382 /* reset the hardware with the new settings */
2407 igc_reset(adapter); 3383 igc_reset(adapter);
2408 3384
@@ -2456,7 +3432,10 @@ static void igc_remove(struct pci_dev *pdev)
2456 struct igc_adapter *adapter = netdev_priv(netdev); 3432 struct igc_adapter *adapter = netdev_priv(netdev);
2457 3433
2458 set_bit(__IGC_DOWN, &adapter->state); 3434 set_bit(__IGC_DOWN, &adapter->state);
2459 flush_scheduled_work(); 3435
3436 del_timer_sync(&adapter->watchdog_timer);
3437
3438 cancel_work_sync(&adapter->reset_task);
2460 3439
2461 /* Release control of h/w to f/w. If f/w is AMT enabled, this 3440 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2462 * would have already happened in close and is redundant. 3441 * would have already happened in close and is redundant.
@@ -2464,10 +3443,16 @@ static void igc_remove(struct pci_dev *pdev)
2464 igc_release_hw_control(adapter); 3443 igc_release_hw_control(adapter);
2465 unregister_netdev(netdev); 3444 unregister_netdev(netdev);
2466 3445
2467 pci_release_selected_regions(pdev, 3446 igc_clear_interrupt_scheme(adapter);
2468 pci_select_bars(pdev, IORESOURCE_MEM)); 3447 pci_iounmap(pdev, adapter->io_addr);
3448 pci_release_mem_regions(pdev);
2469 3449
3450 kfree(adapter->mac_table);
3451 kfree(adapter->shadow_vfta);
2470 free_netdev(netdev); 3452 free_netdev(netdev);
3453
3454 pci_disable_pcie_error_reporting(pdev);
3455
2471 pci_disable_device(pdev); 3456 pci_disable_device(pdev);
2472} 3457}
2473 3458
@@ -2478,6 +3463,39 @@ static struct pci_driver igc_driver = {
2478 .remove = igc_remove, 3463 .remove = igc_remove,
2479}; 3464};
2480 3465
3466static void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
3467 const u32 max_rss_queues)
3468{
3469 /* Determine if we need to pair queues. */
3470 /* If rss_queues > half of max_rss_queues, pair the queues in
3471 * order to conserve interrupts due to limited supply.
3472 */
3473 if (adapter->rss_queues > (max_rss_queues / 2))
3474 adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
3475 else
3476 adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
3477}
3478
3479static unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
3480{
3481 unsigned int max_rss_queues;
3482
3483 /* Determine the maximum number of RSS queues supported. */
3484 max_rss_queues = IGC_MAX_RX_QUEUES;
3485
3486 return max_rss_queues;
3487}
3488
3489static void igc_init_queue_configuration(struct igc_adapter *adapter)
3490{
3491 u32 max_rss_queues;
3492
3493 max_rss_queues = igc_get_max_rss_queues(adapter);
3494 adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
3495
3496 igc_set_flag_queue_pairs(adapter, max_rss_queues);
3497}
3498
2481/** 3499/**
2482 * igc_sw_init - Initialize general software structures (struct igc_adapter) 3500 * igc_sw_init - Initialize general software structures (struct igc_adapter)
2483 * @adapter: board private structure to initialize 3501 * @adapter: board private structure to initialize
@@ -2492,21 +3510,38 @@ static int igc_sw_init(struct igc_adapter *adapter)
2492 struct pci_dev *pdev = adapter->pdev; 3510 struct pci_dev *pdev = adapter->pdev;
2493 struct igc_hw *hw = &adapter->hw; 3511 struct igc_hw *hw = &adapter->hw;
2494 3512
2495 /* PCI config space info */ 3513 int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count;
2496 3514
2497 hw->vendor_id = pdev->vendor; 3515 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2498 hw->device_id = pdev->device;
2499 hw->subsystem_vendor_id = pdev->subsystem_vendor;
2500 hw->subsystem_device_id = pdev->subsystem_device;
2501 3516
2502 pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); 3517 /* set default ring sizes */
3518 adapter->tx_ring_count = IGC_DEFAULT_TXD;
3519 adapter->rx_ring_count = IGC_DEFAULT_RXD;
2503 3520
2504 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); 3521 /* set default ITR values */
3522 adapter->rx_itr_setting = IGC_DEFAULT_ITR;
3523 adapter->tx_itr_setting = IGC_DEFAULT_ITR;
3524
3525 /* set default work limits */
3526 adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
2505 3527
2506 /* adjust max frame to be at least the size of a standard frame */ 3528 /* adjust max frame to be at least the size of a standard frame */
2507 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + 3529 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2508 VLAN_HLEN; 3530 VLAN_HLEN;
3531 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
3532
3533 spin_lock_init(&adapter->nfc_lock);
3534 spin_lock_init(&adapter->stats64_lock);
3535 /* Assume MSI-X interrupts, will be checked during IRQ allocation */
3536 adapter->flags |= IGC_FLAG_HAS_MSIX;
3537
3538 adapter->mac_table = kzalloc(size, GFP_ATOMIC);
3539 if (!adapter->mac_table)
3540 return -ENOMEM;
3541
3542 igc_init_queue_configuration(adapter);
2509 3543
3544 /* This call may decrease the number of queues */
2510 if (igc_init_interrupt_scheme(adapter, true)) { 3545 if (igc_init_interrupt_scheme(adapter, true)) {
2511 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 3546 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2512 return -ENOMEM; 3547 return -ENOMEM;