author     David S. Miller <davem@davemloft.net>  2012-08-24 16:35:43 -0400
committer  David S. Miller <davem@davemloft.net>  2012-08-24 16:35:43 -0400
commit     255e87657a84e21986e5d9070f3dee4aa8d1d531 (patch)
tree       c20e26ce24779333d927fa4b172deb2d9df59ebf
parent     85c21049fc588e0a51b443fe2bad348d18f4992c (diff)
parent     8f8b3d518999fd1c342310910aa1e49112c86d05 (diff)
Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/bwh/sfc-next
Ben Hutchings says:

====================
1. Change the TX path to stop queues earlier and avoid returning
   NETDEV_TX_BUSY.
2. Remove some inefficiencies in soft-TSO.
3. Fix various bugs involving device state transitions and/or reset
   scheduling by error handlers.
4. Take advantage of my previous change to operstate initialisation.
5. Miscellaneous cleanup.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
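The heart of points 1 and 2 above is a stop-early/wake-late scheme for the TX
queues, added in efx_start_datapath() and efx_tx_maybe_stop_queue() in the
diff below. A minimal sketch of the idea, using simplified stand-in names
rather than the driver's own structures:

#include <linux/netdevice.h>

/* Illustrative sketch only, not part of the patch.  The queue is stopped
 * once one more maximally-fragmented skb might not fit, and woken again
 * from the completion path when the fill level drops back far enough, so
 * ndo_start_xmit never has to return NETDEV_TX_BUSY.
 */
struct example_txq {
	struct netdev_queue *core_txq;
	unsigned int insert_count;	/* written by the xmit path */
	unsigned int read_count;	/* written by the completion path */
	unsigned int old_read_count;	/* xmit path's cached copy */
	unsigned int stop_thresh;	/* entries - max descriptors per skb */
	unsigned int wake_thresh;	/* stop_thresh / 2 */
};

static void example_tx_maybe_stop(struct example_txq *q)
{
	unsigned int fill_level = q->insert_count - q->old_read_count;

	if (likely(fill_level < q->stop_thresh))
		return;

	/* Stop first, then re-read read_count, so a racing completion
	 * either sees the stopped queue or we see its progress here.
	 */
	netif_tx_stop_queue(q->core_txq);
	smp_mb();
	q->old_read_count = ACCESS_ONCE(q->read_count);
	fill_level = q->insert_count - q->old_read_count;
	if (likely(fill_level < q->stop_thresh))
		netif_tx_start_queue(q->core_txq);
}

The driver itself applies the same check across the pair of hardware queues
that back one net core queue (see efx_tx_queue_partner() in the tx.c hunks),
but the ordering argument is the same.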
-rw-r--r--   drivers/net/ethernet/sfc/efx.c            235
-rw-r--r--   drivers/net/ethernet/sfc/ethtool.c          4
-rw-r--r--   drivers/net/ethernet/sfc/falcon_boards.c    2
-rw-r--r--   drivers/net/ethernet/sfc/net_driver.h      49
-rw-r--r--   drivers/net/ethernet/sfc/nic.c              6
-rw-r--r--   drivers/net/ethernet/sfc/tx.c             621
6 files changed, 410 insertions(+), 507 deletions(-)
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 65a8d49106a4..a606db43c5ba 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -202,11 +202,21 @@ static void efx_stop_all(struct efx_nic *efx);
 
 #define EFX_ASSERT_RESET_SERIALISED(efx)		\
 	do {						\
-		if ((efx->state == STATE_RUNNING) ||	\
+		if ((efx->state == STATE_READY) ||	\
 		    (efx->state == STATE_DISABLED))	\
 			ASSERT_RTNL();			\
 	} while (0)
 
+static int efx_check_disabled(struct efx_nic *efx)
+{
+	if (efx->state == STATE_DISABLED) {
+		netif_err(efx, drv, efx->net_dev,
+			  "device is disabled due to earlier errors\n");
+		return -EIO;
+	}
+	return 0;
+}
+
 /**************************************************************************
  *
  * Event queue processing
@@ -630,6 +640,16 @@ static void efx_start_datapath(struct efx_nic *efx)
 	efx->rx_buffer_order = get_order(efx->rx_buffer_len +
 					 sizeof(struct efx_rx_page_state));
 
+	/* We must keep at least one descriptor in a TX ring empty.
+	 * We could avoid this when the queue size does not exactly
+	 * match the hardware ring size, but it's not that important.
+	 * Therefore we stop the queue when one more skb might fill
+	 * the ring completely.  We wake it when half way back to
+	 * empty.
+	 */
+	efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
+	efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
+
 	/* Initialise the channels */
 	efx_for_each_channel(channel, efx) {
 		efx_for_each_channel_tx_queue(tx_queue, channel)
@@ -730,7 +750,11 @@ efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
 	struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
 	u32 old_rxq_entries, old_txq_entries;
 	unsigned i, next_buffer_table = 0;
-	int rc = 0;
+	int rc;
+
+	rc = efx_check_disabled(efx);
+	if (rc)
+		return rc;
 
 	/* Not all channels should be reallocated. We must avoid
 	 * reallocating their buffer table entries.
@@ -1365,6 +1389,8 @@ static void efx_start_interrupts(struct efx_nic *efx, bool may_keep_eventq)
 {
 	struct efx_channel *channel;
 
+	BUG_ON(efx->state == STATE_DISABLED);
+
 	if (efx->legacy_irq)
 		efx->legacy_irq_enabled = true;
 	efx_nic_enable_interrupts(efx);
@@ -1382,6 +1408,9 @@ static void efx_stop_interrupts(struct efx_nic *efx, bool may_keep_eventq)
 {
 	struct efx_channel *channel;
 
+	if (efx->state == STATE_DISABLED)
+		return;
+
 	efx_mcdi_mode_poll(efx);
 
 	efx_nic_disable_interrupts(efx);
@@ -1533,22 +1562,21 @@ static int efx_probe_all(struct efx_nic *efx)
 	return rc;
 }
 
-/* Called after previous invocation(s) of efx_stop_all, restarts the port,
- * kernel transmit queues and NAPI processing, and ensures that the port is
- * scheduled to be reconfigured. This function is safe to call multiple
- * times when the NIC is in any state.
+/* If the interface is supposed to be running but is not, start
+ * the hardware and software data path, regular activity for the port
+ * (MAC statistics, link polling, etc.) and schedule the port to be
+ * reconfigured.  Interrupts must already be enabled.  This function
+ * is safe to call multiple times, so long as the NIC is not disabled.
+ * Requires the RTNL lock.
  */
 static void efx_start_all(struct efx_nic *efx)
 {
 	EFX_ASSERT_RESET_SERIALISED(efx);
+	BUG_ON(efx->state == STATE_DISABLED);
 
 	/* Check that it is appropriate to restart the interface. All
 	 * of these flags are safe to read under just the rtnl lock */
-	if (efx->port_enabled)
-		return;
-	if ((efx->state != STATE_RUNNING) && (efx->state != STATE_INIT))
-		return;
-	if (!netif_running(efx->net_dev))
+	if (efx->port_enabled || !netif_running(efx->net_dev))
 		return;
 
 	efx_start_port(efx);
@@ -1582,11 +1610,11 @@ static void efx_flush_all(struct efx_nic *efx)
 	cancel_work_sync(&efx->mac_work);
 }
 
-/* Quiesce hardware and software without bringing the link down.
- * Safe to call multiple times, when the nic and interface is in any
- * state. The caller is guaranteed to subsequently be in a position
- * to modify any hardware and software state they see fit without
- * taking locks. */
+/* Quiesce the hardware and software data path, and regular activity
+ * for the port without bringing the link down.  Safe to call multiple
+ * times with the NIC in almost any state, but interrupts should be
+ * enabled.  Requires the RTNL lock.
+ */
 static void efx_stop_all(struct efx_nic *efx)
 {
 	EFX_ASSERT_RESET_SERIALISED(efx);
@@ -1739,8 +1767,6 @@ static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
 	struct efx_nic *efx = netdev_priv(net_dev);
 	struct mii_ioctl_data *data = if_mii(ifr);
 
-	EFX_ASSERT_RESET_SERIALISED(efx);
-
 	/* Convert phy_id from older PRTAD/DEVAD format */
 	if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) &&
 	    (data->phy_id & 0xfc00) == 0x0400)
@@ -1820,13 +1846,14 @@ static void efx_netpoll(struct net_device *net_dev)
 static int efx_net_open(struct net_device *net_dev)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
-	EFX_ASSERT_RESET_SERIALISED(efx);
+	int rc;
 
 	netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
 		  raw_smp_processor_id());
 
-	if (efx->state == STATE_DISABLED)
-		return -EIO;
+	rc = efx_check_disabled(efx);
+	if (rc)
+		return rc;
 	if (efx->phy_mode & PHY_MODE_SPECIAL)
 		return -EBUSY;
 	if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL))
@@ -1852,10 +1879,8 @@ static int efx_net_stop(struct net_device *net_dev)
 	netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
 		  raw_smp_processor_id());
 
-	if (efx->state != STATE_DISABLED) {
-		/* Stop the device and flush all the channels */
-		efx_stop_all(efx);
-	}
+	/* Stop the device and flush all the channels */
+	efx_stop_all(efx);
 
 	return 0;
 }
@@ -1915,9 +1940,11 @@ static void efx_watchdog(struct net_device *net_dev)
 static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
+	int rc;
 
-	EFX_ASSERT_RESET_SERIALISED(efx);
-
+	rc = efx_check_disabled(efx);
+	if (rc)
+		return rc;
 	if (new_mtu > EFX_MAX_MTU)
 		return -EINVAL;
 
@@ -1926,8 +1953,6 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
 	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
 
 	mutex_lock(&efx->mac_lock);
-	/* Reconfigure the MAC before enabling the dma queues so that
-	 * the RX buffers don't overflow */
 	net_dev->mtu = new_mtu;
 	efx->type->reconfigure_mac(efx);
 	mutex_unlock(&efx->mac_lock);
@@ -1942,8 +1967,6 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data)
 	struct sockaddr *addr = data;
 	char *new_addr = addr->sa_data;
 
-	EFX_ASSERT_RESET_SERIALISED(efx);
-
 	if (!is_valid_ether_addr(new_addr)) {
 		netif_err(efx, drv, efx->net_dev,
 			  "invalid ethernet MAC address requested: %pM\n",
@@ -2079,11 +2102,27 @@ static int efx_register_netdev(struct efx_nic *efx)
 
 	rtnl_lock();
 
+	/* Enable resets to be scheduled and check whether any were
+	 * already requested.  If so, the NIC is probably hosed so we
+	 * abort.
+	 */
+	efx->state = STATE_READY;
+	smp_mb(); /* ensure we change state before checking reset_pending */
+	if (efx->reset_pending) {
+		netif_err(efx, probe, efx->net_dev,
+			  "aborting probe due to scheduled reset\n");
+		rc = -EIO;
+		goto fail_locked;
+	}
+
 	rc = dev_alloc_name(net_dev, net_dev->name);
 	if (rc < 0)
 		goto fail_locked;
 	efx_update_name(efx);
 
+	/* Always start with carrier off; PHY events will detect the link */
+	netif_carrier_off(net_dev);
+
 	rc = register_netdevice(net_dev);
 	if (rc)
 		goto fail_locked;
@@ -2094,9 +2133,6 @@ static int efx_register_netdev(struct efx_nic *efx)
 		efx_init_tx_queue_core_txq(tx_queue);
 	}
 
-	/* Always start with carrier off; PHY events will detect the link */
-	netif_carrier_off(net_dev);
-
 	rtnl_unlock();
 
 	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
@@ -2108,14 +2144,14 @@ static int efx_register_netdev(struct efx_nic *efx)
 
 	return 0;
 
+fail_registered:
+	rtnl_lock();
+	unregister_netdevice(net_dev);
 fail_locked:
+	efx->state = STATE_UNINIT;
 	rtnl_unlock();
 	netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
 	return rc;
-
-fail_registered:
-	unregister_netdev(net_dev);
-	return rc;
 }
 
 static void efx_unregister_netdev(struct efx_nic *efx)
@@ -2138,7 +2174,11 @@ static void efx_unregister_netdev(struct efx_nic *efx)
 
 	strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
 	device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
-	unregister_netdev(efx->net_dev);
+
+	rtnl_lock();
+	unregister_netdevice(efx->net_dev);
+	efx->state = STATE_UNINIT;
+	rtnl_unlock();
 }
 
 /**************************************************************************
@@ -2154,9 +2194,9 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method)
 	EFX_ASSERT_RESET_SERIALISED(efx);
 
 	efx_stop_all(efx);
-	mutex_lock(&efx->mac_lock);
-
 	efx_stop_interrupts(efx, false);
+
+	mutex_lock(&efx->mac_lock);
 	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE)
 		efx->phy_op->fini(efx);
 	efx->type->fini(efx);
@@ -2276,16 +2316,15 @@ static void efx_reset_work(struct work_struct *data)
 	if (!pending)
 		return;
 
-	/* If we're not RUNNING then don't reset. Leave the reset_pending
-	 * flags set so that efx_pci_probe_main will be retried */
-	if (efx->state != STATE_RUNNING) {
-		netif_info(efx, drv, efx->net_dev,
-			   "scheduled reset quenched. NIC not RUNNING\n");
-		return;
-	}
-
 	rtnl_lock();
-	(void)efx_reset(efx, fls(pending) - 1);
+
+	/* We checked the state in efx_schedule_reset() but it may
+	 * have changed by now.  Now that we have the RTNL lock,
+	 * it cannot change again.
+	 */
+	if (efx->state == STATE_READY)
+		(void)efx_reset(efx, fls(pending) - 1);
+
 	rtnl_unlock();
 }
 
@@ -2311,6 +2350,13 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
 	}
 
 	set_bit(method, &efx->reset_pending);
+	smp_mb(); /* ensure we change reset_pending before checking state */
+
+	/* If we're not READY then just leave the flags set as the cue
+	 * to abort probing or reschedule the reset later.
+	 */
+	if (ACCESS_ONCE(efx->state) != STATE_READY)
+		return;
 
 	/* efx_process_channel() will no longer read events once a
 	 * reset is scheduled. So switch back to poll'd MCDI completions. */
@@ -2376,13 +2422,12 @@ static const struct efx_phy_operations efx_dummy_phy_operations = {
 /* This zeroes out and then fills in the invariants in a struct
  * efx_nic (including all sub-structures).
  */
-static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type,
+static int efx_init_struct(struct efx_nic *efx,
 			   struct pci_dev *pci_dev, struct net_device *net_dev)
 {
 	int i;
 
 	/* Initialise common structures */
-	memset(efx, 0, sizeof(*efx));
 	spin_lock_init(&efx->biu_lock);
 #ifdef CONFIG_SFC_MTD
 	INIT_LIST_HEAD(&efx->mtd_list);
@@ -2392,7 +2437,7 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type,
 	INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work);
 	efx->pci_dev = pci_dev;
 	efx->msg_enable = debug;
-	efx->state = STATE_INIT;
+	efx->state = STATE_UNINIT;
 	strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
 
 	efx->net_dev = net_dev;
@@ -2409,8 +2454,6 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type,
 		goto fail;
 	}
 
-	efx->type = type;
-
 	EFX_BUG_ON_PARANOID(efx->type->phys_addr_channels > EFX_MAX_CHANNELS);
 
 	/* Higher numbered interrupt modes are less capable! */
@@ -2455,6 +2498,12 @@ static void efx_fini_struct(struct efx_nic *efx)
  */
 static void efx_pci_remove_main(struct efx_nic *efx)
 {
+	/* Flush reset_work. It can no longer be scheduled since we
+	 * are not READY.
+	 */
+	BUG_ON(efx->state == STATE_READY);
+	cancel_work_sync(&efx->reset_work);
+
 #ifdef CONFIG_RFS_ACCEL
 	free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
 	efx->net_dev->rx_cpu_rmap = NULL;
@@ -2480,24 +2529,15 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
 
 	/* Mark the NIC as fini, then stop the interface */
 	rtnl_lock();
-	efx->state = STATE_FINI;
 	dev_close(efx->net_dev);
-
-	/* Allow any queued efx_resets() to complete */
+	efx_stop_interrupts(efx, false);
 	rtnl_unlock();
 
-	efx_stop_interrupts(efx, false);
 	efx_sriov_fini(efx);
 	efx_unregister_netdev(efx);
 
 	efx_mtd_remove(efx);
 
-	/* Wait for any scheduled resets to complete. No more will be
-	 * scheduled from this point because efx_stop_all() has been
-	 * called, we are no longer registered with driverlink, and
-	 * the net_device's have been removed. */
-	cancel_work_sync(&efx->reset_work);
-
 	efx_pci_remove_main(efx);
 
 	efx_fini_io(efx);
@@ -2617,7 +2657,6 @@ static int efx_pci_probe_main(struct efx_nic *efx)
 static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
 				   const struct pci_device_id *entry)
 {
-	const struct efx_nic_type *type = (const struct efx_nic_type *) entry->driver_data;
 	struct net_device *net_dev;
 	struct efx_nic *efx;
 	int rc;
@@ -2627,10 +2666,12 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
 				     EFX_MAX_RX_QUEUES);
 	if (!net_dev)
 		return -ENOMEM;
-	net_dev->features |= (type->offload_features | NETIF_F_SG |
+	efx = netdev_priv(net_dev);
+	efx->type = (const struct efx_nic_type *) entry->driver_data;
+	net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
 			      NETIF_F_HIGHDMA | NETIF_F_TSO |
 			      NETIF_F_RXCSUM);
-	if (type->offload_features & NETIF_F_V6_CSUM)
+	if (efx->type->offload_features & NETIF_F_V6_CSUM)
 		net_dev->features |= NETIF_F_TSO6;
 	/* Mask for features that also apply to VLAN devices */
 	net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG |
@@ -2638,10 +2679,9 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
 				   NETIF_F_RXCSUM);
 	/* All offloads can be toggled */
 	net_dev->hw_features = net_dev->features & ~NETIF_F_HIGHDMA;
-	efx = netdev_priv(net_dev);
 	pci_set_drvdata(pci_dev, efx);
 	SET_NETDEV_DEV(net_dev, &pci_dev->dev);
-	rc = efx_init_struct(efx, type, pci_dev, net_dev);
+	rc = efx_init_struct(efx, pci_dev, net_dev);
 	if (rc)
 		goto fail1;
 
@@ -2656,28 +2696,9 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
 		goto fail2;
 
 	rc = efx_pci_probe_main(efx);
-
-	/* Serialise against efx_reset(). No more resets will be
-	 * scheduled since efx_stop_all() has been called, and we have
-	 * not and never have been registered.
-	 */
-	cancel_work_sync(&efx->reset_work);
-
 	if (rc)
 		goto fail3;
 
-	/* If there was a scheduled reset during probe, the NIC is
-	 * probably hosed anyway.
-	 */
-	if (efx->reset_pending) {
-		rc = -EIO;
-		goto fail4;
-	}
-
-	/* Switch to the running state before we expose the device to the OS,
-	 * so that dev_open()|efx_start_all() will actually start the device */
-	efx->state = STATE_RUNNING;
-
 	rc = efx_register_netdev(efx);
 	if (rc)
 		goto fail4;
@@ -2717,12 +2738,18 @@ static int efx_pm_freeze(struct device *dev)
 {
 	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
 
-	efx->state = STATE_FINI;
+	rtnl_lock();
 
-	netif_device_detach(efx->net_dev);
+	if (efx->state != STATE_DISABLED) {
+		efx->state = STATE_UNINIT;
 
-	efx_stop_all(efx);
-	efx_stop_interrupts(efx, false);
+		netif_device_detach(efx->net_dev);
+
+		efx_stop_all(efx);
+		efx_stop_interrupts(efx, false);
+	}
+
+	rtnl_unlock();
 
 	return 0;
 }
@@ -2731,21 +2758,25 @@ static int efx_pm_thaw(struct device *dev)
 {
 	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
 
-	efx->state = STATE_INIT;
+	rtnl_lock();
 
-	efx_start_interrupts(efx, false);
+	if (efx->state != STATE_DISABLED) {
+		efx_start_interrupts(efx, false);
 
-	mutex_lock(&efx->mac_lock);
-	efx->phy_op->reconfigure(efx);
-	mutex_unlock(&efx->mac_lock);
+		mutex_lock(&efx->mac_lock);
+		efx->phy_op->reconfigure(efx);
+		mutex_unlock(&efx->mac_lock);
 
-	efx_start_all(efx);
+		efx_start_all(efx);
 
-	netif_device_attach(efx->net_dev);
+		netif_device_attach(efx->net_dev);
 
-	efx->state = STATE_RUNNING;
+		efx->state = STATE_READY;
 
-	efx->type->resume_wol(efx);
+		efx->type->resume_wol(efx);
+	}
+
+	rtnl_unlock();
 
 	/* Reschedule any quenched resets scheduled during efx_pm_freeze() */
 	queue_work(reset_workqueue, &efx->reset_work);
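The STATE_READY/reset_pending handshake added in efx_register_netdev() and
efx_schedule_reset() above follows a store-then-check pattern. A minimal
sketch of why the paired smp_mb() calls keep a reset requested during probe
from being lost (simplified stand-in types, not the driver code):

#include <linux/bitops.h>
#include <linux/errno.h>

enum example_state { STATE_UNINIT, STATE_READY, STATE_DISABLED };

struct example_nic {
	enum example_state state;
	unsigned long reset_pending;	/* bitmask of requested reset types */
};

/* Probe path: publish READY, then look for resets requested earlier. */
static int example_probe_finish(struct example_nic *nic)
{
	nic->state = STATE_READY;
	smp_mb();			/* store state before loading reset_pending */
	if (nic->reset_pending)
		return -EIO;		/* handle the early request here */
	return 0;
}

/* Error path: publish the request, then look at the state. */
static void example_schedule_reset(struct example_nic *nic, int type)
{
	set_bit(type, &nic->reset_pending);
	smp_mb();			/* store reset_pending before loading state */
	if (ACCESS_ONCE(nic->state) != STATE_READY)
		return;			/* probe will see the pending bit instead */
	/* ...otherwise queue the reset work here... */
}

Each path publishes its own flag before reading the other's, so at least one
of the two must observe the other's update: either the probe aborts with
-EIO, or the scheduler sees STATE_READY and queues the work.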
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 8cba2df82b18..2bd5c2d35e5d 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -529,9 +529,7 @@ static void efx_ethtool_self_test(struct net_device *net_dev,
 	if (!efx_tests)
 		goto fail;
 
-
-	ASSERT_RTNL();
-	if (efx->state != STATE_RUNNING) {
+	if (efx->state != STATE_READY) {
 		rc = -EIO;
 		goto fail1;
 	}
diff --git a/drivers/net/ethernet/sfc/falcon_boards.c b/drivers/net/ethernet/sfc/falcon_boards.c
index 8687a6c3db0d..ec1e99d0dcad 100644
--- a/drivers/net/ethernet/sfc/falcon_boards.c
+++ b/drivers/net/ethernet/sfc/falcon_boards.c
@@ -380,7 +380,7 @@ static ssize_t set_phy_flash_cfg(struct device *dev,
 		new_mode = PHY_MODE_SPECIAL;
 	if (!((old_mode ^ new_mode) & PHY_MODE_SPECIAL)) {
 		err = 0;
-	} else if (efx->state != STATE_RUNNING || netif_running(efx->net_dev)) {
+	} else if (efx->state != STATE_READY || netif_running(efx->net_dev)) {
 		err = -EBUSY;
 	} else {
 		/* Reset the PHY, reconfigure the MAC and enable/disable
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index cd9c0a989692..7ab1232494ef 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -91,29 +91,31 @@ struct efx_special_buffer {
 };
 
 /**
- * struct efx_tx_buffer - An Efx TX buffer
- * @skb: The associated socket buffer.
- *	Set only on the final fragment of a packet; %NULL for all other
- *	fragments.  When this fragment completes, then we can free this
- *	skb.
- * @tsoh: The associated TSO header structure, or %NULL if this
- *	buffer is not a TSO header.
+ * struct efx_tx_buffer - buffer state for a TX descriptor
+ * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be
+ *	freed when descriptor completes
+ * @heap_buf: When @flags & %EFX_TX_BUF_HEAP, the associated heap buffer to be
+ *	freed when descriptor completes.
  * @dma_addr: DMA address of the fragment.
+ * @flags: Flags for allocation and DMA mapping type
  * @len: Length of this fragment.
  *	This field is zero when the queue slot is empty.
- * @continuation: True if this fragment is not the end of a packet.
- * @unmap_single: True if dma_unmap_single should be used.
  * @unmap_len: Length of this fragment to unmap
  */
 struct efx_tx_buffer {
-	const struct sk_buff *skb;
-	struct efx_tso_header *tsoh;
+	union {
+		const struct sk_buff *skb;
+		void *heap_buf;
+	};
 	dma_addr_t dma_addr;
+	unsigned short flags;
 	unsigned short len;
-	bool continuation;
-	bool unmap_single;
 	unsigned short unmap_len;
 };
+#define EFX_TX_BUF_CONT		1	/* not last descriptor of packet */
+#define EFX_TX_BUF_SKB		2	/* buffer is last part of skb */
+#define EFX_TX_BUF_HEAP		4	/* buffer was allocated with kmalloc() */
+#define EFX_TX_BUF_MAP_SINGLE	8	/* buffer was mapped with dma_map_single() */
 
 /**
  * struct efx_tx_queue - An Efx TX queue
@@ -133,6 +135,7 @@ struct efx_tx_buffer {
  * @channel: The associated channel
  * @core_txq: The networking core TX queue structure
  * @buffer: The software buffer ring
+ * @tsoh_page: Array of pages of TSO header buffers
  * @txd: The hardware descriptor ring
  * @ptr_mask: The size of the ring minus 1.
  * @initialised: Has hardware queue been initialised?
@@ -156,9 +159,6 @@ struct efx_tx_buffer {
  *	variable indicates that the queue is full. This is to
  *	avoid cache-line ping-pong between the xmit path and the
  *	completion path.
- * @tso_headers_free: A list of TSO headers allocated for this TX queue
- *	that are not in use, and so available for new TSO sends. The list
- *	is protected by the TX queue lock.
  * @tso_bursts: Number of times TSO xmit invoked by kernel
  * @tso_long_headers: Number of packets with headers too long for standard
  *	blocks
@@ -175,6 +175,7 @@ struct efx_tx_queue {
 	struct efx_channel *channel;
 	struct netdev_queue *core_txq;
 	struct efx_tx_buffer *buffer;
+	struct efx_buffer *tsoh_page;
 	struct efx_special_buffer txd;
 	unsigned int ptr_mask;
 	bool initialised;
@@ -187,7 +188,6 @@ struct efx_tx_queue {
 	unsigned int insert_count ____cacheline_aligned_in_smp;
 	unsigned int write_count;
 	unsigned int old_read_count;
-	struct efx_tso_header *tso_headers_free;
 	unsigned int tso_bursts;
 	unsigned int tso_long_headers;
 	unsigned int tso_packets;
@@ -430,11 +430,9 @@ enum efx_int_mode {
 #define EFX_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EFX_INT_MODE_MSI)
 
 enum nic_state {
-	STATE_INIT = 0,
-	STATE_RUNNING = 1,
-	STATE_FINI = 2,
-	STATE_DISABLED = 3,
-	STATE_MAX,
+	STATE_UNINIT = 0,	/* device being probed/removed or is frozen */
+	STATE_READY = 1,	/* hardware ready and netdev registered */
+	STATE_DISABLED = 2,	/* device disabled due to hardware errors */
 };
 
 /*
@@ -654,7 +652,7 @@ struct vfdi_status;
 * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues
 * @irq_rx_moderation: IRQ moderation time for RX event queues
 * @msg_enable: Log message enable flags
- * @state: Device state flag. Serialised by the rtnl_lock.
+ * @state: Device state number (%STATE_*). Serialised by the rtnl_lock.
 * @reset_pending: Bitmask for pending resets
 * @tx_queue: TX DMA queues
 * @rx_queue: RX DMA queues
@@ -664,6 +662,8 @@ struct vfdi_status;
 *	should be allocated for this NIC
 * @rxq_entries: Size of receive queues requested by user.
 * @txq_entries: Size of transmit queues requested by user.
+ * @txq_stop_thresh: TX queue fill level at or above which we stop it.
+ * @txq_wake_thresh: TX queue fill level at or below which we wake it.
 * @tx_dc_base: Base qword address in SRAM of TX queue descriptor caches
 * @rx_dc_base: Base qword address in SRAM of RX queue descriptor caches
 * @sram_lim_qw: Qword address limit of SRAM
@@ -774,6 +774,9 @@ struct efx_nic {
 
 	unsigned rxq_entries;
 	unsigned txq_entries;
+	unsigned int txq_stop_thresh;
+	unsigned int txq_wake_thresh;
+
 	unsigned tx_dc_base;
 	unsigned rx_dc_base;
 	unsigned sram_lim_qw;
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index 326d799762d6..cdff40b65729 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -298,7 +298,7 @@ efx_free_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer)
 /**************************************************************************
  *
  * Generic buffer handling
- * These buffers are used for interrupt status and MAC stats
+ * These buffers are used for interrupt status, MAC stats, etc.
  *
  **************************************************************************/
 
@@ -401,8 +401,10 @@ void efx_nic_push_buffers(struct efx_tx_queue *tx_queue)
 		++tx_queue->write_count;
 
 		/* Create TX descriptor ring entry */
+		BUILD_BUG_ON(EFX_TX_BUF_CONT != 1);
 		EFX_POPULATE_QWORD_4(*txd,
-				     FSF_AZ_TX_KER_CONT, buffer->continuation,
+				     FSF_AZ_TX_KER_CONT,
+				     buffer->flags & EFX_TX_BUF_CONT,
 				     FSF_AZ_TX_KER_BYTE_COUNT, buffer->len,
 				     FSF_AZ_TX_KER_BUF_REGION, 0,
 				     FSF_AZ_TX_KER_BUF_ADDR, buffer->dma_addr);
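The BUILD_BUG_ON() added above pins down a subtle assumption: buffer->flags &
EFX_TX_BUF_CONT is written directly into the one-bit FSF_AZ_TX_KER_CONT
descriptor field, which only yields 0 or 1 while EFX_TX_BUF_CONT stays at
bit 0. A minimal illustration (hypothetical helper, not driver code):

#include <linux/bug.h>

#define EXAMPLE_TX_BUF_CONT 1			/* must remain bit 0 */

static inline unsigned int example_cont_field(unsigned short flags)
{
	/* Compile-time check that plain masking really gives 0 or 1 */
	BUILD_BUG_ON(EXAMPLE_TX_BUF_CONT != 1);
	return flags & EXAMPLE_TX_BUF_CONT;
}

If the flag were ever moved to a different bit, the value would need to be
normalised instead, e.g. !!(flags & EXAMPLE_TX_BUF_CONT).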
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 18713436b443..ebca75ed78dc 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -22,14 +22,6 @@
22#include "nic.h" 22#include "nic.h"
23#include "workarounds.h" 23#include "workarounds.h"
24 24
25/*
26 * TX descriptor ring full threshold
27 *
28 * The tx_queue descriptor ring fill-level must fall below this value
29 * before we restart the netif queue
30 */
31#define EFX_TXQ_THRESHOLD(_efx) ((_efx)->txq_entries / 2u)
32
33static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, 25static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
34 struct efx_tx_buffer *buffer, 26 struct efx_tx_buffer *buffer,
35 unsigned int *pkts_compl, 27 unsigned int *pkts_compl,
@@ -39,67 +31,32 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
39 struct device *dma_dev = &tx_queue->efx->pci_dev->dev; 31 struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
40 dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len - 32 dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len -
41 buffer->unmap_len); 33 buffer->unmap_len);
42 if (buffer->unmap_single) 34 if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
43 dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len, 35 dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
44 DMA_TO_DEVICE); 36 DMA_TO_DEVICE);
45 else 37 else
46 dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len, 38 dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
47 DMA_TO_DEVICE); 39 DMA_TO_DEVICE);
48 buffer->unmap_len = 0; 40 buffer->unmap_len = 0;
49 buffer->unmap_single = false;
50 } 41 }
51 42
52 if (buffer->skb) { 43 if (buffer->flags & EFX_TX_BUF_SKB) {
53 (*pkts_compl)++; 44 (*pkts_compl)++;
54 (*bytes_compl) += buffer->skb->len; 45 (*bytes_compl) += buffer->skb->len;
55 dev_kfree_skb_any((struct sk_buff *) buffer->skb); 46 dev_kfree_skb_any((struct sk_buff *) buffer->skb);
56 buffer->skb = NULL;
57 netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, 47 netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
58 "TX queue %d transmission id %x complete\n", 48 "TX queue %d transmission id %x complete\n",
59 tx_queue->queue, tx_queue->read_count); 49 tx_queue->queue, tx_queue->read_count);
50 } else if (buffer->flags & EFX_TX_BUF_HEAP) {
51 kfree(buffer->heap_buf);
60 } 52 }
61}
62 53
63/** 54 buffer->len = 0;
64 * struct efx_tso_header - a DMA mapped buffer for packet headers 55 buffer->flags = 0;
65 * @next: Linked list of free ones. 56}
66 * The list is protected by the TX queue lock.
67 * @dma_unmap_len: Length to unmap for an oversize buffer, or 0.
68 * @dma_addr: The DMA address of the header below.
69 *
70 * This controls the memory used for a TSO header. Use TSOH_DATA()
71 * to find the packet header data. Use TSOH_SIZE() to calculate the
72 * total size required for a given packet header length. TSO headers
73 * in the free list are exactly %TSOH_STD_SIZE bytes in size.
74 */
75struct efx_tso_header {
76 union {
77 struct efx_tso_header *next;
78 size_t unmap_len;
79 };
80 dma_addr_t dma_addr;
81};
82 57
83static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, 58static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
84 struct sk_buff *skb); 59 struct sk_buff *skb);
85static void efx_fini_tso(struct efx_tx_queue *tx_queue);
86static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
87 struct efx_tso_header *tsoh);
88
89static void efx_tsoh_free(struct efx_tx_queue *tx_queue,
90 struct efx_tx_buffer *buffer)
91{
92 if (buffer->tsoh) {
93 if (likely(!buffer->tsoh->unmap_len)) {
94 buffer->tsoh->next = tx_queue->tso_headers_free;
95 tx_queue->tso_headers_free = buffer->tsoh;
96 } else {
97 efx_tsoh_heap_free(tx_queue, buffer->tsoh);
98 }
99 buffer->tsoh = NULL;
100 }
101}
102
103 60
104static inline unsigned 61static inline unsigned
105efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr) 62efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr)
@@ -138,6 +95,56 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
138 return max_descs; 95 return max_descs;
139} 96}
140 97
98/* Get partner of a TX queue, seen as part of the same net core queue */
99static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue)
100{
101 if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD)
102 return tx_queue - EFX_TXQ_TYPE_OFFLOAD;
103 else
104 return tx_queue + EFX_TXQ_TYPE_OFFLOAD;
105}
106
107static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
108{
109 /* We need to consider both queues that the net core sees as one */
110 struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1);
111 struct efx_nic *efx = txq1->efx;
112 unsigned int fill_level;
113
114 fill_level = max(txq1->insert_count - txq1->old_read_count,
115 txq2->insert_count - txq2->old_read_count);
116 if (likely(fill_level < efx->txq_stop_thresh))
117 return;
118
119 /* We used the stale old_read_count above, which gives us a
120 * pessimistic estimate of the fill level (which may even
121 * validly be >= efx->txq_entries). Now try again using
122 * read_count (more likely to be a cache miss).
123 *
124 * If we read read_count and then conditionally stop the
125 * queue, it is possible for the completion path to race with
126 * us and complete all outstanding descriptors in the middle,
127 * after which there will be no more completions to wake it.
128 * Therefore we stop the queue first, then read read_count
129 * (with a memory barrier to ensure the ordering), then
130 * restart the queue if the fill level turns out to be low
131 * enough.
132 */
133 netif_tx_stop_queue(txq1->core_txq);
134 smp_mb();
135 txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
136 txq2->old_read_count = ACCESS_ONCE(txq2->read_count);
137
138 fill_level = max(txq1->insert_count - txq1->old_read_count,
139 txq2->insert_count - txq2->old_read_count);
140 EFX_BUG_ON_PARANOID(fill_level >= efx->txq_entries);
141 if (likely(fill_level < efx->txq_stop_thresh)) {
142 smp_mb();
143 if (likely(!efx->loopback_selftest))
144 netif_tx_start_queue(txq1->core_txq);
145 }
146}
147
141/* 148/*
142 * Add a socket buffer to a TX queue 149 * Add a socket buffer to a TX queue
143 * 150 *
@@ -151,7 +158,7 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
151 * This function is split out from efx_hard_start_xmit to allow the 158 * This function is split out from efx_hard_start_xmit to allow the
152 * loopback test to direct packets via specific TX queues. 159 * loopback test to direct packets via specific TX queues.
153 * 160 *
154 * Returns NETDEV_TX_OK or NETDEV_TX_BUSY 161 * Returns NETDEV_TX_OK.
155 * You must hold netif_tx_lock() to call this function. 162 * You must hold netif_tx_lock() to call this function.
156 */ 163 */
157netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) 164netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
@@ -160,12 +167,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
160 struct device *dma_dev = &efx->pci_dev->dev; 167 struct device *dma_dev = &efx->pci_dev->dev;
161 struct efx_tx_buffer *buffer; 168 struct efx_tx_buffer *buffer;
162 skb_frag_t *fragment; 169 skb_frag_t *fragment;
163 unsigned int len, unmap_len = 0, fill_level, insert_ptr; 170 unsigned int len, unmap_len = 0, insert_ptr;
164 dma_addr_t dma_addr, unmap_addr = 0; 171 dma_addr_t dma_addr, unmap_addr = 0;
165 unsigned int dma_len; 172 unsigned int dma_len;
166 bool unmap_single; 173 unsigned short dma_flags;
167 int q_space, i = 0; 174 int i = 0;
168 netdev_tx_t rc = NETDEV_TX_OK;
169 175
170 EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); 176 EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
171 177
@@ -183,14 +189,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
183 return NETDEV_TX_OK; 189 return NETDEV_TX_OK;
184 } 190 }
185 191
186 fill_level = tx_queue->insert_count - tx_queue->old_read_count;
187 q_space = efx->txq_entries - 1 - fill_level;
188
189 /* Map for DMA. Use dma_map_single rather than dma_map_page 192 /* Map for DMA. Use dma_map_single rather than dma_map_page
190 * since this is more efficient on machines with sparse 193 * since this is more efficient on machines with sparse
191 * memory. 194 * memory.
192 */ 195 */
193 unmap_single = true; 196 dma_flags = EFX_TX_BUF_MAP_SINGLE;
194 dma_addr = dma_map_single(dma_dev, skb->data, len, PCI_DMA_TODEVICE); 197 dma_addr = dma_map_single(dma_dev, skb->data, len, PCI_DMA_TODEVICE);
195 198
196 /* Process all fragments */ 199 /* Process all fragments */
@@ -205,39 +208,10 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
205 208
206 /* Add to TX queue, splitting across DMA boundaries */ 209 /* Add to TX queue, splitting across DMA boundaries */
207 do { 210 do {
208 if (unlikely(q_space-- <= 0)) {
209 /* It might be that completions have
210 * happened since the xmit path last
211 * checked. Update the xmit path's
212 * copy of read_count.
213 */
214 netif_tx_stop_queue(tx_queue->core_txq);
215 /* This memory barrier protects the
216 * change of queue state from the access
217 * of read_count. */
218 smp_mb();
219 tx_queue->old_read_count =
220 ACCESS_ONCE(tx_queue->read_count);
221 fill_level = (tx_queue->insert_count
222 - tx_queue->old_read_count);
223 q_space = efx->txq_entries - 1 - fill_level;
224 if (unlikely(q_space-- <= 0)) {
225 rc = NETDEV_TX_BUSY;
226 goto unwind;
227 }
228 smp_mb();
229 if (likely(!efx->loopback_selftest))
230 netif_tx_start_queue(
231 tx_queue->core_txq);
232 }
233
234 insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; 211 insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
235 buffer = &tx_queue->buffer[insert_ptr]; 212 buffer = &tx_queue->buffer[insert_ptr];
236 efx_tsoh_free(tx_queue, buffer); 213 EFX_BUG_ON_PARANOID(buffer->flags);
237 EFX_BUG_ON_PARANOID(buffer->tsoh);
238 EFX_BUG_ON_PARANOID(buffer->skb);
239 EFX_BUG_ON_PARANOID(buffer->len); 214 EFX_BUG_ON_PARANOID(buffer->len);
240 EFX_BUG_ON_PARANOID(!buffer->continuation);
241 EFX_BUG_ON_PARANOID(buffer->unmap_len); 215 EFX_BUG_ON_PARANOID(buffer->unmap_len);
242 216
243 dma_len = efx_max_tx_len(efx, dma_addr); 217 dma_len = efx_max_tx_len(efx, dma_addr);
@@ -247,13 +221,14 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
247 /* Fill out per descriptor fields */ 221 /* Fill out per descriptor fields */
248 buffer->len = dma_len; 222 buffer->len = dma_len;
249 buffer->dma_addr = dma_addr; 223 buffer->dma_addr = dma_addr;
224 buffer->flags = EFX_TX_BUF_CONT;
250 len -= dma_len; 225 len -= dma_len;
251 dma_addr += dma_len; 226 dma_addr += dma_len;
252 ++tx_queue->insert_count; 227 ++tx_queue->insert_count;
253 } while (len); 228 } while (len);
254 229
255 /* Transfer ownership of the unmapping to the final buffer */ 230 /* Transfer ownership of the unmapping to the final buffer */
256 buffer->unmap_single = unmap_single; 231 buffer->flags = EFX_TX_BUF_CONT | dma_flags;
257 buffer->unmap_len = unmap_len; 232 buffer->unmap_len = unmap_len;
258 unmap_len = 0; 233 unmap_len = 0;
259 234
@@ -264,20 +239,22 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
264 len = skb_frag_size(fragment); 239 len = skb_frag_size(fragment);
265 i++; 240 i++;
266 /* Map for DMA */ 241 /* Map for DMA */
267 unmap_single = false; 242 dma_flags = 0;
268 dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len, 243 dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len,
269 DMA_TO_DEVICE); 244 DMA_TO_DEVICE);
270 } 245 }
271 246
272 /* Transfer ownership of the skb to the final buffer */ 247 /* Transfer ownership of the skb to the final buffer */
273 buffer->skb = skb; 248 buffer->skb = skb;
274 buffer->continuation = false; 249 buffer->flags = EFX_TX_BUF_SKB | dma_flags;
275 250
276 netdev_tx_sent_queue(tx_queue->core_txq, skb->len); 251 netdev_tx_sent_queue(tx_queue->core_txq, skb->len);
277 252
278 /* Pass off to hardware */ 253 /* Pass off to hardware */
279 efx_nic_push_buffers(tx_queue); 254 efx_nic_push_buffers(tx_queue);
280 255
256 efx_tx_maybe_stop_queue(tx_queue);
257
281 return NETDEV_TX_OK; 258 return NETDEV_TX_OK;
282 259
283 dma_err: 260 dma_err:
@@ -289,7 +266,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
289 /* Mark the packet as transmitted, and free the SKB ourselves */ 266 /* Mark the packet as transmitted, and free the SKB ourselves */
290 dev_kfree_skb_any(skb); 267 dev_kfree_skb_any(skb);
291 268
292 unwind:
293 /* Work backwards until we hit the original insert pointer value */ 269 /* Work backwards until we hit the original insert pointer value */
294 while (tx_queue->insert_count != tx_queue->write_count) { 270 while (tx_queue->insert_count != tx_queue->write_count) {
295 unsigned int pkts_compl = 0, bytes_compl = 0; 271 unsigned int pkts_compl = 0, bytes_compl = 0;
@@ -297,12 +273,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
297 insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; 273 insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
298 buffer = &tx_queue->buffer[insert_ptr]; 274 buffer = &tx_queue->buffer[insert_ptr];
299 efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); 275 efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
300 buffer->len = 0;
301 } 276 }
302 277
303 /* Free the fragment we were mid-way through pushing */ 278 /* Free the fragment we were mid-way through pushing */
304 if (unmap_len) { 279 if (unmap_len) {
305 if (unmap_single) 280 if (dma_flags & EFX_TX_BUF_MAP_SINGLE)
306 dma_unmap_single(dma_dev, unmap_addr, unmap_len, 281 dma_unmap_single(dma_dev, unmap_addr, unmap_len,
307 DMA_TO_DEVICE); 282 DMA_TO_DEVICE);
308 else 283 else
@@ -310,7 +285,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
310 DMA_TO_DEVICE); 285 DMA_TO_DEVICE);
311 } 286 }
312 287
313 return rc; 288 return NETDEV_TX_OK;
314} 289}
315 290
316/* Remove packets from the TX queue 291/* Remove packets from the TX queue
@@ -340,8 +315,6 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
340 } 315 }
341 316
342 efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl); 317 efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
343 buffer->continuation = true;
344 buffer->len = 0;
345 318
346 ++tx_queue->read_count; 319 ++tx_queue->read_count;
347 read_ptr = tx_queue->read_count & tx_queue->ptr_mask; 320 read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
@@ -450,6 +423,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
450{ 423{
451 unsigned fill_level; 424 unsigned fill_level;
452 struct efx_nic *efx = tx_queue->efx; 425 struct efx_nic *efx = tx_queue->efx;
426 struct efx_tx_queue *txq2;
453 unsigned int pkts_compl = 0, bytes_compl = 0; 427 unsigned int pkts_compl = 0, bytes_compl = 0;
454 428
455 EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask); 429 EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask);
@@ -457,15 +431,18 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
457 efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); 431 efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
458 netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl); 432 netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl);
459 433
460 /* See if we need to restart the netif queue. This barrier 434 /* See if we need to restart the netif queue. This memory
461 * separates the update of read_count from the test of the 435 * barrier ensures that we write read_count (inside
462 * queue state. */ 436 * efx_dequeue_buffers()) before reading the queue status.
437 */
463 smp_mb(); 438 smp_mb();
464 if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) && 439 if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
465 likely(efx->port_enabled) && 440 likely(efx->port_enabled) &&
466 likely(netif_device_present(efx->net_dev))) { 441 likely(netif_device_present(efx->net_dev))) {
467 fill_level = tx_queue->insert_count - tx_queue->read_count; 442 txq2 = efx_tx_queue_partner(tx_queue);
468 if (fill_level < EFX_TXQ_THRESHOLD(efx)) 443 fill_level = max(tx_queue->insert_count - tx_queue->read_count,
444 txq2->insert_count - txq2->read_count);
445 if (fill_level <= efx->txq_wake_thresh)
469 netif_tx_wake_queue(tx_queue->core_txq); 446 netif_tx_wake_queue(tx_queue->core_txq);
470 } 447 }
471 448
@@ -480,11 +457,26 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
480 } 457 }
481} 458}
482 459
460/* Size of page-based TSO header buffers. Larger blocks must be
461 * allocated from the heap.
462 */
463#define TSOH_STD_SIZE 128
464#define TSOH_PER_PAGE (PAGE_SIZE / TSOH_STD_SIZE)
465
466/* At most half the descriptors in the queue at any time will refer to
467 * a TSO header buffer, since they must always be followed by a
468 * payload descriptor referring to an skb.
469 */
470static unsigned int efx_tsoh_page_count(struct efx_tx_queue *tx_queue)
471{
472 return DIV_ROUND_UP(tx_queue->ptr_mask + 1, 2 * TSOH_PER_PAGE);
473}
474
483int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) 475int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
484{ 476{
485 struct efx_nic *efx = tx_queue->efx; 477 struct efx_nic *efx = tx_queue->efx;
486 unsigned int entries; 478 unsigned int entries;
487 int i, rc; 479 int rc;
488 480
489 /* Create the smallest power-of-two aligned ring */ 481 /* Create the smallest power-of-two aligned ring */
490 entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE); 482 entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
@@ -500,17 +492,28 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
500 GFP_KERNEL); 492 GFP_KERNEL);
501 if (!tx_queue->buffer) 493 if (!tx_queue->buffer)
502 return -ENOMEM; 494 return -ENOMEM;
503 for (i = 0; i <= tx_queue->ptr_mask; ++i) 495
504 tx_queue->buffer[i].continuation = true; 496 if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) {
497 tx_queue->tsoh_page =
498 kcalloc(efx_tsoh_page_count(tx_queue),
499 sizeof(tx_queue->tsoh_page[0]), GFP_KERNEL);
500 if (!tx_queue->tsoh_page) {
501 rc = -ENOMEM;
502 goto fail1;
503 }
504 }
505 505
506 /* Allocate hardware ring */ 506 /* Allocate hardware ring */
507 rc = efx_nic_probe_tx(tx_queue); 507 rc = efx_nic_probe_tx(tx_queue);
508 if (rc) 508 if (rc)
509 goto fail; 509 goto fail2;
510 510
511 return 0; 511 return 0;
512 512
513 fail: 513fail2:
514 kfree(tx_queue->tsoh_page);
515 tx_queue->tsoh_page = NULL;
516fail1:
514 kfree(tx_queue->buffer); 517 kfree(tx_queue->buffer);
515 tx_queue->buffer = NULL; 518 tx_queue->buffer = NULL;
516 return rc; 519 return rc;
@@ -546,8 +549,6 @@ void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
546 unsigned int pkts_compl = 0, bytes_compl = 0; 549 unsigned int pkts_compl = 0, bytes_compl = 0;
547 buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask]; 550 buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
548 efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); 551 efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
549 buffer->continuation = true;
550 buffer->len = 0;
551 552
552 ++tx_queue->read_count; 553 ++tx_queue->read_count;
553 } 554 }
@@ -568,13 +569,12 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
568 efx_nic_fini_tx(tx_queue); 569 efx_nic_fini_tx(tx_queue);
569 570
570 efx_release_tx_buffers(tx_queue); 571 efx_release_tx_buffers(tx_queue);
571
572 /* Free up TSO header cache */
573 efx_fini_tso(tx_queue);
574} 572}
575 573
576void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) 574void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
577{ 575{
576 int i;
577
578 if (!tx_queue->buffer) 578 if (!tx_queue->buffer)
579 return; 579 return;
580 580
@@ -582,6 +582,14 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
582 "destroying TX queue %d\n", tx_queue->queue); 582 "destroying TX queue %d\n", tx_queue->queue);
583 efx_nic_remove_tx(tx_queue); 583 efx_nic_remove_tx(tx_queue);
584 584
585 if (tx_queue->tsoh_page) {
586 for (i = 0; i < efx_tsoh_page_count(tx_queue); i++)
587 efx_nic_free_buffer(tx_queue->efx,
588 &tx_queue->tsoh_page[i]);
589 kfree(tx_queue->tsoh_page);
590 tx_queue->tsoh_page = NULL;
591 }
592
585 kfree(tx_queue->buffer); 593 kfree(tx_queue->buffer);
586 tx_queue->buffer = NULL; 594 tx_queue->buffer = NULL;
587} 595}
@@ -604,22 +612,7 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
604#define TSOH_OFFSET NET_IP_ALIGN 612#define TSOH_OFFSET NET_IP_ALIGN
605#endif 613#endif
606 614
607#define TSOH_BUFFER(tsoh) ((u8 *)(tsoh + 1) + TSOH_OFFSET)
608
609/* Total size of struct efx_tso_header, buffer and padding */
610#define TSOH_SIZE(hdr_len) \
611 (sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)
612
613/* Size of blocks on free list. Larger blocks must be allocated from
614 * the heap.
615 */
616#define TSOH_STD_SIZE 128
617
618#define PTR_DIFF(p1, p2) ((u8 *)(p1) - (u8 *)(p2)) 615#define PTR_DIFF(p1, p2) ((u8 *)(p1) - (u8 *)(p2))
619#define ETH_HDR_LEN(skb) (skb_network_header(skb) - (skb)->data)
620#define SKB_TCP_OFF(skb) PTR_DIFF(tcp_hdr(skb), (skb)->data)
621#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
622#define SKB_IPV6_OFF(skb) PTR_DIFF(ipv6_hdr(skb), (skb)->data)
623 616
624/** 617/**
625 * struct tso_state - TSO state for an SKB 618 * struct tso_state - TSO state for an SKB
@@ -631,10 +624,12 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
631 * @in_len: Remaining length in current SKB fragment 624 * @in_len: Remaining length in current SKB fragment
632 * @unmap_len: Length of SKB fragment 625 * @unmap_len: Length of SKB fragment
633 * @unmap_addr: DMA address of SKB fragment 626 * @unmap_addr: DMA address of SKB fragment
634 * @unmap_single: DMA single vs page mapping flag 627 * @dma_flags: TX buffer flags for DMA mapping - %EFX_TX_BUF_MAP_SINGLE or 0
635 * @protocol: Network protocol (after any VLAN header) 628 * @protocol: Network protocol (after any VLAN header)
629 * @ip_off: Offset of IP header
630 * @tcp_off: Offset of TCP header
636 * @header_len: Number of bytes of header 631 * @header_len: Number of bytes of header
637 * @full_packet_size: Number of bytes to put in each outgoing segment 632 * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload
638 * 633 *
639 * The state used during segmentation. It is put into this data structure 634 * The state used during segmentation. It is put into this data structure
640 * just to make it easy to pass into inline functions. 635 * just to make it easy to pass into inline functions.
@@ -651,11 +646,13 @@ struct tso_state {
651 unsigned in_len; 646 unsigned in_len;
652 unsigned unmap_len; 647 unsigned unmap_len;
653 dma_addr_t unmap_addr; 648 dma_addr_t unmap_addr;
654 bool unmap_single; 649 unsigned short dma_flags;
655 650
656 __be16 protocol; 651 __be16 protocol;
652 unsigned int ip_off;
653 unsigned int tcp_off;
657 unsigned header_len; 654 unsigned header_len;
658 int full_packet_size; 655 unsigned int ip_base_len;
659}; 656};
660 657
661 658
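/* Illustrative worked example, not part of the patch: for a GSO'd IPv4/TCP
 * skb with a plain 14-byte Ethernet header and 20-byte IP and TCP headers
 * (no VLAN tag, no options), tso_start() further down fills the new offset
 * fields roughly as follows.  The values are computed the same way the code
 * does; only the concrete header sizes are assumptions.
 */
static const struct {
        unsigned int ip_off;            /* offset of IP header in the frame */
        unsigned int tcp_off;           /* offset of TCP header */
        unsigned int header_len;        /* tcp_off + TCP header length */
        unsigned int ip_base_len;       /* IP length before TCP payload */
} sketch_tso_state_ipv4 = {
        .ip_off      = 14,              /* Ethernet header */
        .tcp_off     = 14 + 20,         /* ip_off + IPv4 header */
        .header_len  = 14 + 20 + 20,    /* tcp_off + (tcp_hdr->doff << 2) */
        .ip_base_len = 20 + 20,         /* header_len - ip_off: IP + TCP headers */
};
/* For IPv6 the code uses header_len - tcp_off instead, i.e. just the 20-byte
 * TCP header, since payload_len does not cover the IPv6 header itself.
 */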
@@ -687,91 +684,43 @@ static __be16 efx_tso_check_protocol(struct sk_buff *skb)
687 return protocol; 684 return protocol;
688} 685}
689 686
690 687static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue,
691/* 688 struct efx_tx_buffer *buffer, unsigned int len)
692 * Allocate a page worth of efx_tso_header structures, and string them
693 * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM.
694 */
695static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
696{ 689{
697 struct device *dma_dev = &tx_queue->efx->pci_dev->dev; 690 u8 *result;
698 struct efx_tso_header *tsoh;
699 dma_addr_t dma_addr;
700 u8 *base_kva, *kva;
701
702 base_kva = dma_alloc_coherent(dma_dev, PAGE_SIZE, &dma_addr, GFP_ATOMIC);
703 if (base_kva == NULL) {
704 netif_err(tx_queue->efx, tx_err, tx_queue->efx->net_dev,
705 "Unable to allocate page for TSO headers\n");
706 return -ENOMEM;
707 }
708
709 /* dma_alloc_coherent() allocates pages. */
710 EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));
711
712 for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
713 tsoh = (struct efx_tso_header *)kva;
714 tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
715 tsoh->next = tx_queue->tso_headers_free;
716 tx_queue->tso_headers_free = tsoh;
717 }
718
719 return 0;
720}
721 691
692 EFX_BUG_ON_PARANOID(buffer->len);
693 EFX_BUG_ON_PARANOID(buffer->flags);
694 EFX_BUG_ON_PARANOID(buffer->unmap_len);
722 695
723/* Free up a TSO header, and all others in the same page. */ 696 if (likely(len <= TSOH_STD_SIZE - TSOH_OFFSET)) {
724static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue, 697 unsigned index =
725 struct efx_tso_header *tsoh, 698 (tx_queue->insert_count & tx_queue->ptr_mask) / 2;
726 struct device *dma_dev) 699 struct efx_buffer *page_buf =
727{ 700 &tx_queue->tsoh_page[index / TSOH_PER_PAGE];
728 struct efx_tso_header **p; 701 unsigned offset =
729 unsigned long base_kva; 702 TSOH_STD_SIZE * (index % TSOH_PER_PAGE) + TSOH_OFFSET;
730 dma_addr_t base_dma; 703
731 704 if (unlikely(!page_buf->addr) &&
732 base_kva = (unsigned long)tsoh & PAGE_MASK; 705 efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE))
733 base_dma = tsoh->dma_addr & PAGE_MASK; 706 return NULL;
734 707
735 p = &tx_queue->tso_headers_free; 708 result = (u8 *)page_buf->addr + offset;
736 while (*p != NULL) { 709 buffer->dma_addr = page_buf->dma_addr + offset;
737 if (((unsigned long)*p & PAGE_MASK) == base_kva) 710 buffer->flags = EFX_TX_BUF_CONT;
738 *p = (*p)->next; 711 } else {
739 else 712 tx_queue->tso_long_headers++;
740 p = &(*p)->next;
741 }
742
743 dma_free_coherent(dma_dev, PAGE_SIZE, (void *)base_kva, base_dma);
744}
745 713
746static struct efx_tso_header * 714 buffer->heap_buf = kmalloc(TSOH_OFFSET + len, GFP_ATOMIC);
747efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len) 715 if (unlikely(!buffer->heap_buf))
748{ 716 return NULL;
749 struct efx_tso_header *tsoh; 717 result = (u8 *)buffer->heap_buf + TSOH_OFFSET;
750 718 buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP;
751 tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
752 if (unlikely(!tsoh))
753 return NULL;
754
755 tsoh->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev,
756 TSOH_BUFFER(tsoh), header_len,
757 DMA_TO_DEVICE);
758 if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev,
759 tsoh->dma_addr))) {
760 kfree(tsoh);
761 return NULL;
762 } 719 }
763 720
764 tsoh->unmap_len = header_len; 721 buffer->len = len;
765 return tsoh;
766}
767 722
768static void 723 return result;
769efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
770{
771 dma_unmap_single(&tx_queue->efx->pci_dev->dev,
772 tsoh->dma_addr, tsoh->unmap_len,
773 DMA_TO_DEVICE);
774 kfree(tsoh);
775} 724}
776 725
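/* Illustrative sketch, not part of the patch: how efx_tsoh_get_buffer() above
 * picks a slot in the per-queue tsoh_page[] buffers for short headers.  The
 * ring index is halved, presumably because a header descriptor is always
 * followed by at least one payload descriptor, so at most half the ring can
 * hold headers at once.  The constants below are assumptions for this sketch;
 * the driver's TSOH_STD_SIZE, TSOH_OFFSET and TSOH_PER_PAGE are defined
 * elsewhere in tx.c.  Headers too long for a slot take the heap branch above
 * (kmalloc() + EFX_TX_BUF_HEAP) and are DMA-mapped later in
 * efx_tso_put_header().
 */
#define SKETCH_PAGE_SIZE        4096u
#define SKETCH_TSOH_STD_SIZE    128u    /* assumed slot size */
#define SKETCH_TSOH_OFFSET      2u      /* assumed NET_IP_ALIGN padding */
#define SKETCH_TSOH_PER_PAGE    (SKETCH_PAGE_SIZE / SKETCH_TSOH_STD_SIZE)

static void sketch_tsoh_slot(unsigned int insert_count, unsigned int ptr_mask,
                             unsigned int *page, unsigned int *offset)
{
        unsigned int index = (insert_count & ptr_mask) / 2;

        *page = index / SKETCH_TSOH_PER_PAGE;           /* tsoh_page[] entry */
        *offset = SKETCH_TSOH_STD_SIZE * (index % SKETCH_TSOH_PER_PAGE) +
                  SKETCH_TSOH_OFFSET;                   /* byte offset within it */
}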
777/** 726/**
@@ -781,47 +730,19 @@ efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
781 * @len: Length of fragment 730 * @len: Length of fragment
782 * @final_buffer: The final buffer inserted into the queue 731 * @final_buffer: The final buffer inserted into the queue
783 * 732 *
784 * Push descriptors onto the TX queue. Return 0 on success or 1 if 733 * Push descriptors onto the TX queue.
785 * @tx_queue full.
786 */ 734 */
787static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, 735static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
788 dma_addr_t dma_addr, unsigned len, 736 dma_addr_t dma_addr, unsigned len,
789 struct efx_tx_buffer **final_buffer) 737 struct efx_tx_buffer **final_buffer)
790{ 738{
791 struct efx_tx_buffer *buffer; 739 struct efx_tx_buffer *buffer;
792 struct efx_nic *efx = tx_queue->efx; 740 struct efx_nic *efx = tx_queue->efx;
793 unsigned dma_len, fill_level, insert_ptr; 741 unsigned dma_len, insert_ptr;
794 int q_space;
795 742
796 EFX_BUG_ON_PARANOID(len <= 0); 743 EFX_BUG_ON_PARANOID(len <= 0);
797 744
798 fill_level = tx_queue->insert_count - tx_queue->old_read_count;
799 /* -1 as there is no way to represent all descriptors used */
800 q_space = efx->txq_entries - 1 - fill_level;
801
802 while (1) { 745 while (1) {
803 if (unlikely(q_space-- <= 0)) {
804 /* It might be that completions have happened
805 * since the xmit path last checked. Update
806 * the xmit path's copy of read_count.
807 */
808 netif_tx_stop_queue(tx_queue->core_txq);
809 /* This memory barrier protects the change of
810 * queue state from the access of read_count. */
811 smp_mb();
812 tx_queue->old_read_count =
813 ACCESS_ONCE(tx_queue->read_count);
814 fill_level = (tx_queue->insert_count
815 - tx_queue->old_read_count);
816 q_space = efx->txq_entries - 1 - fill_level;
817 if (unlikely(q_space-- <= 0)) {
818 *final_buffer = NULL;
819 return 1;
820 }
821 smp_mb();
822 netif_tx_start_queue(tx_queue->core_txq);
823 }
824
825 insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; 746 insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
826 buffer = &tx_queue->buffer[insert_ptr]; 747 buffer = &tx_queue->buffer[insert_ptr];
827 ++tx_queue->insert_count; 748 ++tx_queue->insert_count;
@@ -830,12 +751,9 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
830 tx_queue->read_count >= 751 tx_queue->read_count >=
831 efx->txq_entries); 752 efx->txq_entries);
832 753
833 efx_tsoh_free(tx_queue, buffer);
834 EFX_BUG_ON_PARANOID(buffer->len); 754 EFX_BUG_ON_PARANOID(buffer->len);
835 EFX_BUG_ON_PARANOID(buffer->unmap_len); 755 EFX_BUG_ON_PARANOID(buffer->unmap_len);
836 EFX_BUG_ON_PARANOID(buffer->skb); 756 EFX_BUG_ON_PARANOID(buffer->flags);
837 EFX_BUG_ON_PARANOID(!buffer->continuation);
838 EFX_BUG_ON_PARANOID(buffer->tsoh);
839 757
840 buffer->dma_addr = dma_addr; 758 buffer->dma_addr = dma_addr;
841 759
@@ -845,7 +763,8 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
845 if (dma_len >= len) 763 if (dma_len >= len)
846 break; 764 break;
847 765
848 buffer->len = dma_len; /* Don't set the other members */ 766 buffer->len = dma_len;
767 buffer->flags = EFX_TX_BUF_CONT;
849 dma_addr += dma_len; 768 dma_addr += dma_len;
850 len -= dma_len; 769 len -= dma_len;
851 } 770 }
@@ -853,7 +772,6 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
853 EFX_BUG_ON_PARANOID(!len); 772 EFX_BUG_ON_PARANOID(!len);
854 buffer->len = len; 773 buffer->len = len;
855 *final_buffer = buffer; 774 *final_buffer = buffer;
856 return 0;
857} 775}
858 776
859 777
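/* Illustrative sketch, not part of the patch: efx_tx_queue_insert() above now
 * only splits one DMA-contiguous fragment across descriptors; all queue-space
 * accounting has moved out of it.  Every descriptor except the last is
 * flagged EFX_TX_BUF_CONT and the last one carries the remaining length.  The
 * 16K per-descriptor cap below is an assumption; the driver takes the real
 * limit from a hardware-specific helper not shown in this hunk.
 */
static unsigned int sketch_descs_for_fragment(unsigned int len)
{
        const unsigned int max_desc_len = 16384;        /* assumed cap */
        unsigned int descs = 0;

        while (len > max_desc_len) {
                descs++;                /* would be flagged EFX_TX_BUF_CONT */
                len -= max_desc_len;
        }
        return descs + 1;               /* final descriptor takes the rest */
}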
@@ -864,54 +782,42 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
864 * a single fragment, and we know it doesn't cross a page boundary. It 782 * a single fragment, and we know it doesn't cross a page boundary. It
865 * also allows us to not worry about end-of-packet etc. 783 * also allows us to not worry about end-of-packet etc.
866 */ 784 */
867static void efx_tso_put_header(struct efx_tx_queue *tx_queue, 785static int efx_tso_put_header(struct efx_tx_queue *tx_queue,
868 struct efx_tso_header *tsoh, unsigned len) 786 struct efx_tx_buffer *buffer, u8 *header)
869{ 787{
870 struct efx_tx_buffer *buffer; 788 if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) {
871 789 buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev,
872 buffer = &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask]; 790 header, buffer->len,
873 efx_tsoh_free(tx_queue, buffer); 791 DMA_TO_DEVICE);
874 EFX_BUG_ON_PARANOID(buffer->len); 792 if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev,
875 EFX_BUG_ON_PARANOID(buffer->unmap_len); 793 buffer->dma_addr))) {
876 EFX_BUG_ON_PARANOID(buffer->skb); 794 kfree(buffer->heap_buf);
877 EFX_BUG_ON_PARANOID(!buffer->continuation); 795 buffer->len = 0;
878 EFX_BUG_ON_PARANOID(buffer->tsoh); 796 buffer->flags = 0;
879 buffer->len = len; 797 return -ENOMEM;
880 buffer->dma_addr = tsoh->dma_addr; 798 }
881 buffer->tsoh = tsoh; 799 buffer->unmap_len = buffer->len;
800 buffer->flags |= EFX_TX_BUF_MAP_SINGLE;
801 }
882 802
883 ++tx_queue->insert_count; 803 ++tx_queue->insert_count;
804 return 0;
884} 805}
885 806
886 807
887/* Remove descriptors put into a tx_queue. */ 808/* Remove buffers put into a tx_queue. None of the buffers must have
809 * an skb attached.
810 */
888static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) 811static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
889{ 812{
890 struct efx_tx_buffer *buffer; 813 struct efx_tx_buffer *buffer;
891 dma_addr_t unmap_addr;
892 814
893 /* Work backwards until we hit the original insert pointer value */ 815 /* Work backwards until we hit the original insert pointer value */
894 while (tx_queue->insert_count != tx_queue->write_count) { 816 while (tx_queue->insert_count != tx_queue->write_count) {
895 --tx_queue->insert_count; 817 --tx_queue->insert_count;
896 buffer = &tx_queue->buffer[tx_queue->insert_count & 818 buffer = &tx_queue->buffer[tx_queue->insert_count &
897 tx_queue->ptr_mask]; 819 tx_queue->ptr_mask];
898 efx_tsoh_free(tx_queue, buffer); 820 efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
899 EFX_BUG_ON_PARANOID(buffer->skb);
900 if (buffer->unmap_len) {
901 unmap_addr = (buffer->dma_addr + buffer->len -
902 buffer->unmap_len);
903 if (buffer->unmap_single)
904 dma_unmap_single(&tx_queue->efx->pci_dev->dev,
905 unmap_addr, buffer->unmap_len,
906 DMA_TO_DEVICE);
907 else
908 dma_unmap_page(&tx_queue->efx->pci_dev->dev,
909 unmap_addr, buffer->unmap_len,
910 DMA_TO_DEVICE);
911 buffer->unmap_len = 0;
912 }
913 buffer->len = 0;
914 buffer->continuation = true;
915 } 821 }
916} 822}
917 823
@@ -919,17 +825,16 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
919/* Parse the SKB header and initialise state. */ 825/* Parse the SKB header and initialise state. */
920static void tso_start(struct tso_state *st, const struct sk_buff *skb) 826static void tso_start(struct tso_state *st, const struct sk_buff *skb)
921{ 827{
922 /* All ethernet/IP/TCP headers combined size is TCP header size 828 st->ip_off = skb_network_header(skb) - skb->data;
923 * plus offset of TCP header relative to start of packet. 829 st->tcp_off = skb_transport_header(skb) - skb->data;
924 */ 830 st->header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u);
925 st->header_len = ((tcp_hdr(skb)->doff << 2u) 831 if (st->protocol == htons(ETH_P_IP)) {
926 + PTR_DIFF(tcp_hdr(skb), skb->data)); 832 st->ip_base_len = st->header_len - st->ip_off;
927 st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size;
928
929 if (st->protocol == htons(ETH_P_IP))
930 st->ipv4_id = ntohs(ip_hdr(skb)->id); 833 st->ipv4_id = ntohs(ip_hdr(skb)->id);
931 else 834 } else {
835 st->ip_base_len = st->header_len - st->tcp_off;
932 st->ipv4_id = 0; 836 st->ipv4_id = 0;
837 }
933 st->seqnum = ntohl(tcp_hdr(skb)->seq); 838 st->seqnum = ntohl(tcp_hdr(skb)->seq);
934 839
935 EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg); 840 EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
@@ -938,7 +843,7 @@ static void tso_start(struct tso_state *st, const struct sk_buff *skb)
938 843
939 st->out_len = skb->len - st->header_len; 844 st->out_len = skb->len - st->header_len;
940 st->unmap_len = 0; 845 st->unmap_len = 0;
941 st->unmap_single = false; 846 st->dma_flags = 0;
942} 847}
943 848
944static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, 849static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
@@ -947,7 +852,7 @@ static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
947 st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0, 852 st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0,
948 skb_frag_size(frag), DMA_TO_DEVICE); 853 skb_frag_size(frag), DMA_TO_DEVICE);
949 if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { 854 if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) {
950 st->unmap_single = false; 855 st->dma_flags = 0;
951 st->unmap_len = skb_frag_size(frag); 856 st->unmap_len = skb_frag_size(frag);
952 st->in_len = skb_frag_size(frag); 857 st->in_len = skb_frag_size(frag);
953 st->dma_addr = st->unmap_addr; 858 st->dma_addr = st->unmap_addr;
@@ -965,7 +870,7 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
965 st->unmap_addr = dma_map_single(&efx->pci_dev->dev, skb->data + hl, 870 st->unmap_addr = dma_map_single(&efx->pci_dev->dev, skb->data + hl,
966 len, DMA_TO_DEVICE); 871 len, DMA_TO_DEVICE);
967 if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { 872 if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) {
968 st->unmap_single = true; 873 st->dma_flags = EFX_TX_BUF_MAP_SINGLE;
969 st->unmap_len = len; 874 st->unmap_len = len;
970 st->in_len = len; 875 st->in_len = len;
971 st->dma_addr = st->unmap_addr; 876 st->dma_addr = st->unmap_addr;
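/* Illustrative sketch, not part of the patch: the old bool unmap_single has
 * become a flag bit carried in dma_flags, so completion and unwind paths can
 * pick the matching unmap call.  The flag value here is a placeholder; the
 * real EFX_TX_BUF_MAP_SINGLE lives in the driver headers.
 */
#define SKETCH_TX_BUF_MAP_SINGLE 0x8

static const char *sketch_unmap_kind(unsigned short dma_flags)
{
        /* skb head / heap TSO header: mapped with dma_map_single() */
        if (dma_flags & SKETCH_TX_BUF_MAP_SINGLE)
                return "dma_unmap_single";
        /* paged skb fragment: mapped with skb_frag_dma_map() */
        return "dma_unmap_page";
}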
@@ -982,20 +887,19 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
982 * @st: TSO state 887 * @st: TSO state
983 * 888 *
984 * Form descriptors for the current fragment, until we reach the end 889 * Form descriptors for the current fragment, until we reach the end
985 * of fragment or end-of-packet. Return 0 on success, 1 if not enough 890 * of fragment or end-of-packet.
986 * space in @tx_queue.
987 */ 891 */
988static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, 892static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
989 const struct sk_buff *skb, 893 const struct sk_buff *skb,
990 struct tso_state *st) 894 struct tso_state *st)
991{ 895{
992 struct efx_tx_buffer *buffer; 896 struct efx_tx_buffer *buffer;
993 int n, end_of_packet, rc; 897 int n;
994 898
995 if (st->in_len == 0) 899 if (st->in_len == 0)
996 return 0; 900 return;
997 if (st->packet_space == 0) 901 if (st->packet_space == 0)
998 return 0; 902 return;
999 903
1000 EFX_BUG_ON_PARANOID(st->in_len <= 0); 904 EFX_BUG_ON_PARANOID(st->in_len <= 0);
1001 EFX_BUG_ON_PARANOID(st->packet_space <= 0); 905 EFX_BUG_ON_PARANOID(st->packet_space <= 0);
@@ -1006,25 +910,24 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
1006 st->out_len -= n; 910 st->out_len -= n;
1007 st->in_len -= n; 911 st->in_len -= n;
1008 912
1009 rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer); 913 efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
1010 if (likely(rc == 0)) {
1011 if (st->out_len == 0)
1012 /* Transfer ownership of the skb */
1013 buffer->skb = skb;
1014 914
1015 end_of_packet = st->out_len == 0 || st->packet_space == 0; 915 if (st->out_len == 0) {
1016 buffer->continuation = !end_of_packet; 916 /* Transfer ownership of the skb */
917 buffer->skb = skb;
918 buffer->flags = EFX_TX_BUF_SKB;
919 } else if (st->packet_space != 0) {
920 buffer->flags = EFX_TX_BUF_CONT;
921 }
1017 922
1018 if (st->in_len == 0) { 923 if (st->in_len == 0) {
1019 /* Transfer ownership of the DMA mapping */ 924 /* Transfer ownership of the DMA mapping */
1020 buffer->unmap_len = st->unmap_len; 925 buffer->unmap_len = st->unmap_len;
1021 buffer->unmap_single = st->unmap_single; 926 buffer->flags |= st->dma_flags;
1022 st->unmap_len = 0; 927 st->unmap_len = 0;
1023 }
1024 } 928 }
1025 929
1026 st->dma_addr += n; 930 st->dma_addr += n;
1027 return rc;
1028} 931}
1029 932
1030 933
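/* Illustrative sketch, not part of the patch: how the rewritten
 * tso_fill_packet_with_fragment() above chooses descriptor flags.  CONT means
 * "more descriptors of this packet follow"; leaving it clear ends one TSO
 * segment; SKB is set only on the very last descriptor of the whole burst so
 * that completion knows when to free the skb.  The flag values are
 * placeholders.
 */
enum { SKETCH_BUF_CONT = 1, SKETCH_BUF_SKB = 2 };

static unsigned short sketch_tso_desc_flags(unsigned int out_len,
                                            unsigned int packet_space)
{
        if (out_len == 0)
                return SKETCH_BUF_SKB;  /* last descriptor of the skb */
        if (packet_space != 0)
                return SKETCH_BUF_CONT; /* more of this segment follows */
        return 0;                       /* segment ends; next one starts fresh */
}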
@@ -1035,36 +938,25 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
1035 * @st: TSO state 938 * @st: TSO state
1036 * 939 *
1037 * Generate a new header and prepare for the new packet. Return 0 on 940 * Generate a new header and prepare for the new packet. Return 0 on
1038 * success, or -1 if failed to alloc header. 941 * success, or -%ENOMEM if failed to alloc header.
1039 */ 942 */
1040static int tso_start_new_packet(struct efx_tx_queue *tx_queue, 943static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
1041 const struct sk_buff *skb, 944 const struct sk_buff *skb,
1042 struct tso_state *st) 945 struct tso_state *st)
1043{ 946{
1044 struct efx_tso_header *tsoh; 947 struct efx_tx_buffer *buffer =
948 &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask];
1045 struct tcphdr *tsoh_th; 949 struct tcphdr *tsoh_th;
1046 unsigned ip_length; 950 unsigned ip_length;
1047 u8 *header; 951 u8 *header;
952 int rc;
1048 953
1049 /* Allocate a DMA-mapped header buffer. */ 954 /* Allocate and insert a DMA-mapped header buffer. */
1050 if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) { 955 header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len);
1051 if (tx_queue->tso_headers_free == NULL) { 956 if (!header)
1052 if (efx_tsoh_block_alloc(tx_queue)) 957 return -ENOMEM;
1053 return -1;
1054 }
1055 EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
1056 tsoh = tx_queue->tso_headers_free;
1057 tx_queue->tso_headers_free = tsoh->next;
1058 tsoh->unmap_len = 0;
1059 } else {
1060 tx_queue->tso_long_headers++;
1061 tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len);
1062 if (unlikely(!tsoh))
1063 return -1;
1064 }
1065 958
1066 header = TSOH_BUFFER(tsoh); 959 tsoh_th = (struct tcphdr *)(header + st->tcp_off);
1067 tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
1068 960
1069 /* Copy and update the headers. */ 961 /* Copy and update the headers. */
1070 memcpy(header, skb->data, st->header_len); 962 memcpy(header, skb->data, st->header_len);
@@ -1073,19 +965,19 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
1073 st->seqnum += skb_shinfo(skb)->gso_size; 965 st->seqnum += skb_shinfo(skb)->gso_size;
1074 if (st->out_len > skb_shinfo(skb)->gso_size) { 966 if (st->out_len > skb_shinfo(skb)->gso_size) {
1075 /* This packet will not finish the TSO burst. */ 967 /* This packet will not finish the TSO burst. */
1076 ip_length = st->full_packet_size - ETH_HDR_LEN(skb); 968 st->packet_space = skb_shinfo(skb)->gso_size;
1077 tsoh_th->fin = 0; 969 tsoh_th->fin = 0;
1078 tsoh_th->psh = 0; 970 tsoh_th->psh = 0;
1079 } else { 971 } else {
1080 /* This packet will be the last in the TSO burst. */ 972 /* This packet will be the last in the TSO burst. */
1081 ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len; 973 st->packet_space = st->out_len;
1082 tsoh_th->fin = tcp_hdr(skb)->fin; 974 tsoh_th->fin = tcp_hdr(skb)->fin;
1083 tsoh_th->psh = tcp_hdr(skb)->psh; 975 tsoh_th->psh = tcp_hdr(skb)->psh;
1084 } 976 }
977 ip_length = st->ip_base_len + st->packet_space;
1085 978
1086 if (st->protocol == htons(ETH_P_IP)) { 979 if (st->protocol == htons(ETH_P_IP)) {
1087 struct iphdr *tsoh_iph = 980 struct iphdr *tsoh_iph = (struct iphdr *)(header + st->ip_off);
1088 (struct iphdr *)(header + SKB_IPV4_OFF(skb));
1089 981
1090 tsoh_iph->tot_len = htons(ip_length); 982 tsoh_iph->tot_len = htons(ip_length);
1091 983
@@ -1094,16 +986,16 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
1094 st->ipv4_id++; 986 st->ipv4_id++;
1095 } else { 987 } else {
1096 struct ipv6hdr *tsoh_iph = 988 struct ipv6hdr *tsoh_iph =
1097 (struct ipv6hdr *)(header + SKB_IPV6_OFF(skb)); 989 (struct ipv6hdr *)(header + st->ip_off);
1098 990
1099 tsoh_iph->payload_len = htons(ip_length - sizeof(*tsoh_iph)); 991 tsoh_iph->payload_len = htons(ip_length);
1100 } 992 }
1101 993
1102 st->packet_space = skb_shinfo(skb)->gso_size; 994 rc = efx_tso_put_header(tx_queue, buffer, header);
1103 ++tx_queue->tso_packets; 995 if (unlikely(rc))
996 return rc;
1104 997
1105 /* Form a descriptor for this header. */ 998 ++tx_queue->tso_packets;
1106 efx_tso_put_header(tx_queue, tsoh, st->header_len);
1107 999
1108 return 0; 1000 return 0;
1109} 1001}
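/* Illustrative worked example, not part of the patch: tso_start_new_packet()
 * above now derives the per-segment IP length as ip_base_len + packet_space.
 * With 20-byte IP and TCP headers and, say, a 1448-byte segment payload
 * (assumed figures), IPv4 gets tot_len = 40 + 1448 = 1488, while IPv6 writes
 * the same sum to payload_len with ip_base_len being just the 20-byte TCP
 * header, i.e. 20 + 1448 = 1468, with no sizeof(ipv6hdr) subtraction needed
 * any more.
 */
static unsigned int sketch_segment_ip_length(unsigned int ip_base_len,
                                             unsigned int packet_space)
{
        return ip_base_len + packet_space;      /* tot_len or payload_len */
}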
@@ -1118,13 +1010,13 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
1118 * 1010 *
1119 * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if 1011 * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
1120 * @skb was not enqueued. In all cases @skb is consumed. Return 1012 * @skb was not enqueued. In all cases @skb is consumed. Return
1121 * %NETDEV_TX_OK or %NETDEV_TX_BUSY. 1013 * %NETDEV_TX_OK.
1122 */ 1014 */
1123static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, 1015static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
1124 struct sk_buff *skb) 1016 struct sk_buff *skb)
1125{ 1017{
1126 struct efx_nic *efx = tx_queue->efx; 1018 struct efx_nic *efx = tx_queue->efx;
1127 int frag_i, rc, rc2 = NETDEV_TX_OK; 1019 int frag_i, rc;
1128 struct tso_state state; 1020 struct tso_state state;
1129 1021
1130 /* Find the packet protocol and sanity-check it */ 1022 /* Find the packet protocol and sanity-check it */
@@ -1156,11 +1048,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
1156 goto mem_err; 1048 goto mem_err;
1157 1049
1158 while (1) { 1050 while (1) {
1159 rc = tso_fill_packet_with_fragment(tx_queue, skb, &state); 1051 tso_fill_packet_with_fragment(tx_queue, skb, &state);
1160 if (unlikely(rc)) {
1161 rc2 = NETDEV_TX_BUSY;
1162 goto unwind;
1163 }
1164 1052
1165 /* Move onto the next fragment? */ 1053 /* Move onto the next fragment? */
1166 if (state.in_len == 0) { 1054 if (state.in_len == 0) {
@@ -1184,6 +1072,8 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
1184 /* Pass off to hardware */ 1072 /* Pass off to hardware */
1185 efx_nic_push_buffers(tx_queue); 1073 efx_nic_push_buffers(tx_queue);
1186 1074
1075 efx_tx_maybe_stop_queue(tx_queue);
1076
1187 tx_queue->tso_bursts++; 1077 tx_queue->tso_bursts++;
1188 return NETDEV_TX_OK; 1078 return NETDEV_TX_OK;
1189 1079
@@ -1192,10 +1082,9 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
1192 "Out of memory for TSO headers, or DMA mapping error\n"); 1082 "Out of memory for TSO headers, or DMA mapping error\n");
1193 dev_kfree_skb_any(skb); 1083 dev_kfree_skb_any(skb);
1194 1084
1195 unwind:
1196 /* Free the DMA mapping we were in the process of writing out */ 1085 /* Free the DMA mapping we were in the process of writing out */
1197 if (state.unmap_len) { 1086 if (state.unmap_len) {
1198 if (state.unmap_single) 1087 if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE)
1199 dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr, 1088 dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr,
1200 state.unmap_len, DMA_TO_DEVICE); 1089 state.unmap_len, DMA_TO_DEVICE);
1201 else 1090 else
@@ -1204,25 +1093,5 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
1204 } 1093 }
1205 1094
1206 efx_enqueue_unwind(tx_queue); 1095 efx_enqueue_unwind(tx_queue);
1207 return rc2; 1096 return NETDEV_TX_OK;
1208}
1209
1210
1211/*
1212 * Free up all TSO datastructures associated with tx_queue. This
1213 * routine should be called only once the tx_queue is both empty and
1214 * will no longer be used.
1215 */
1216static void efx_fini_tso(struct efx_tx_queue *tx_queue)
1217{
1218 unsigned i;
1219
1220 if (tx_queue->buffer) {
1221 for (i = 0; i <= tx_queue->ptr_mask; ++i)
1222 efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
1223 }
1224
1225 while (tx_queue->tso_headers_free != NULL)
1226 efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
1227 &tx_queue->efx->pci_dev->dev);
1228} 1097}
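/* Illustrative sketch, not part of the patch: efx_enqueue_skb_tso() above now
 * always returns NETDEV_TX_OK.  Instead of the old mid-insert q_space check
 * and NETDEV_TX_BUSY unwind, the queue is stopped after the descriptors are
 * pushed, via efx_tx_maybe_stop_queue().  A minimal version of that check
 * could look like the helper below; the field names, threshold and barrier
 * handling are simplified assumptions.
 */
struct sketch_txq {
        unsigned int insert_count;      /* descriptors queued by the xmit path */
        unsigned int old_read_count;    /* xmit path's cached completion count */
        unsigned int stop_thresh;       /* fill level at which to stop */
        int stopped;
};

static void sketch_tx_maybe_stop(struct sketch_txq *q)
{
        unsigned int fill_level = q->insert_count - q->old_read_count;

        if (fill_level >= q->stop_thresh)
                q->stopped = 1;         /* netif_tx_stop_queue() in the driver */
}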