/****************************************************************************
* Driver for Solarflare Solarstorm network controllers and boards
* Copyright 2005-2006 Fen Systems Ltd.
* Copyright 2005-2008 Solarflare Communications Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation, incorporated herein by reference.
*/
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/in.h>
#include <linux/crc32.h>
#include <linux/ethtool.h>
#include <linux/topology.h>
#include "net_driver.h"
#include "gmii.h"
#include "ethtool.h"
#include "tx.h"
#include "rx.h"
#include "efx.h"
#include "mdio_10g.h"
#include "falcon.h"
#include "mac.h"
#define EFX_MAX_MTU (9 * 1024)
/* RX slow fill workqueue. If memory allocation fails in the fast path,
* a work item is pushed onto this work queue to retry the allocation later,
* to avoid the NIC being starved of RX buffers. Since this is a per cpu
* workqueue, there is nothing to be gained in making it per NIC
*/
static struct workqueue_struct *refill_workqueue;
/**************************************************************************
*
* Configurable values
*
*************************************************************************/
/*
* Enable large receive offload (LRO) aka soft segment reassembly (SSR)
*
* This sets the default for new devices. It can be controlled later
* using ethtool.
*/
static int lro = true;
module_param(lro, int, 0644);
MODULE_PARM_DESC(lro, "Large receive offload acceleration");
/*
* Use separate channels for TX and RX events
*
* Set this to 1 to use separate channels for TX and RX. It allows us to
* apply a higher level of interrupt moderation to TX events.
*
* This is forced to 0 for MSI interrupt mode as the interrupt vector
* is not written
*/
static unsigned int separate_tx_and_rx_channels = true;
/* This is the weight assigned to each of the (per-channel) virtual
* NAPI devices.
*/
static int napi_weight = 64;
/* This is the time (in jiffies) between invocations of the hardware
* monitor, which checks for known hardware bugs and resets the
* hardware and driver as necessary.
*/
unsigned int efx_monitor_interval = 1 * HZ;
/* This controls whether or not the hardware monitor will trigger a
* reset when it detects an error condition.
*/
static unsigned int monitor_reset = true;
/* This controls whether or not the driver will initialise devices
* with invalid MAC addresses stored in the EEPROM or flash. If true,
* such devices will be initialised with a random locally-generated
* MAC address. This allows for loading the sfc_mtd driver to
* reprogram the flash, even if the flash contents (including the MAC
* address) have previously been erased.
*/
static unsigned int allow_bad_hwaddr;
/* Initial interrupt moderation settings. They can be modified after
* module load with ethtool.
*
* The default for RX should strike a balance between increasing the
* round-trip latency and reducing overhead.
*/
static unsigned int rx_irq_mod_usec = 60;
/* Initial interrupt moderation settings. They can be modified after
* module load with ethtool.
*
* This default is chosen to ensure that a 10G link does not go idle
* while a TX queue is stopped after it has become full. A queue is
* restarted when it drops below half full. The time this takes (assuming
* worst case 3 descriptors per packet and 1024 descriptors) is
* 512 / 3 * 1.2 = 205 usec.
*/
static unsigned int tx_irq_mod_usec = 150;
/* This is the first interrupt mode to try out of:
* 0 => MSI-X
* 1 => MSI
* 2 => legacy
*/
static unsigned int interrupt_mode;
/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
* i.e. the number of CPUs among which we may distribute simultaneous
* interrupt handling.
*
* Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
* The default (0) means to assign an interrupt to each package (level II cache)
*/
static unsigned int rss_cpus;
module_param(rss_cpus, uint, 0444);
MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
/**************************************************************************
*
* Utility functions and prototypes
*
*************************************************************************/
static void efx_remove_channel(struct efx_channel *channel);
static void efx_remove_port(struct efx_nic *efx);
static void efx_fini_napi(struct efx_nic *efx);
static void efx_fini_channels(struct efx_nic *efx);
#define EFX_ASSERT_RESET_SERIALISED(efx) \
do { \
if ((efx->state == STATE_RUNNING) || \
(efx->state == STATE_RESETTING)) \
ASSERT_RTNL(); \
} while (0)
/**************************************************************************
*
* Event queue processing
*
*************************************************************************/
/* Process channel's event queue
*
* This function is responsible for processing the event queue of a
* single channel. The caller must guarantee that this function will
* never be concurrently called more than once on the same channel,
* though different channels may be being processed concurrently.
*/
static int efx_process_channel(struct efx_channel *channel, int rx_quota)
{
int rxdmaqs;
struct efx_rx_queue *rx_queue;
if (unlikely(channel->efx->reset_pending != RESET_TYPE_NONE ||
!channel->enabled))
return rx_quota;
rxdmaqs = falcon_process_eventq(channel, &rx_quota);
/* Deliver last RX packet. */
if (channel->rx_pkt) {
__efx_rx_packet(channel, channel->rx_pkt,
channel->rx_pkt_csummed);
channel->rx_pkt = NULL;
}
efx_flush_lro(channel);
efx_rx_strategy(channel);
/* Refill descriptor rings as necessary */
rx_queue = &channel->efx->rx_queue[0];
while (rxdmaqs) {
if (rxdmaqs & 0x01)
efx_fast_push_rx_descriptors(rx_queue);
rx_queue++;
rxdmaqs >>= 1;
}
return rx_quota;
}
/* Mark channel as finished processing
*
* Note that since we will not receive further interrupts for this
* channel before we finish processing and call the eventq_read_ack()
* method, there is no need to use the interrupt hold-off timers.
*/
static inline void efx_channel_processed(struct efx_channel *channel)
{
/* The interrupt handler for this channel may set work_pending
* as soon as we acknowledge the events we've seen. Make sure
* it's cleared before then. */
channel->work_pending = false;
smp_wmb();
falcon_eventq_read_ack(channel);
}
/* NAPI poll handler
*
* NAPI guarantees serialisation of polls of the same device, which
* provides the guarantee required by efx_process_channel().
*/
static int efx_poll(struct napi_struct *napi, int budget)
{
struct efx_channel *channel =
container_of(napi, struct efx_channel, napi_str);
struct net_device *napi_dev = channel->napi_dev;
int unused;
int rx_packets;
EFX_TRACE(channel->efx, "channel %d NAPI poll executing on CPU %d\n",
channel->channel, raw_smp_processor_id());
unused = efx_process_channel(channel, budget);
rx_packets = (budget - unused);
if (rx_packets < budget) {
/* There is no race here; although napi_disable() will
* only wait for netif_rx_complete(), this isn't a problem
* since efx_channel_processed() will have no effect if
* interrupts have already been disabled.
*/
netif_rx_complete(napi_dev, napi);
efx_channel_processed(channel);
}
return rx_packets;
}
/* Process the eventq of the specified channel immediately on this CPU
*
* Disable hardware generated interrupts, wait for any existing
* processing to finish, then directly poll (and ack ) the eventq.
* Finally reenable NAPI and interrupts.
*
* Since we are touching interrupts the caller should hold the suspend lock
*/
void efx_process_channel_now(struct efx_channel *channel)
{
struct efx_nic *efx = channel->efx;
BUG_ON(!channel->used_flags);
BUG_ON(!channel->enabled);
/* Disable interrupts and wait for ISRs to complete */
falcon_disable_interrupts(efx);
if (efx->legacy_irq)
synchronize_irq(efx->legacy_irq);
if (channel->irq)
synchronize_irq(channel->irq);
/* Wait for any NAPI processing to complete */
napi_disable(&channel->napi_str);
/* Poll the channel */
efx_process_channel(channel, efx->type->evq_size);
/* Ack the eventq. This may cause an interrupt to be generated
* when they are reenabled */
efx_channel_processed(channel);
napi_enable(&channel->napi_str);
falcon_enable_interrupts(efx);
}
/* Create event queue
* Event queue memory allocations are done only once. If the channel
* is reset, the memory buffer will be reused; this guards against
* errors during channel reset and also simplifies interrupt handling.
*/
static int efx_probe_eventq(struct efx_channel *channel)
{
EFX_LOG(channel->efx, "chan %d create event queue\n", channel->channel);
return falcon_probe_eventq(channel);
}
/* Prepare channel's event queue */
static int efx_init_eventq(struct efx_channel *channel)
{
EFX_LOG(channel->efx, "chan %d init event queue\n", channel->channel);
channel->eventq_read_ptr = 0;
return falcon_init_eventq(channel);
}
static void efx_fini_eventq(struct efx_channel *channel)
{
EFX_LOG(channel->efx, "chan %d fini event queue\n", channel->channel);
falcon_fini_eventq(channel);
}
static void efx_remove_eventq(struct efx_channel *channel)
{
EFX_LOG(channel->efx, "chan %d remove event queue\n", channel->channel);
falcon_remove_eventq(channel);
}
/**************************************************************************
*
* Channel handling
*
*************************************************************************/
static int efx_probe_channel(struct efx_channel *channel)
{
struct efx_tx_queue *tx_queue;
struct efx_rx_queue *rx_queue;
int rc;
EFX_LOG(channel->efx, "creating channel %d\n", channel->channel);
rc = efx_probe_eventq(channel);
if (rc)
goto fail1;
efx_for_each_channel_tx_queue(tx_queue, channel) {
rc = efx_probe_tx_queue(tx_queue);
if (rc)
goto fail2;
}
efx_for_each_channel_rx_queue(rx_queue, channel) {
rc = efx_probe_rx_queue(rx_queue);
if (rc)
goto fail3;
}
channel->n_rx_frm_trunc = 0;
return 0;
fail3:
efx_for_each_channel_rx_queue(rx_queue, channel)
efx_remove_rx_queue(rx_queue);
fail2:
efx_for_each_channel_tx_queue(tx_queue, channel)
efx_remove_tx_queue(tx_queue);
fail1:
return rc;
}
/* Channels are shutdown and reinitialised whilst the NIC is running
* to propagate configuration changes (mtu, checksum offload), or
* to clear hardware error conditions
*/
static int efx_init_channels(struct efx_nic *efx)
{
struct efx_tx_queue *tx_queue;
struct efx_rx_queue *rx_queue;
struct efx_channel *channel;
int rc = 0;
/* Calculate the rx buffer allocation parameters required to
* support the current MTU, including padding for header
* alignment and overruns.
*/
efx->rx_buffer_len = (max(EFX_PAGE_IP_ALIGN, NET_IP_ALIGN) +
EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
efx->type->rx_buffer_padding);
efx->rx_buffer_order = get_order(efx->rx_buffer_len);
/* Initialise the channels */
efx_for_each_channel(channel, efx) {
EFX_LOG(channel->efx, "init chan %d\n", channel->channel);
rc = efx_init_eventq(channel);
if (rc)
goto err;
efx_for_each_channel_tx_queue(tx_queue, channel) {
rc = efx_init_tx_queue(tx_queue);
if (rc)
goto err;
}
/* The rx buffer allocation strategy is MTU dependent */
efx_rx_strategy(channel);
efx_for_each_channel_rx_queue(rx_queue, channel) {
rc = efx_init_rx_queue(rx_queue);
if (rc)
goto err;
}
WARN_ON(channel->rx_pkt != NULL);
efx_rx_strategy(channel);
}
return 0;
err:
EFX_ERR(efx, "failed to initialise channel %d\n",
channel ? channel->channel : -1);
efx_fini_channels(efx);
|