author     Ben Hutchings <bhutchings@solarflare.com>   2013-01-29 18:33:15 -0500
committer  Ben Hutchings <bhutchings@solarflare.com>   2013-03-07 15:22:12 -0500
commit     85740cdf0b84224a9fce62dc9150008ef8d6ab4e
tree       a8a774740aca4302119bc95326bcd344a63f8a72
parent     b74e3e8cd6f952faf8797fca81a5a2ceace6b9aa
sfc: Enable RX DMA scattering where possible
Enable RX DMA scattering iff an RX buffer large enough for the current
MTU will not fit into a single page and the NIC supports DMA scattering
for kernel-mode RX queues.

On Falcon and Siena, the RX_USR_BUF_SIZE field is used as the DMA limit
for all RX queues with scatter enabled.  Set it to 1824, matching what
Onload uses now.

Maintain a statistic for frames truncated due to lack of descriptors
(rx_nodesc_trunc).  This is distinct from rx_frm_trunc, which may be
incremented when scattering is disabled and implies an over-length
frame.

Whenever an MTU change causes scattering to be turned on or off, update
filters that point to the PF queues, but leave others unchanged, as VF
drivers assume scattering is off.

Add n_frags parameters to various functions, and make them iterate:
- efx_rx_packet()
- efx_recycle_rx_buffers()
- efx_rx_mk_skb()
- efx_rx_deliver()

Make efx_handle_rx_event() responsible for updating
efx_rx_queue::removed_count.

Change the RX pipeline state to a starting ring index and number of
fragments, and make __efx_rx_packet() responsible for clearing it.

Based on earlier versions by David Riddoch and Jon Cooper.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
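For readers skimming the diff below, the core of the change is the buffer-layout
decision added to efx_start_datapath().  The following standalone C sketch (not
driver code) mirrors that logic assuming 4096-byte pages; EFX_RX_USR_BUF_SIZE and
the three branches come from the patch, while struct rx_layout, choose_rx_layout()
and order_for() are hypothetical names used only for illustration.

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE            4096u   /* assumed 4K pages */
#define EFX_RX_USR_BUF_SIZE  1824u   /* from net_driver.h in this patch */

struct rx_layout {
	bool     scatter;        /* efx->rx_scatter */
	unsigned buffer_order;   /* efx->rx_buffer_order */
	unsigned truesize;       /* efx->rx_buffer_truesize */
	unsigned dma_len;        /* efx->rx_dma_len */
};

/* Hypothetical stand-in for get_order(): smallest page order holding len. */
static unsigned order_for(unsigned len)
{
	unsigned order = 0;
	while ((PAGE_SIZE << order) < len)
		order++;
	return order;
}

/* Mirrors the decision added to efx_start_datapath(). */
static struct rx_layout choose_rx_layout(unsigned rx_buf_len,
					 unsigned rx_dma_len,
					 bool can_rx_scatter)
{
	struct rx_layout l = { .dma_len = rx_dma_len };

	if (rx_buf_len <= PAGE_SIZE) {
		/* Whole frame fits in one page: no scatter needed. */
		l.scatter = false;
		l.buffer_order = 0;
		l.truesize = (rx_buf_len <= PAGE_SIZE / 2) ?
			     PAGE_SIZE / 2 : PAGE_SIZE;
	} else if (can_rx_scatter) {
		/* Split the frame into 1824-byte fragments, two per page. */
		l.scatter = true;
		l.dma_len = EFX_RX_USR_BUF_SIZE;
		l.buffer_order = 0;
		l.truesize = PAGE_SIZE / 2;
	} else {
		/* Fall back to a single high-order buffer. */
		l.scatter = false;
		l.buffer_order = order_for(rx_buf_len);
		l.truesize = PAGE_SIZE << l.buffer_order;
	}
	return l;
}

int main(void)
{
	/* e.g. a 9000-byte-MTU frame plus prefix/alignment overhead */
	struct rx_layout l = choose_rx_layout(9270, 9216, true);

	printf("scatter=%d order=%u truesize=%u dma_len=%u\n",
	       l.scatter, l.buffer_order, l.truesize, l.dma_len);
	return 0;
}

With a standard 1500-byte MTU the first branch is taken and scattering stays
off; only jumbo MTUs on NICs with can_rx_scatter set reach the scatter branch.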
-rw-r--r--  drivers/net/ethernet/sfc/efx.c          34
-rw-r--r--  drivers/net/ethernet/sfc/efx.h          13
-rw-r--r--  drivers/net/ethernet/sfc/ethtool.c       4
-rw-r--r--  drivers/net/ethernet/sfc/falcon.c       17
-rw-r--r--  drivers/net/ethernet/sfc/filter.c       74
-rw-r--r--  drivers/net/ethernet/sfc/net_driver.h   35
-rw-r--r--  drivers/net/ethernet/sfc/nic.c          90
-rw-r--r--  drivers/net/ethernet/sfc/rx.c          211
-rw-r--r--  drivers/net/ethernet/sfc/siena.c         3

9 files changed, 363 insertions, 118 deletions
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index f8013c3ea37c..1213af5024d1 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -88,8 +88,6 @@ const char *const efx_reset_type_names[] = {
88 [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", 88 [RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
89}; 89};
90 90
91#define EFX_MAX_MTU (9 * 1024)
92
93/* Reset workqueue. If any NIC has a hardware failure then a reset will be 91/* Reset workqueue. If any NIC has a hardware failure then a reset will be
94 * queued onto this work queue. This is not a per-nic work queue, because 92 * queued onto this work queue. This is not a per-nic work queue, because
95 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. 93 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
@@ -627,9 +625,11 @@ fail:
627 */ 625 */
628static void efx_start_datapath(struct efx_nic *efx) 626static void efx_start_datapath(struct efx_nic *efx)
629{ 627{
628 bool old_rx_scatter = efx->rx_scatter;
630 struct efx_tx_queue *tx_queue; 629 struct efx_tx_queue *tx_queue;
631 struct efx_rx_queue *rx_queue; 630 struct efx_rx_queue *rx_queue;
632 struct efx_channel *channel; 631 struct efx_channel *channel;
632 size_t rx_buf_len;
633 633
634 /* Calculate the rx buffer allocation parameters required to 634 /* Calculate the rx buffer allocation parameters required to
635 * support the current MTU, including padding for header 635 * support the current MTU, including padding for header
@@ -638,8 +638,32 @@ static void efx_start_datapath(struct efx_nic *efx)
638 efx->rx_dma_len = (efx->type->rx_buffer_hash_size + 638 efx->rx_dma_len = (efx->type->rx_buffer_hash_size +
639 EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + 639 EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
640 efx->type->rx_buffer_padding); 640 efx->type->rx_buffer_padding);
641 efx->rx_buffer_order = get_order(sizeof(struct efx_rx_page_state) + 641 rx_buf_len = (sizeof(struct efx_rx_page_state) +
642 EFX_PAGE_IP_ALIGN + efx->rx_dma_len); 642 EFX_PAGE_IP_ALIGN + efx->rx_dma_len);
643 if (rx_buf_len <= PAGE_SIZE) {
644 efx->rx_scatter = false;
645 efx->rx_buffer_order = 0;
646 if (rx_buf_len <= PAGE_SIZE / 2)
647 efx->rx_buffer_truesize = PAGE_SIZE / 2;
648 else
649 efx->rx_buffer_truesize = PAGE_SIZE;
650 } else if (efx->type->can_rx_scatter) {
651 BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
652 EFX_PAGE_IP_ALIGN + EFX_RX_USR_BUF_SIZE >
653 PAGE_SIZE / 2);
654 efx->rx_scatter = true;
655 efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
656 efx->rx_buffer_order = 0;
657 efx->rx_buffer_truesize = PAGE_SIZE / 2;
658 } else {
659 efx->rx_scatter = false;
660 efx->rx_buffer_order = get_order(rx_buf_len);
661 efx->rx_buffer_truesize = PAGE_SIZE << efx->rx_buffer_order;
662 }
663
664 /* RX filters also have scatter-enabled flags */
665 if (efx->rx_scatter != old_rx_scatter)
666 efx_filter_update_rx_scatter(efx);
643 667
644 /* We must keep at least one descriptor in a TX ring empty. 668 /* We must keep at least one descriptor in a TX ring empty.
645 * We could avoid this when the queue size does not exactly 669 * We could avoid this when the queue size does not exactly
@@ -661,7 +685,7 @@ static void efx_start_datapath(struct efx_nic *efx)
661 efx_nic_generate_fill_event(rx_queue); 685 efx_nic_generate_fill_event(rx_queue);
662 } 686 }
663 687
664 WARN_ON(channel->rx_pkt != NULL); 688 WARN_ON(channel->rx_pkt_n_frags);
665 } 689 }
666 690
667 if (netif_device_present(efx->net_dev)) 691 if (netif_device_present(efx->net_dev))
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 00e7077fa1d8..211da79a65e8 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -39,16 +39,14 @@ extern void efx_init_rx_queue(struct efx_rx_queue *rx_queue);
39extern void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); 39extern void efx_fini_rx_queue(struct efx_rx_queue *rx_queue);
40extern void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue); 40extern void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue);
41extern void efx_rx_slow_fill(unsigned long context); 41extern void efx_rx_slow_fill(unsigned long context);
42extern void __efx_rx_packet(struct efx_channel *channel, 42extern void __efx_rx_packet(struct efx_channel *channel);
43 struct efx_rx_buffer *rx_buf); 43extern void efx_rx_packet(struct efx_rx_queue *rx_queue,
44extern void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, 44 unsigned int index, unsigned int n_frags,
45 unsigned int len, u16 flags); 45 unsigned int len, u16 flags);
46static inline void efx_rx_flush_packet(struct efx_channel *channel) 46static inline void efx_rx_flush_packet(struct efx_channel *channel)
47{ 47{
48 if (channel->rx_pkt) { 48 if (channel->rx_pkt_n_frags)
49 __efx_rx_packet(channel, channel->rx_pkt); 49 __efx_rx_packet(channel);
50 channel->rx_pkt = NULL;
51 }
52} 50}
53extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); 51extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
54 52
@@ -73,6 +71,7 @@ extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
73extern int efx_probe_filters(struct efx_nic *efx); 71extern int efx_probe_filters(struct efx_nic *efx);
74extern void efx_restore_filters(struct efx_nic *efx); 72extern void efx_restore_filters(struct efx_nic *efx);
75extern void efx_remove_filters(struct efx_nic *efx); 73extern void efx_remove_filters(struct efx_nic *efx);
74extern void efx_filter_update_rx_scatter(struct efx_nic *efx);
76extern s32 efx_filter_insert_filter(struct efx_nic *efx, 75extern s32 efx_filter_insert_filter(struct efx_nic *efx,
77 struct efx_filter_spec *spec, 76 struct efx_filter_spec *spec,
78 bool replace); 77 bool replace);
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 8e61cd06f66a..6e768175e7e0 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -154,6 +154,7 @@ static const struct efx_ethtool_stat efx_ethtool_stats[] = {
154 EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err), 154 EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err),
155 EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch), 155 EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch),
156 EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), 156 EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
157 EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_nodesc_trunc),
157}; 158};
158 159
159/* Number of ethtool statistics */ 160/* Number of ethtool statistics */
@@ -978,7 +979,8 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
978 rule->m_ext.data[1])) 979 rule->m_ext.data[1]))
979 return -EINVAL; 980 return -EINVAL;
980 981
981 efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, 0, 982 efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL,
983 efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
982 (rule->ring_cookie == RX_CLS_FLOW_DISC) ? 984 (rule->ring_cookie == RX_CLS_FLOW_DISC) ?
983 0xfff : rule->ring_cookie); 985 0xfff : rule->ring_cookie);
984 986
diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c
index 49bcd196e10d..4486102fa9b3 100644
--- a/drivers/net/ethernet/sfc/falcon.c
+++ b/drivers/net/ethernet/sfc/falcon.c
@@ -1546,10 +1546,6 @@ static int falcon_probe_nic(struct efx_nic *efx)
1546 1546
1547static void falcon_init_rx_cfg(struct efx_nic *efx) 1547static void falcon_init_rx_cfg(struct efx_nic *efx)
1548{ 1548{
1549 /* Prior to Siena the RX DMA engine will split each frame at
1550 * intervals of RX_USR_BUF_SIZE (32-byte units). We set it to
1551 * be so large that that never happens. */
1552 const unsigned huge_buf_size = (3 * 4096) >> 5;
1553 /* RX control FIFO thresholds (32 entries) */ 1549 /* RX control FIFO thresholds (32 entries) */
1554 const unsigned ctrl_xon_thr = 20; 1550 const unsigned ctrl_xon_thr = 20;
1555 const unsigned ctrl_xoff_thr = 25; 1551 const unsigned ctrl_xoff_thr = 25;
@@ -1557,10 +1553,15 @@ static void falcon_init_rx_cfg(struct efx_nic *efx)
1557 1553
1558 efx_reado(efx, &reg, FR_AZ_RX_CFG); 1554 efx_reado(efx, &reg, FR_AZ_RX_CFG);
1559 if (efx_nic_rev(efx) <= EFX_REV_FALCON_A1) { 1555 if (efx_nic_rev(efx) <= EFX_REV_FALCON_A1) {
1560 /* Data FIFO size is 5.5K */ 1556 /* Data FIFO size is 5.5K. The RX DMA engine only
1557 * supports scattering for user-mode queues, but will
1558 * split DMA writes at intervals of RX_USR_BUF_SIZE
1559 * (32-byte units) even for kernel-mode queues. We
1560 * set it to be so large that that never happens.
1561 */
1561 EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_DESC_PUSH_EN, 0); 1562 EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_DESC_PUSH_EN, 0);
1562 EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_USR_BUF_SIZE, 1563 EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_USR_BUF_SIZE,
1563 huge_buf_size); 1564 (3 * 4096) >> 5);
1564 EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_MAC_TH, 512 >> 8); 1565 EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_MAC_TH, 512 >> 8);
1565 EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_MAC_TH, 2048 >> 8); 1566 EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_MAC_TH, 2048 >> 8);
1566 EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_TX_TH, ctrl_xon_thr); 1567 EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_TX_TH, ctrl_xon_thr);
@@ -1569,7 +1570,7 @@ static void falcon_init_rx_cfg(struct efx_nic *efx)
1569 /* Data FIFO size is 80K; register fields moved */ 1570 /* Data FIFO size is 80K; register fields moved */
1570 EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_DESC_PUSH_EN, 0); 1571 EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_DESC_PUSH_EN, 0);
1571 EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_USR_BUF_SIZE, 1572 EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_USR_BUF_SIZE,
1572 huge_buf_size); 1573 EFX_RX_USR_BUF_SIZE >> 5);
1573 /* Send XON and XOFF at ~3 * max MTU away from empty/full */ 1574 /* Send XON and XOFF at ~3 * max MTU away from empty/full */
1574 EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_MAC_TH, 27648 >> 8); 1575 EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_MAC_TH, 27648 >> 8);
1575 EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_MAC_TH, 54272 >> 8); 1576 EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_MAC_TH, 54272 >> 8);
@@ -1815,6 +1816,7 @@ const struct efx_nic_type falcon_a1_nic_type = {
1815 .evq_rptr_tbl_base = FR_AA_EVQ_RPTR_KER, 1816 .evq_rptr_tbl_base = FR_AA_EVQ_RPTR_KER,
1816 .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH), 1817 .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
1817 .rx_buffer_padding = 0x24, 1818 .rx_buffer_padding = 0x24,
1819 .can_rx_scatter = false,
1818 .max_interrupt_mode = EFX_INT_MODE_MSI, 1820 .max_interrupt_mode = EFX_INT_MODE_MSI,
1819 .phys_addr_channels = 4, 1821 .phys_addr_channels = 4,
1820 .timer_period_max = 1 << FRF_AB_TC_TIMER_VAL_WIDTH, 1822 .timer_period_max = 1 << FRF_AB_TC_TIMER_VAL_WIDTH,
@@ -1865,6 +1867,7 @@ const struct efx_nic_type falcon_b0_nic_type = {
1865 .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH), 1867 .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
1866 .rx_buffer_hash_size = 0x10, 1868 .rx_buffer_hash_size = 0x10,
1867 .rx_buffer_padding = 0, 1869 .rx_buffer_padding = 0,
1870 .can_rx_scatter = true,
1868 .max_interrupt_mode = EFX_INT_MODE_MSIX, 1871 .max_interrupt_mode = EFX_INT_MODE_MSIX,
1869 .phys_addr_channels = 32, /* Hardware limit is 64, but the legacy 1872 .phys_addr_channels = 32, /* Hardware limit is 64, but the legacy
1870 * interrupt handler only supports 32 1873 * interrupt handler only supports 32
diff --git a/drivers/net/ethernet/sfc/filter.c b/drivers/net/ethernet/sfc/filter.c
index 61b4408bbdb8..2397f0e8d3eb 100644
--- a/drivers/net/ethernet/sfc/filter.c
+++ b/drivers/net/ethernet/sfc/filter.c
@@ -172,6 +172,25 @@ static void efx_filter_push_rx_config(struct efx_nic *efx)
172 filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED, 172 filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED,
173 !!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags & 173 !!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags &
174 EFX_FILTER_FLAG_RX_RSS)); 174 EFX_FILTER_FLAG_RX_RSS));
175
176 /* There is a single bit to enable RX scatter for all
177 * unmatched packets. Only set it if scatter is
178 * enabled in both filter specs.
179 */
180 EFX_SET_OWORD_FIELD(
181 filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
182 !!(table->spec[EFX_FILTER_INDEX_UC_DEF].flags &
183 table->spec[EFX_FILTER_INDEX_MC_DEF].flags &
184 EFX_FILTER_FLAG_RX_SCATTER));
185 } else if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
186 /* We don't expose 'default' filters because unmatched
187 * packets always go to the queue number found in the
188 * RSS table. But we still need to set the RX scatter
189 * bit here.
190 */
191 EFX_SET_OWORD_FIELD(
192 filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
193 efx->rx_scatter);
175 } 194 }
176 195
177 efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL); 196 efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL);
@@ -413,13 +432,18 @@ static void efx_filter_reset_rx_def(struct efx_nic *efx, unsigned filter_idx)
413 struct efx_filter_state *state = efx->filter_state; 432 struct efx_filter_state *state = efx->filter_state;
414 struct efx_filter_table *table = &state->table[EFX_FILTER_TABLE_RX_DEF]; 433 struct efx_filter_table *table = &state->table[EFX_FILTER_TABLE_RX_DEF];
415 struct efx_filter_spec *spec = &table->spec[filter_idx]; 434 struct efx_filter_spec *spec = &table->spec[filter_idx];
435 enum efx_filter_flags flags = 0;
416 436
417 /* If there's only one channel then disable RSS for non VF 437 /* If there's only one channel then disable RSS for non VF
418 * traffic, thereby allowing VFs to use RSS when the PF can't. 438 * traffic, thereby allowing VFs to use RSS when the PF can't.
419 */ 439 */
420 efx_filter_init_rx(spec, EFX_FILTER_PRI_MANUAL, 440 if (efx->n_rx_channels > 1)
421 efx->n_rx_channels > 1 ? EFX_FILTER_FLAG_RX_RSS : 0, 441 flags |= EFX_FILTER_FLAG_RX_RSS;
422 0); 442
443 if (efx->rx_scatter)
444 flags |= EFX_FILTER_FLAG_RX_SCATTER;
445
446 efx_filter_init_rx(spec, EFX_FILTER_PRI_MANUAL, flags, 0);
423 spec->type = EFX_FILTER_UC_DEF + filter_idx; 447 spec->type = EFX_FILTER_UC_DEF + filter_idx;
424 table->used_bitmap[0] |= 1 << filter_idx; 448 table->used_bitmap[0] |= 1 << filter_idx;
425} 449}
@@ -1101,6 +1125,50 @@ void efx_remove_filters(struct efx_nic *efx)
1101 kfree(state); 1125 kfree(state);
1102} 1126}
1103 1127
1128/* Update scatter enable flags for filters pointing to our own RX queues */
1129void efx_filter_update_rx_scatter(struct efx_nic *efx)
1130{
1131 struct efx_filter_state *state = efx->filter_state;
1132 enum efx_filter_table_id table_id;
1133 struct efx_filter_table *table;
1134 efx_oword_t filter;
1135 unsigned int filter_idx;
1136
1137 spin_lock_bh(&state->lock);
1138
1139 for (table_id = EFX_FILTER_TABLE_RX_IP;
1140 table_id <= EFX_FILTER_TABLE_RX_DEF;
1141 table_id++) {
1142 table = &state->table[table_id];
1143
1144 for (filter_idx = 0; filter_idx < table->size; filter_idx++) {
1145 if (!test_bit(filter_idx, table->used_bitmap) ||
1146 table->spec[filter_idx].dmaq_id >=
1147 efx->n_rx_channels)
1148 continue;
1149
1150 if (efx->rx_scatter)
1151 table->spec[filter_idx].flags |=
1152 EFX_FILTER_FLAG_RX_SCATTER;
1153 else
1154 table->spec[filter_idx].flags &=
1155 ~EFX_FILTER_FLAG_RX_SCATTER;
1156
1157 if (table_id == EFX_FILTER_TABLE_RX_DEF)
1158 /* Pushed by efx_filter_push_rx_config() */
1159 continue;
1160
1161 efx_filter_build(&filter, &table->spec[filter_idx]);
1162 efx_writeo(efx, &filter,
1163 table->offset + table->step * filter_idx);
1164 }
1165 }
1166
1167 efx_filter_push_rx_config(efx);
1168
1169 spin_unlock_bh(&state->lock);
1170}
1171
1104#ifdef CONFIG_RFS_ACCEL 1172#ifdef CONFIG_RFS_ACCEL
1105 1173
1106int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, 1174int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 1bc911f980b5..e41b54bada7c 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -69,6 +69,12 @@
69#define EFX_TXQ_TYPES 4 69#define EFX_TXQ_TYPES 4
70#define EFX_MAX_TX_QUEUES (EFX_TXQ_TYPES * EFX_MAX_CHANNELS) 70#define EFX_MAX_TX_QUEUES (EFX_TXQ_TYPES * EFX_MAX_CHANNELS)
71 71
72/* Maximum possible MTU the driver supports */
73#define EFX_MAX_MTU (9 * 1024)
74
75/* Size of an RX scatter buffer. Small enough to pack 2 into a 4K page. */
76#define EFX_RX_USR_BUF_SIZE 1824
77
72/* Forward declare Precision Time Protocol (PTP) support structure. */ 78/* Forward declare Precision Time Protocol (PTP) support structure. */
73struct efx_ptp_data; 79struct efx_ptp_data;
74 80
@@ -212,7 +218,8 @@ struct efx_tx_queue {
212 * If completed: offset in @page of Ethernet header. 218 * If completed: offset in @page of Ethernet header.
213 * @len: If pending: length for DMA descriptor. 219 * @len: If pending: length for DMA descriptor.
214 * If completed: received length, excluding hash prefix. 220 * If completed: received length, excluding hash prefix.
215 * @flags: Flags for buffer and packet state. 221 * @flags: Flags for buffer and packet state. These are only set on the
222 * first buffer of a scattered packet.
216 */ 223 */
217struct efx_rx_buffer { 224struct efx_rx_buffer {
218 dma_addr_t dma_addr; 225 dma_addr_t dma_addr;
@@ -256,6 +263,7 @@ struct efx_rx_page_state {
256 * @added_count: Number of buffers added to the receive queue. 263 * @added_count: Number of buffers added to the receive queue.
257 * @notified_count: Number of buffers given to NIC (<= @added_count). 264 * @notified_count: Number of buffers given to NIC (<= @added_count).
258 * @removed_count: Number of buffers removed from the receive queue. 265 * @removed_count: Number of buffers removed from the receive queue.
266 * @scatter_n: Number of buffers used by current packet
259 * @max_fill: RX descriptor maximum fill level (<= ring size) 267 * @max_fill: RX descriptor maximum fill level (<= ring size)
260 * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill 268 * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill
261 * (<= @max_fill) 269 * (<= @max_fill)
@@ -276,6 +284,7 @@ struct efx_rx_queue {
276 unsigned int added_count; 284 unsigned int added_count;
277 unsigned int notified_count; 285 unsigned int notified_count;
278 unsigned int removed_count; 286 unsigned int removed_count;
287 unsigned int scatter_n;
279 unsigned int max_fill; 288 unsigned int max_fill;
280 unsigned int fast_fill_trigger; 289 unsigned int fast_fill_trigger;
281 unsigned int min_fill; 290 unsigned int min_fill;
@@ -335,6 +344,12 @@ enum efx_rx_alloc_method {
335 * @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors 344 * @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors
336 * @n_rx_overlength: Count of RX_OVERLENGTH errors 345 * @n_rx_overlength: Count of RX_OVERLENGTH errors
337 * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun 346 * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun
347 * @n_rx_nodesc_trunc: Number of RX packets truncated and then dropped due to
348 * lack of descriptors
349 * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by
350 * __efx_rx_packet(), or zero if there is none
351 * @rx_pkt_index: Ring index of first buffer for next packet to be delivered
352 * by __efx_rx_packet(), if @rx_pkt_n_frags != 0
338 * @rx_queue: RX queue for this channel 353 * @rx_queue: RX queue for this channel
339 * @tx_queue: TX queues for this channel 354 * @tx_queue: TX queues for this channel
340 */ 355 */
@@ -366,11 +381,10 @@ struct efx_channel {
366 unsigned n_rx_frm_trunc; 381 unsigned n_rx_frm_trunc;
367 unsigned n_rx_overlength; 382 unsigned n_rx_overlength;
368 unsigned n_skbuff_leaks; 383 unsigned n_skbuff_leaks;
384 unsigned int n_rx_nodesc_trunc;
369 385
370 /* Used to pipeline received packets in order to optimise memory 386 unsigned int rx_pkt_n_frags;
371 * access with prefetches. 387 unsigned int rx_pkt_index;
372 */
373 struct efx_rx_buffer *rx_pkt;
374 388
375 struct efx_rx_queue rx_queue; 389 struct efx_rx_queue rx_queue;
376 struct efx_tx_queue tx_queue[EFX_TXQ_TYPES]; 390 struct efx_tx_queue tx_queue[EFX_TXQ_TYPES];
@@ -672,8 +686,11 @@ struct vfdi_status;
672 * @n_tx_channels: Number of channels used for TX 686 * @n_tx_channels: Number of channels used for TX
673 * @rx_dma_len: Current maximum RX DMA length 687 * @rx_dma_len: Current maximum RX DMA length
674 * @rx_buffer_order: Order (log2) of number of pages for each RX buffer 688 * @rx_buffer_order: Order (log2) of number of pages for each RX buffer
689 * @rx_buffer_truesize: Amortised allocation size of an RX buffer,
690 * for use in sk_buff::truesize
675 * @rx_hash_key: Toeplitz hash key for RSS 691 * @rx_hash_key: Toeplitz hash key for RSS
676 * @rx_indir_table: Indirection table for RSS 692 * @rx_indir_table: Indirection table for RSS
693 * @rx_scatter: Scatter mode enabled for receives
677 * @int_error_count: Number of internal errors seen recently 694 * @int_error_count: Number of internal errors seen recently
678 * @int_error_expire: Time at which error count will be expired 695 * @int_error_expire: Time at which error count will be expired
679 * @irq_status: Interrupt status buffer 696 * @irq_status: Interrupt status buffer
@@ -788,8 +805,10 @@ struct efx_nic {
788 unsigned n_tx_channels; 805 unsigned n_tx_channels;
789 unsigned int rx_dma_len; 806 unsigned int rx_dma_len;
790 unsigned int rx_buffer_order; 807 unsigned int rx_buffer_order;
808 unsigned int rx_buffer_truesize;
791 u8 rx_hash_key[40]; 809 u8 rx_hash_key[40];
792 u32 rx_indir_table[128]; 810 u32 rx_indir_table[128];
811 bool rx_scatter;
793 812
794 unsigned int_error_count; 813 unsigned int_error_count;
795 unsigned long int_error_expire; 814 unsigned long int_error_expire;
@@ -920,8 +939,9 @@ static inline unsigned int efx_port_num(struct efx_nic *efx)
920 * @evq_ptr_tbl_base: Event queue pointer table base address 939 * @evq_ptr_tbl_base: Event queue pointer table base address
921 * @evq_rptr_tbl_base: Event queue read-pointer table base address 940 * @evq_rptr_tbl_base: Event queue read-pointer table base address
922 * @max_dma_mask: Maximum possible DMA mask 941 * @max_dma_mask: Maximum possible DMA mask
923 * @rx_buffer_hash_size: Size of hash at start of RX buffer 942 * @rx_buffer_hash_size: Size of hash at start of RX packet
924 * @rx_buffer_padding: Size of padding at end of RX buffer 943 * @rx_buffer_padding: Size of padding at end of RX packet
944 * @can_rx_scatter: NIC is able to scatter packet to multiple buffers
925 * @max_interrupt_mode: Highest capability interrupt mode supported 945 * @max_interrupt_mode: Highest capability interrupt mode supported
926 * from &enum efx_init_mode. 946 * from &enum efx_init_mode.
927 * @phys_addr_channels: Number of channels with physically addressed 947 * @phys_addr_channels: Number of channels with physically addressed
@@ -969,6 +989,7 @@ struct efx_nic_type {
969 u64 max_dma_mask; 989 u64 max_dma_mask;
970 unsigned int rx_buffer_hash_size; 990 unsigned int rx_buffer_hash_size;
971 unsigned int rx_buffer_padding; 991 unsigned int rx_buffer_padding;
992 bool can_rx_scatter;
972 unsigned int max_interrupt_mode; 993 unsigned int max_interrupt_mode;
973 unsigned int phys_addr_channels; 994 unsigned int phys_addr_channels;
974 unsigned int timer_period_max; 995 unsigned int timer_period_max;
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index 0ad790cc473c..f9f5df8b51fe 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -591,12 +591,22 @@ void efx_nic_init_rx(struct efx_rx_queue *rx_queue)
591 struct efx_nic *efx = rx_queue->efx; 591 struct efx_nic *efx = rx_queue->efx;
592 bool is_b0 = efx_nic_rev(efx) >= EFX_REV_FALCON_B0; 592 bool is_b0 = efx_nic_rev(efx) >= EFX_REV_FALCON_B0;
593 bool iscsi_digest_en = is_b0; 593 bool iscsi_digest_en = is_b0;
594 bool jumbo_en;
595
596 /* For kernel-mode queues in Falcon A1, the JUMBO flag enables
597 * DMA to continue after a PCIe page boundary (and scattering
598 * is not possible). In Falcon B0 and Siena, it enables
599 * scatter.
600 */
601 jumbo_en = !is_b0 || efx->rx_scatter;
594 602
595 netif_dbg(efx, hw, efx->net_dev, 603 netif_dbg(efx, hw, efx->net_dev,
596 "RX queue %d ring in special buffers %d-%d\n", 604 "RX queue %d ring in special buffers %d-%d\n",
597 efx_rx_queue_index(rx_queue), rx_queue->rxd.index, 605 efx_rx_queue_index(rx_queue), rx_queue->rxd.index,
598 rx_queue->rxd.index + rx_queue->rxd.entries - 1); 606 rx_queue->rxd.index + rx_queue->rxd.entries - 1);
599 607
608 rx_queue->scatter_n = 0;
609
600 /* Pin RX descriptor ring */ 610 /* Pin RX descriptor ring */
601 efx_init_special_buffer(efx, &rx_queue->rxd); 611 efx_init_special_buffer(efx, &rx_queue->rxd);
602 612
@@ -613,8 +623,7 @@ void efx_nic_init_rx(struct efx_rx_queue *rx_queue)
613 FRF_AZ_RX_DESCQ_SIZE, 623 FRF_AZ_RX_DESCQ_SIZE,
614 __ffs(rx_queue->rxd.entries), 624 __ffs(rx_queue->rxd.entries),
615 FRF_AZ_RX_DESCQ_TYPE, 0 /* kernel queue */ , 625 FRF_AZ_RX_DESCQ_TYPE, 0 /* kernel queue */ ,
616 /* For >=B0 this is scatter so disable */ 626 FRF_AZ_RX_DESCQ_JUMBO, jumbo_en,
617 FRF_AZ_RX_DESCQ_JUMBO, !is_b0,
618 FRF_AZ_RX_DESCQ_EN, 1); 627 FRF_AZ_RX_DESCQ_EN, 1);
619 efx_writeo_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base, 628 efx_writeo_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base,
620 efx_rx_queue_index(rx_queue)); 629 efx_rx_queue_index(rx_queue));
@@ -968,13 +977,24 @@ static u16 efx_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
968 EFX_RX_PKT_DISCARD : 0; 977 EFX_RX_PKT_DISCARD : 0;
969} 978}
970 979
971/* Handle receive events that are not in-order. */ 980/* Handle receive events that are not in-order. Return true if this
972static void 981 * can be handled as a partial packet discard, false if it's more
982 * serious.
983 */
984static bool
973efx_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index) 985efx_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index)
974{ 986{
987 struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
975 struct efx_nic *efx = rx_queue->efx; 988 struct efx_nic *efx = rx_queue->efx;
976 unsigned expected, dropped; 989 unsigned expected, dropped;
977 990
991 if (rx_queue->scatter_n &&
992 index == ((rx_queue->removed_count + rx_queue->scatter_n - 1) &
993 rx_queue->ptr_mask)) {
994 ++channel->n_rx_nodesc_trunc;
995 return true;
996 }
997
978 expected = rx_queue->removed_count & rx_queue->ptr_mask; 998 expected = rx_queue->removed_count & rx_queue->ptr_mask;
979 dropped = (index - expected) & rx_queue->ptr_mask; 999 dropped = (index - expected) & rx_queue->ptr_mask;
980 netif_info(efx, rx_err, efx->net_dev, 1000 netif_info(efx, rx_err, efx->net_dev,
@@ -983,6 +1003,7 @@ efx_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index)
983 1003
984 efx_schedule_reset(efx, EFX_WORKAROUND_5676(efx) ? 1004 efx_schedule_reset(efx, EFX_WORKAROUND_5676(efx) ?
985 RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE); 1005 RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE);
1006 return false;
986} 1007}
987 1008
988/* Handle a packet received event 1009/* Handle a packet received event
@@ -998,7 +1019,7 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event)
998 unsigned int rx_ev_desc_ptr, rx_ev_byte_cnt; 1019 unsigned int rx_ev_desc_ptr, rx_ev_byte_cnt;
999 unsigned int rx_ev_hdr_type, rx_ev_mcast_pkt; 1020 unsigned int rx_ev_hdr_type, rx_ev_mcast_pkt;
1000 unsigned expected_ptr; 1021 unsigned expected_ptr;
1001 bool rx_ev_pkt_ok; 1022 bool rx_ev_pkt_ok, rx_ev_sop, rx_ev_cont;
1002 u16 flags; 1023 u16 flags;
1003 struct efx_rx_queue *rx_queue; 1024 struct efx_rx_queue *rx_queue;
1004 struct efx_nic *efx = channel->efx; 1025 struct efx_nic *efx = channel->efx;
@@ -1006,21 +1027,56 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event)
1006 if (unlikely(ACCESS_ONCE(efx->reset_pending))) 1027 if (unlikely(ACCESS_ONCE(efx->reset_pending)))
1007 return; 1028 return;
1008 1029
1009 /* Basic packet information */ 1030 rx_ev_cont = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT);
1010 rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT); 1031 rx_ev_sop = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP);
1011 rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK);
1012 rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE);
1013 WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT));
1014 WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP) != 1);
1015 WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_Q_LABEL) != 1032 WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_Q_LABEL) !=
1016 channel->channel); 1033 channel->channel);
1017 1034
1018 rx_queue = efx_channel_get_rx_queue(channel); 1035 rx_queue = efx_channel_get_rx_queue(channel);
1019 1036
1020 rx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_DESC_PTR); 1037 rx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_DESC_PTR);
1021 expected_ptr = rx_queue->removed_count & rx_queue->ptr_mask; 1038 expected_ptr = ((rx_queue->removed_count + rx_queue->scatter_n) &
1022 if (unlikely(rx_ev_desc_ptr != expected_ptr)) 1039 rx_queue->ptr_mask);
1023 efx_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr); 1040
1041 /* Check for partial drops and other errors */
1042 if (unlikely(rx_ev_desc_ptr != expected_ptr) ||
1043 unlikely(rx_ev_sop != (rx_queue->scatter_n == 0))) {
1044 if (rx_ev_desc_ptr != expected_ptr &&
1045 !efx_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr))
1046 return;
1047
1048 /* Discard all pending fragments */
1049 if (rx_queue->scatter_n) {
1050 efx_rx_packet(
1051 rx_queue,
1052 rx_queue->removed_count & rx_queue->ptr_mask,
1053 rx_queue->scatter_n, 0, EFX_RX_PKT_DISCARD);
1054 rx_queue->removed_count += rx_queue->scatter_n;
1055 rx_queue->scatter_n = 0;
1056 }
1057
1058 /* Return if there is no new fragment */
1059 if (rx_ev_desc_ptr != expected_ptr)
1060 return;
1061
1062 /* Discard new fragment if not SOP */
1063 if (!rx_ev_sop) {
1064 efx_rx_packet(
1065 rx_queue,
1066 rx_queue->removed_count & rx_queue->ptr_mask,
1067 1, 0, EFX_RX_PKT_DISCARD);
1068 ++rx_queue->removed_count;
1069 return;
1070 }
1071 }
1072
1073 ++rx_queue->scatter_n;
1074 if (rx_ev_cont)
1075 return;
1076
1077 rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT);
1078 rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK);
1079 rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE);
1024 1080
1025 if (likely(rx_ev_pkt_ok)) { 1081 if (likely(rx_ev_pkt_ok)) {
1026 /* If packet is marked as OK and packet type is TCP/IP or 1082 /* If packet is marked as OK and packet type is TCP/IP or
@@ -1048,7 +1104,11 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event)
1048 channel->irq_mod_score += 2; 1104 channel->irq_mod_score += 2;
1049 1105
1050 /* Handle received packet */ 1106 /* Handle received packet */
1051 efx_rx_packet(rx_queue, rx_ev_desc_ptr, rx_ev_byte_cnt, flags); 1107 efx_rx_packet(rx_queue,
1108 rx_queue->removed_count & rx_queue->ptr_mask,
1109 rx_queue->scatter_n, rx_ev_byte_cnt, flags);
1110 rx_queue->removed_count += rx_queue->scatter_n;
1111 rx_queue->scatter_n = 0;
1052} 1112}
1053 1113
1054/* If this flush done event corresponds to a &struct efx_tx_queue, then 1114/* If this flush done event corresponds to a &struct efx_tx_queue, then
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 04518722ac1d..88aa1ff01e3f 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -39,13 +39,17 @@
39 */ 39 */
40static unsigned int rx_refill_threshold; 40static unsigned int rx_refill_threshold;
41 41
42/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
43#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
44 EFX_RX_USR_BUF_SIZE)
45
42/* 46/*
43 * RX maximum head room required. 47 * RX maximum head room required.
44 * 48 *
45 * This must be at least 1 to prevent overflow and at least 2 to allow 49 * This must be at least 1 to prevent overflow, plus one packet-worth
46 * pipelined receives. 50 * to allow pipelined receives.
47 */ 51 */
48#define EFX_RXD_HEAD_ROOM 2 52#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
49 53
50static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) 54static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
51{ 55{
@@ -66,6 +70,15 @@ static inline u32 efx_rx_buf_hash(const u8 *eh)
66#endif 70#endif
67} 71}
68 72
73static inline struct efx_rx_buffer *
74efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
75{
76 if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask)))
77 return efx_rx_buffer(rx_queue, 0);
78 else
79 return rx_buf + 1;
80}
81
69/** 82/**
70 * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers 83 * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
71 * 84 *
@@ -199,28 +212,34 @@ static void efx_resurrect_rx_buffer(struct efx_rx_queue *rx_queue,
199 ++rx_queue->added_count; 212 ++rx_queue->added_count;
200} 213}
201 214
202/* Recycle the given rx buffer directly back into the rx_queue. There is 215/* Recycle buffers directly back into the rx_queue. There is always
203 * always room to add this buffer, because we've just popped a buffer. */ 216 * room to add these buffer, because we've just popped them.
204static void efx_recycle_rx_buffer(struct efx_channel *channel, 217 */
205 struct efx_rx_buffer *rx_buf) 218static void efx_recycle_rx_buffers(struct efx_channel *channel,
219 struct efx_rx_buffer *rx_buf,
220 unsigned int n_frags)
206{ 221{
207 struct efx_nic *efx = channel->efx; 222 struct efx_nic *efx = channel->efx;
208 struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); 223 struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
209 struct efx_rx_buffer *new_buf; 224 struct efx_rx_buffer *new_buf;
210 unsigned index; 225 unsigned index;
211 226
212 rx_buf->flags = 0; 227 do {
228 rx_buf->flags = 0;
213 229
214 if (efx->rx_dma_len <= EFX_RX_HALF_PAGE && 230 if (efx->rx_dma_len <= EFX_RX_HALF_PAGE &&
215 page_count(rx_buf->page) == 1) 231 page_count(rx_buf->page) == 1)
216 efx_resurrect_rx_buffer(rx_queue, rx_buf); 232 efx_resurrect_rx_buffer(rx_queue, rx_buf);
217 233
218 index = rx_queue->added_count & rx_queue->ptr_mask; 234 index = rx_queue->added_count & rx_queue->ptr_mask;
219 new_buf = efx_rx_buffer(rx_queue, index); 235 new_buf = efx_rx_buffer(rx_queue, index);
220 236
221 memcpy(new_buf, rx_buf, sizeof(*new_buf)); 237 memcpy(new_buf, rx_buf, sizeof(*new_buf));
222 rx_buf->page = NULL; 238 rx_buf->page = NULL;
223 ++rx_queue->added_count; 239 ++rx_queue->added_count;
240
241 rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
242 } while (--n_frags);
224} 243}
225 244
226/** 245/**
@@ -328,46 +347,56 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
328/* Pass a received packet up through GRO. GRO can handle pages 347/* Pass a received packet up through GRO. GRO can handle pages
329 * regardless of checksum state and skbs with a good checksum. 348 * regardless of checksum state and skbs with a good checksum.
330 */ 349 */
331static void efx_rx_packet_gro(struct efx_channel *channel, 350static void
332 struct efx_rx_buffer *rx_buf, 351efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
333 const u8 *eh) 352 unsigned int n_frags, u8 *eh)
334{ 353{
335 struct napi_struct *napi = &channel->napi_str; 354 struct napi_struct *napi = &channel->napi_str;
336 gro_result_t gro_result; 355 gro_result_t gro_result;
337 struct efx_nic *efx = channel->efx; 356 struct efx_nic *efx = channel->efx;
338 struct page *page = rx_buf->page;
339 struct sk_buff *skb; 357 struct sk_buff *skb;
340 358
341 rx_buf->page = NULL;
342
343 skb = napi_get_frags(napi); 359 skb = napi_get_frags(napi);
344 if (!skb) { 360 if (unlikely(!skb)) {
345 put_page(page); 361 while (n_frags--) {
362 put_page(rx_buf->page);
363 rx_buf->page = NULL;
364 rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
365 }
346 return; 366 return;
347 } 367 }
348 368
349 if (efx->net_dev->features & NETIF_F_RXHASH) 369 if (efx->net_dev->features & NETIF_F_RXHASH)
350 skb->rxhash = efx_rx_buf_hash(eh); 370 skb->rxhash = efx_rx_buf_hash(eh);
351
352 skb_fill_page_desc(skb, 0, page, rx_buf->page_offset, rx_buf->len);
353
354 skb->len = rx_buf->len;
355 skb->data_len = rx_buf->len;
356 skb->truesize += rx_buf->len;
357 skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? 371 skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
358 CHECKSUM_UNNECESSARY : CHECKSUM_NONE); 372 CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
359 373
360 skb_record_rx_queue(skb, channel->rx_queue.core_index); 374 for (;;) {
375 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
376 rx_buf->page, rx_buf->page_offset,
377 rx_buf->len);
378 rx_buf->page = NULL;
379 skb->len += rx_buf->len;
380 if (skb_shinfo(skb)->nr_frags == n_frags)
381 break;
361 382
362 gro_result = napi_gro_frags(napi); 383 rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
384 }
385
386 skb->data_len = skb->len;
387 skb->truesize += n_frags * efx->rx_buffer_truesize;
388
389 skb_record_rx_queue(skb, channel->rx_queue.core_index);
363 390
391 gro_result = napi_gro_frags(napi);
364 if (gro_result != GRO_DROP) 392 if (gro_result != GRO_DROP)
365 channel->irq_mod_score += 2; 393 channel->irq_mod_score += 2;
366} 394}
367 395
368/* Allocate and construct an SKB around a struct page.*/ 396/* Allocate and construct an SKB around page fragments */
369static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, 397static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
370 struct efx_rx_buffer *rx_buf, 398 struct efx_rx_buffer *rx_buf,
399 unsigned int n_frags,
371 u8 *eh, int hdr_len) 400 u8 *eh, int hdr_len)
372{ 401{
373 struct efx_nic *efx = channel->efx; 402 struct efx_nic *efx = channel->efx;
@@ -381,25 +410,32 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
381 EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len); 410 EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len);
382 411
383 skb_reserve(skb, EFX_PAGE_SKB_ALIGN); 412 skb_reserve(skb, EFX_PAGE_SKB_ALIGN);
413 memcpy(__skb_put(skb, hdr_len), eh, hdr_len);
384 414
385 skb->len = rx_buf->len; 415 /* Append the remaining page(s) onto the frag list */
386 skb->truesize = rx_buf->len + sizeof(struct sk_buff);
387 memcpy(skb->data, eh, hdr_len);
388 skb->tail += hdr_len;
389
390 /* Append the remaining page onto the frag list */
391 if (rx_buf->len > hdr_len) { 416 if (rx_buf->len > hdr_len) {
392 skb->data_len = skb->len - hdr_len; 417 rx_buf->page_offset += hdr_len;
393 skb_fill_page_desc(skb, 0, rx_buf->page, 418 rx_buf->len -= hdr_len;
394 rx_buf->page_offset + hdr_len, 419
395 skb->data_len); 420 for (;;) {
421 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
422 rx_buf->page, rx_buf->page_offset,
423 rx_buf->len);
424 rx_buf->page = NULL;
425 skb->len += rx_buf->len;
426 skb->data_len += rx_buf->len;
427 if (skb_shinfo(skb)->nr_frags == n_frags)
428 break;
429
430 rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
431 }
396 } else { 432 } else {
397 __free_pages(rx_buf->page, efx->rx_buffer_order); 433 __free_pages(rx_buf->page, efx->rx_buffer_order);
398 skb->data_len = 0; 434 rx_buf->page = NULL;
435 n_frags = 0;
399 } 436 }
400 437
401 /* Ownership has transferred from the rx_buf to skb */ 438 skb->truesize += n_frags * efx->rx_buffer_truesize;
402 rx_buf->page = NULL;
403 439
404 /* Move past the ethernet header */ 440 /* Move past the ethernet header */
405 skb->protocol = eth_type_trans(skb, efx->net_dev); 441 skb->protocol = eth_type_trans(skb, efx->net_dev);
@@ -408,7 +444,7 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
408} 444}
409 445
410void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, 446void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
411 unsigned int len, u16 flags) 447 unsigned int n_frags, unsigned int len, u16 flags)
412{ 448{
413 struct efx_nic *efx = rx_queue->efx; 449 struct efx_nic *efx = rx_queue->efx;
414 struct efx_channel *channel = efx_rx_queue_channel(rx_queue); 450 struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
@@ -417,35 +453,43 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
417 rx_buf = efx_rx_buffer(rx_queue, index); 453 rx_buf = efx_rx_buffer(rx_queue, index);
418 rx_buf->flags |= flags; 454 rx_buf->flags |= flags;
419 455
420 /* This allows the refill path to post another buffer. 456 /* Validate the number of fragments and completed length */
421 * EFX_RXD_HEAD_ROOM ensures that the slot we are using 457 if (n_frags == 1) {
422 * isn't overwritten yet. 458 efx_rx_packet__check_len(rx_queue, rx_buf, len);
423 */ 459 } else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) ||
424 rx_queue->removed_count++; 460 unlikely(len <= (n_frags - 1) * EFX_RX_USR_BUF_SIZE) ||
425 461 unlikely(len > n_frags * EFX_RX_USR_BUF_SIZE) ||
426 /* Validate the length encoded in the event vs the descriptor pushed */ 462 unlikely(!efx->rx_scatter)) {
427 efx_rx_packet__check_len(rx_queue, rx_buf, len); 463 /* If this isn't an explicit discard request, either
464 * the hardware or the driver is broken.
465 */
466 WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD));
467 rx_buf->flags |= EFX_RX_PKT_DISCARD;
468 }
428 469
429 netif_vdbg(efx, rx_status, efx->net_dev, 470 netif_vdbg(efx, rx_status, efx->net_dev,
430 "RX queue %d received id %x at %llx+%x %s%s\n", 471 "RX queue %d received ids %x-%x len %d %s%s\n",
431 efx_rx_queue_index(rx_queue), index, 472 efx_rx_queue_index(rx_queue), index,
432 (unsigned long long)rx_buf->dma_addr, len, 473 (index + n_frags - 1) & rx_queue->ptr_mask, len,
433 (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "", 474 (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "",
434 (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : ""); 475 (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : "");
435 476
436 /* Discard packet, if instructed to do so */ 477 /* Discard packet, if instructed to do so. Process the
478 * previous receive first.
479 */
437 if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) { 480 if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) {
438 efx_recycle_rx_buffer(channel, rx_buf); 481 efx_rx_flush_packet(channel);
439 482 efx_recycle_rx_buffers(channel, rx_buf, n_frags);
440 /* Don't hold off the previous receive */ 483 return;
441 rx_buf = NULL;
442 goto out;
443 } 484 }
444 485
486 if (n_frags == 1)
487 rx_buf->len = len;
488
445 /* Release and/or sync DMA mapping - assumes all RX buffers 489 /* Release and/or sync DMA mapping - assumes all RX buffers
446 * consumed in-order per RX queue 490 * consumed in-order per RX queue
447 */ 491 */
448 efx_unmap_rx_buffer(efx, rx_buf, len); 492 efx_unmap_rx_buffer(efx, rx_buf, rx_buf->len);
449 493
450 /* Prefetch nice and early so data will (hopefully) be in cache by 494 /* Prefetch nice and early so data will (hopefully) be in cache by
451 * the time we look at it. 495 * the time we look at it.
@@ -453,23 +497,40 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
453 prefetch(efx_rx_buf_va(rx_buf)); 497 prefetch(efx_rx_buf_va(rx_buf));
454 498
455 rx_buf->page_offset += efx->type->rx_buffer_hash_size; 499 rx_buf->page_offset += efx->type->rx_buffer_hash_size;
456 rx_buf->len = len - efx->type->rx_buffer_hash_size; 500 rx_buf->len -= efx->type->rx_buffer_hash_size;
501
502 if (n_frags > 1) {
503 /* Release/sync DMA mapping for additional fragments.
504 * Fix length for last fragment.
505 */
506 unsigned int tail_frags = n_frags - 1;
507
508 for (;;) {
509 rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
510 if (--tail_frags == 0)
511 break;
512 efx_unmap_rx_buffer(efx, rx_buf, EFX_RX_USR_BUF_SIZE);
513 }
514 rx_buf->len = len - (n_frags - 1) * EFX_RX_USR_BUF_SIZE;
515 efx_unmap_rx_buffer(efx, rx_buf, rx_buf->len);
516 }
457 517
458 /* Pipeline receives so that we give time for packet headers to be 518 /* Pipeline receives so that we give time for packet headers to be
459 * prefetched into cache. 519 * prefetched into cache.
460 */ 520 */
461out:
462 efx_rx_flush_packet(channel); 521 efx_rx_flush_packet(channel);
463 channel->rx_pkt = rx_buf; 522 channel->rx_pkt_n_frags = n_frags;
523 channel->rx_pkt_index = index;
464} 524}
465 525
466static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, 526static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
467 struct efx_rx_buffer *rx_buf) 527 struct efx_rx_buffer *rx_buf,
528 unsigned int n_frags)
468{ 529{
469 struct sk_buff *skb; 530 struct sk_buff *skb;
470 u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS); 531 u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS);
471 532
472 skb = efx_rx_mk_skb(channel, rx_buf, eh, hdr_len); 533 skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len);
473 if (unlikely(skb == NULL)) { 534 if (unlikely(skb == NULL)) {
474 efx_free_rx_buffer(channel->efx, rx_buf); 535 efx_free_rx_buffer(channel->efx, rx_buf);
475 return; 536 return;
@@ -488,9 +549,11 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
488} 549}
489 550
490/* Handle a received packet. Second half: Touches packet payload. */ 551/* Handle a received packet. Second half: Touches packet payload. */
491void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf) 552void __efx_rx_packet(struct efx_channel *channel)
492{ 553{
493 struct efx_nic *efx = channel->efx; 554 struct efx_nic *efx = channel->efx;
555 struct efx_rx_buffer *rx_buf =
556 efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
494 u8 *eh = efx_rx_buf_va(rx_buf); 557 u8 *eh = efx_rx_buf_va(rx_buf);
495 558
496 /* If we're in loopback test, then pass the packet directly to the 559 /* If we're in loopback test, then pass the packet directly to the
@@ -499,16 +562,18 @@ void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf)
499 if (unlikely(efx->loopback_selftest)) { 562 if (unlikely(efx->loopback_selftest)) {
500 efx_loopback_rx_packet(efx, eh, rx_buf->len); 563 efx_loopback_rx_packet(efx, eh, rx_buf->len);
501 efx_free_rx_buffer(efx, rx_buf); 564 efx_free_rx_buffer(efx, rx_buf);
502 return; 565 goto out;
503 } 566 }
504 567
505 if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM))) 568 if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
506 rx_buf->flags &= ~EFX_RX_PKT_CSUMMED; 569 rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
507 570
508 if (!channel->type->receive_skb) 571 if (!channel->type->receive_skb)
509 efx_rx_packet_gro(channel, rx_buf, eh); 572 efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
510 else 573 else
511 efx_rx_deliver(channel, eh, rx_buf); 574 efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
575out:
576 channel->rx_pkt_n_frags = 0;
512} 577}
513 578
514int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) 579int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index e07ff0d3f26b..51669244d154 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -414,6 +414,8 @@ static int siena_init_nic(struct efx_nic *efx)
414 EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_INSRT_HDR, 1); 414 EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_INSRT_HDR, 1);
415 EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_ALG, 1); 415 EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_ALG, 1);
416 EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_IP_HASH, 1); 416 EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_IP_HASH, 1);
417 EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_USR_BUF_SIZE,
418 EFX_RX_USR_BUF_SIZE >> 5);
417 efx_writeo(efx, &temp, FR_AZ_RX_CFG); 419 efx_writeo(efx, &temp, FR_AZ_RX_CFG);
418 420
419 /* Set hash key for IPv4 */ 421 /* Set hash key for IPv4 */
@@ -718,6 +720,7 @@ const struct efx_nic_type siena_a0_nic_type = {
718 .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH), 720 .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
719 .rx_buffer_hash_size = 0x10, 721 .rx_buffer_hash_size = 0x10,
720 .rx_buffer_padding = 0, 722 .rx_buffer_padding = 0,
723 .can_rx_scatter = true,
721 .max_interrupt_mode = EFX_INT_MODE_MSIX, 724 .max_interrupt_mode = EFX_INT_MODE_MSIX,
722 .phys_addr_channels = 32, /* Hardware limit is 64, but the legacy 725 .phys_addr_channels = 32, /* Hardware limit is 64, but the legacy
723 * interrupt handler only supports 32 726 * interrupt handler only supports 32
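As a footnote to the efx_rx_packet() changes in rx.c above: a scattered
completion is only accepted when the reported length is consistent with the
fragment count, since every fragment except the last is a full
EFX_RX_USR_BUF_SIZE bytes.  Below is a minimal sketch of that bound check,
assuming a simplified EFX_MAX_FRAME_LEN() and using the illustrative name
valid_scatter_completion().

#include <assert.h>
#include <stdbool.h>

#define EFX_MAX_MTU          (9 * 1024)
#define EFX_RX_USR_BUF_SIZE  1824u

/* Simplified stand-in for EFX_MAX_FRAME_LEN(): MTU plus Ethernet overhead. */
#define MAX_FRAME_LEN(mtu)   ((mtu) + 18)

/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
#define EFX_RX_MAX_FRAGS \
	((MAX_FRAME_LEN(EFX_MAX_MTU) + EFX_RX_USR_BUF_SIZE - 1) / \
	 EFX_RX_USR_BUF_SIZE)

/* Mirrors the sanity check added to efx_rx_packet(): all fragments but
 * the last are exactly EFX_RX_USR_BUF_SIZE bytes, so the completed
 * length is bounded on both sides.
 */
static bool valid_scatter_completion(unsigned int n_frags, unsigned int len,
				     bool rx_scatter)
{
	if (n_frags == 1)
		return true;	/* single buffer is checked against rx_dma_len */
	return rx_scatter &&
	       n_frags <= EFX_RX_MAX_FRAGS &&
	       len > (n_frags - 1) * EFX_RX_USR_BUF_SIZE &&
	       len <= n_frags * EFX_RX_USR_BUF_SIZE;
}

int main(void)
{
	assert(valid_scatter_completion(1, 1514, false));
	assert(valid_scatter_completion(5, 9018, true));   /* jumbo frame, 5 frags */
	assert(!valid_scatter_completion(5, 9018, false)); /* scatter disabled */
	assert(!valid_scatter_completion(2, 1000, true));  /* too short for 2 frags */
	return 0;
}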