Diffstat (limited to 'drivers/net/ethernet')
-rw-r--r--  drivers/net/ethernet/sfc/efx.c         |  34
-rw-r--r--  drivers/net/ethernet/sfc/efx.h         |  13
-rw-r--r--  drivers/net/ethernet/sfc/ethtool.c     |   4
-rw-r--r--  drivers/net/ethernet/sfc/falcon.c      |  17
-rw-r--r--  drivers/net/ethernet/sfc/filter.c      |  74
-rw-r--r--  drivers/net/ethernet/sfc/net_driver.h  |  35
-rw-r--r--  drivers/net/ethernet/sfc/nic.c         |  90
-rw-r--r--  drivers/net/ethernet/sfc/rx.c          | 211
-rw-r--r--  drivers/net/ethernet/sfc/siena.c       |   3
9 files changed, 363 insertions(+), 118 deletions(-)
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index f8013c3ea37c..1213af5024d1 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -88,8 +88,6 @@ const char *const efx_reset_type_names[] = {
 	[RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
 };
 
-#define EFX_MAX_MTU (9 * 1024)
-
 /* Reset workqueue. If any NIC has a hardware failure then a reset will be
  * queued onto this work queue. This is not a per-nic work queue, because
  * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
@@ -627,9 +625,11 @@ fail:
  */
 static void efx_start_datapath(struct efx_nic *efx)
 {
+	bool old_rx_scatter = efx->rx_scatter;
 	struct efx_tx_queue *tx_queue;
 	struct efx_rx_queue *rx_queue;
 	struct efx_channel *channel;
+	size_t rx_buf_len;
 
 	/* Calculate the rx buffer allocation parameters required to
 	 * support the current MTU, including padding for header
@@ -638,8 +638,32 @@ static void efx_start_datapath(struct efx_nic *efx)
 	efx->rx_dma_len = (efx->type->rx_buffer_hash_size +
 			   EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
 			   efx->type->rx_buffer_padding);
-	efx->rx_buffer_order = get_order(sizeof(struct efx_rx_page_state) +
-					 EFX_PAGE_IP_ALIGN + efx->rx_dma_len);
+	rx_buf_len = (sizeof(struct efx_rx_page_state) +
+		      EFX_PAGE_IP_ALIGN + efx->rx_dma_len);
+	if (rx_buf_len <= PAGE_SIZE) {
+		efx->rx_scatter = false;
+		efx->rx_buffer_order = 0;
+		if (rx_buf_len <= PAGE_SIZE / 2)
+			efx->rx_buffer_truesize = PAGE_SIZE / 2;
+		else
+			efx->rx_buffer_truesize = PAGE_SIZE;
+	} else if (efx->type->can_rx_scatter) {
+		BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
+			     EFX_PAGE_IP_ALIGN + EFX_RX_USR_BUF_SIZE >
+			     PAGE_SIZE / 2);
+		efx->rx_scatter = true;
+		efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
+		efx->rx_buffer_order = 0;
+		efx->rx_buffer_truesize = PAGE_SIZE / 2;
+	} else {
+		efx->rx_scatter = false;
+		efx->rx_buffer_order = get_order(rx_buf_len);
+		efx->rx_buffer_truesize = PAGE_SIZE << efx->rx_buffer_order;
+	}
+
+	/* RX filters also have scatter-enabled flags */
+	if (efx->rx_scatter != old_rx_scatter)
+		efx_filter_update_rx_scatter(efx);
 
 	/* We must keep at least one descriptor in a TX ring empty.
 	 * We could avoid this when the queue size does not exactly
@@ -661,7 +685,7 @@ static void efx_start_datapath(struct efx_nic *efx)
 			efx_nic_generate_fill_event(rx_queue);
 		}
 
-		WARN_ON(channel->rx_pkt != NULL);
+		WARN_ON(channel->rx_pkt_n_frags);
 	}
 
 	if (netif_device_present(efx->net_dev))
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 00e7077fa1d8..211da79a65e8 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -39,16 +39,14 @@ extern void efx_init_rx_queue(struct efx_rx_queue *rx_queue);
 extern void efx_fini_rx_queue(struct efx_rx_queue *rx_queue);
 extern void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue);
 extern void efx_rx_slow_fill(unsigned long context);
-extern void __efx_rx_packet(struct efx_channel *channel,
-			    struct efx_rx_buffer *rx_buf);
-extern void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
+extern void __efx_rx_packet(struct efx_channel *channel);
+extern void efx_rx_packet(struct efx_rx_queue *rx_queue,
+			  unsigned int index, unsigned int n_frags,
 			  unsigned int len, u16 flags);
 static inline void efx_rx_flush_packet(struct efx_channel *channel)
 {
-	if (channel->rx_pkt) {
-		__efx_rx_packet(channel, channel->rx_pkt);
-		channel->rx_pkt = NULL;
-	}
+	if (channel->rx_pkt_n_frags)
+		__efx_rx_packet(channel);
 }
 extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
 
@@ -73,6 +71,7 @@ extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
 extern int efx_probe_filters(struct efx_nic *efx);
 extern void efx_restore_filters(struct efx_nic *efx);
 extern void efx_remove_filters(struct efx_nic *efx);
+extern void efx_filter_update_rx_scatter(struct efx_nic *efx);
 extern s32 efx_filter_insert_filter(struct efx_nic *efx,
 				    struct efx_filter_spec *spec,
 				    bool replace);
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 8e61cd06f66a..6e768175e7e0 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -154,6 +154,7 @@ static const struct efx_ethtool_stat efx_ethtool_stats[] = {
 	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err),
 	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch),
 	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
+	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_nodesc_trunc),
 };
 
 /* Number of ethtool statistics */
@@ -978,7 +979,8 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
 			 rule->m_ext.data[1]))
 		return -EINVAL;
 
-	efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, 0,
+	efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL,
+			   efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
 			   (rule->ring_cookie == RX_CLS_FLOW_DISC) ?
 			   0xfff : rule->ring_cookie);
 
diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c
index 49bcd196e10d..4486102fa9b3 100644
--- a/drivers/net/ethernet/sfc/falcon.c
+++ b/drivers/net/ethernet/sfc/falcon.c
@@ -1546,10 +1546,6 @@ static int falcon_probe_nic(struct efx_nic *efx)
 
 static void falcon_init_rx_cfg(struct efx_nic *efx)
 {
-	/* Prior to Siena the RX DMA engine will split each frame at
-	 * intervals of RX_USR_BUF_SIZE (32-byte units). We set it to
-	 * be so large that that never happens. */
-	const unsigned huge_buf_size = (3 * 4096) >> 5;
 	/* RX control FIFO thresholds (32 entries) */
 	const unsigned ctrl_xon_thr = 20;
 	const unsigned ctrl_xoff_thr = 25;
@@ -1557,10 +1553,15 @@ static void falcon_init_rx_cfg(struct efx_nic *efx)
 
 	efx_reado(efx, &reg, FR_AZ_RX_CFG);
 	if (efx_nic_rev(efx) <= EFX_REV_FALCON_A1) {
-		/* Data FIFO size is 5.5K */
+		/* Data FIFO size is 5.5K. The RX DMA engine only
+		 * supports scattering for user-mode queues, but will
+		 * split DMA writes at intervals of RX_USR_BUF_SIZE
+		 * (32-byte units) even for kernel-mode queues. We
+		 * set it to be so large that that never happens.
+		 */
 		EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_DESC_PUSH_EN, 0);
 		EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_USR_BUF_SIZE,
-				    huge_buf_size);
+				    (3 * 4096) >> 5);
 		EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_MAC_TH, 512 >> 8);
 		EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_MAC_TH, 2048 >> 8);
 		EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_TX_TH, ctrl_xon_thr);
@@ -1569,7 +1570,7 @@ static void falcon_init_rx_cfg(struct efx_nic *efx)
 		/* Data FIFO size is 80K; register fields moved */
 		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_DESC_PUSH_EN, 0);
 		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_USR_BUF_SIZE,
-				    huge_buf_size);
+				    EFX_RX_USR_BUF_SIZE >> 5);
 		/* Send XON and XOFF at ~3 * max MTU away from empty/full */
 		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_MAC_TH, 27648 >> 8);
 		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_MAC_TH, 54272 >> 8);
@@ -1815,6 +1816,7 @@ const struct efx_nic_type falcon_a1_nic_type = {
 	.evq_rptr_tbl_base = FR_AA_EVQ_RPTR_KER,
 	.max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
 	.rx_buffer_padding = 0x24,
+	.can_rx_scatter = false,
 	.max_interrupt_mode = EFX_INT_MODE_MSI,
 	.phys_addr_channels = 4,
 	.timer_period_max = 1 << FRF_AB_TC_TIMER_VAL_WIDTH,
@@ -1865,6 +1867,7 @@ const struct efx_nic_type falcon_b0_nic_type = {
 	.max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
 	.rx_buffer_hash_size = 0x10,
 	.rx_buffer_padding = 0,
+	.can_rx_scatter = true,
 	.max_interrupt_mode = EFX_INT_MODE_MSIX,
 	.phys_addr_channels = 32, /* Hardware limit is 64, but the legacy
 				   * interrupt handler only supports 32
diff --git a/drivers/net/ethernet/sfc/filter.c b/drivers/net/ethernet/sfc/filter.c
index 61b4408bbdb8..2397f0e8d3eb 100644
--- a/drivers/net/ethernet/sfc/filter.c
+++ b/drivers/net/ethernet/sfc/filter.c
@@ -172,6 +172,25 @@ static void efx_filter_push_rx_config(struct efx_nic *efx)
 			filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED,
 			!!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags &
 			   EFX_FILTER_FLAG_RX_RSS));
+
+		/* There is a single bit to enable RX scatter for all
+		 * unmatched packets. Only set it if scatter is
+		 * enabled in both filter specs.
+		 */
+		EFX_SET_OWORD_FIELD(
+			filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
+			!!(table->spec[EFX_FILTER_INDEX_UC_DEF].flags &
+			   table->spec[EFX_FILTER_INDEX_MC_DEF].flags &
+			   EFX_FILTER_FLAG_RX_SCATTER));
+	} else if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
+		/* We don't expose 'default' filters because unmatched
+		 * packets always go to the queue number found in the
+		 * RSS table. But we still need to set the RX scatter
+		 * bit here.
+		 */
+		EFX_SET_OWORD_FIELD(
+			filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
+			efx->rx_scatter);
 	}
 
 	efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL);
@@ -413,13 +432,18 @@ static void efx_filter_reset_rx_def(struct efx_nic *efx, unsigned filter_idx)
 	struct efx_filter_state *state = efx->filter_state;
 	struct efx_filter_table *table = &state->table[EFX_FILTER_TABLE_RX_DEF];
 	struct efx_filter_spec *spec = &table->spec[filter_idx];
+	enum efx_filter_flags flags = 0;
 
 	/* If there's only one channel then disable RSS for non VF
 	 * traffic, thereby allowing VFs to use RSS when the PF can't.
 	 */
-	efx_filter_init_rx(spec, EFX_FILTER_PRI_MANUAL,
-			   efx->n_rx_channels > 1 ? EFX_FILTER_FLAG_RX_RSS : 0,
-			   0);
+	if (efx->n_rx_channels > 1)
+		flags |= EFX_FILTER_FLAG_RX_RSS;
+
+	if (efx->rx_scatter)
+		flags |= EFX_FILTER_FLAG_RX_SCATTER;
+
+	efx_filter_init_rx(spec, EFX_FILTER_PRI_MANUAL, flags, 0);
 	spec->type = EFX_FILTER_UC_DEF + filter_idx;
 	table->used_bitmap[0] |= 1 << filter_idx;
 }
@@ -1101,6 +1125,50 @@ void efx_remove_filters(struct efx_nic *efx)
 	kfree(state);
 }
 
+/* Update scatter enable flags for filters pointing to our own RX queues */
+void efx_filter_update_rx_scatter(struct efx_nic *efx)
+{
+	struct efx_filter_state *state = efx->filter_state;
+	enum efx_filter_table_id table_id;
+	struct efx_filter_table *table;
+	efx_oword_t filter;
+	unsigned int filter_idx;
+
+	spin_lock_bh(&state->lock);
+
+	for (table_id = EFX_FILTER_TABLE_RX_IP;
+	     table_id <= EFX_FILTER_TABLE_RX_DEF;
+	     table_id++) {
+		table = &state->table[table_id];
+
+		for (filter_idx = 0; filter_idx < table->size; filter_idx++) {
+			if (!test_bit(filter_idx, table->used_bitmap) ||
+			    table->spec[filter_idx].dmaq_id >=
+			    efx->n_rx_channels)
+				continue;
+
+			if (efx->rx_scatter)
+				table->spec[filter_idx].flags |=
+					EFX_FILTER_FLAG_RX_SCATTER;
+			else
+				table->spec[filter_idx].flags &=
+					~EFX_FILTER_FLAG_RX_SCATTER;
+
+			if (table_id == EFX_FILTER_TABLE_RX_DEF)
+				/* Pushed by efx_filter_push_rx_config() */
+				continue;
+
+			efx_filter_build(&filter, &table->spec[filter_idx]);
+			efx_writeo(efx, &filter,
+				   table->offset + table->step * filter_idx);
+		}
+	}
+
+	efx_filter_push_rx_config(efx);
+
+	spin_unlock_bh(&state->lock);
+}
+
 #ifdef CONFIG_RFS_ACCEL
 
 int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 1bc911f980b5..e41b54bada7c 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -69,6 +69,12 @@
 #define EFX_TXQ_TYPES		4
 #define EFX_MAX_TX_QUEUES	(EFX_TXQ_TYPES * EFX_MAX_CHANNELS)
 
+/* Maximum possible MTU the driver supports */
+#define EFX_MAX_MTU (9 * 1024)
+
+/* Size of an RX scatter buffer. Small enough to pack 2 into a 4K page. */
+#define EFX_RX_USR_BUF_SIZE 1824
+
 /* Forward declare Precision Time Protocol (PTP) support structure. */
 struct efx_ptp_data;
 
@@ -212,7 +218,8 @@ struct efx_tx_queue {
  *	If completed: offset in @page of Ethernet header.
  * @len: If pending: length for DMA descriptor.
  *	If completed: received length, excluding hash prefix.
- * @flags: Flags for buffer and packet state.
+ * @flags: Flags for buffer and packet state. These are only set on the
+ *	first buffer of a scattered packet.
  */
 struct efx_rx_buffer {
 	dma_addr_t dma_addr;
@@ -256,6 +263,7 @@ struct efx_rx_page_state {
  * @added_count: Number of buffers added to the receive queue.
  * @notified_count: Number of buffers given to NIC (<= @added_count).
  * @removed_count: Number of buffers removed from the receive queue.
+ * @scatter_n: Number of buffers used by current packet
 * @max_fill: RX descriptor maximum fill level (<= ring size)
 * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill
 *	(<= @max_fill)
@@ -276,6 +284,7 @@ struct efx_rx_queue {
 	unsigned int added_count;
 	unsigned int notified_count;
 	unsigned int removed_count;
+	unsigned int scatter_n;
 	unsigned int max_fill;
 	unsigned int fast_fill_trigger;
 	unsigned int min_fill;
@@ -335,6 +344,12 @@ enum efx_rx_alloc_method {
 * @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors
 * @n_rx_overlength: Count of RX_OVERLENGTH errors
 * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun
+ * @n_rx_nodesc_trunc: Number of RX packets truncated and then dropped due to
+ *	lack of descriptors
+ * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by
+ *	__efx_rx_packet(), or zero if there is none
+ * @rx_pkt_index: Ring index of first buffer for next packet to be delivered
+ *	by __efx_rx_packet(), if @rx_pkt_n_frags != 0
 * @rx_queue: RX queue for this channel
 * @tx_queue: TX queues for this channel
 */
@@ -366,11 +381,10 @@ struct efx_channel {
 	unsigned n_rx_frm_trunc;
 	unsigned n_rx_overlength;
 	unsigned n_skbuff_leaks;
+	unsigned int n_rx_nodesc_trunc;
 
-	/* Used to pipeline received packets in order to optimise memory
-	 * access with prefetches.
-	 */
-	struct efx_rx_buffer *rx_pkt;
+	unsigned int rx_pkt_n_frags;
+	unsigned int rx_pkt_index;
 
 	struct efx_rx_queue rx_queue;
 	struct efx_tx_queue tx_queue[EFX_TXQ_TYPES];
@@ -672,8 +686,11 @@ struct vfdi_status;
 * @n_tx_channels: Number of channels used for TX
 * @rx_dma_len: Current maximum RX DMA length
 * @rx_buffer_order: Order (log2) of number of pages for each RX buffer
+ * @rx_buffer_truesize: Amortised allocation size of an RX buffer,
+ *	for use in sk_buff::truesize
 * @rx_hash_key: Toeplitz hash key for RSS
 * @rx_indir_table: Indirection table for RSS
+ * @rx_scatter: Scatter mode enabled for receives
 * @int_error_count: Number of internal errors seen recently
 * @int_error_expire: Time at which error count will be expired
 * @irq_status: Interrupt status buffer
@@ -788,8 +805,10 @@ struct efx_nic {
 	unsigned n_tx_channels;
 	unsigned int rx_dma_len;
 	unsigned int rx_buffer_order;
+	unsigned int rx_buffer_truesize;
 	u8 rx_hash_key[40];
 	u32 rx_indir_table[128];
+	bool rx_scatter;
 
 	unsigned int_error_count;
 	unsigned long int_error_expire;
@@ -920,8 +939,9 @@ static inline unsigned int efx_port_num(struct efx_nic *efx)
 * @evq_ptr_tbl_base: Event queue pointer table base address
 * @evq_rptr_tbl_base: Event queue read-pointer table base address
 * @max_dma_mask: Maximum possible DMA mask
- * @rx_buffer_hash_size: Size of hash at start of RX buffer
- * @rx_buffer_padding: Size of padding at end of RX buffer
+ * @rx_buffer_hash_size: Size of hash at start of RX packet
+ * @rx_buffer_padding: Size of padding at end of RX packet
+ * @can_rx_scatter: NIC is able to scatter packet to multiple buffers
 * @max_interrupt_mode: Highest capability interrupt mode supported
 *	from &enum efx_init_mode.
 * @phys_addr_channels: Number of channels with physically addressed
@@ -969,6 +989,7 @@ struct efx_nic_type {
 	u64 max_dma_mask;
 	unsigned int rx_buffer_hash_size;
 	unsigned int rx_buffer_padding;
+	bool can_rx_scatter;
 	unsigned int max_interrupt_mode;
 	unsigned int phys_addr_channels;
 	unsigned int timer_period_max;
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index 0ad790cc473c..f9f5df8b51fe 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -591,12 +591,22 @@ void efx_nic_init_rx(struct efx_rx_queue *rx_queue)
 	struct efx_nic *efx = rx_queue->efx;
 	bool is_b0 = efx_nic_rev(efx) >= EFX_REV_FALCON_B0;
 	bool iscsi_digest_en = is_b0;
+	bool jumbo_en;
+
+	/* For kernel-mode queues in Falcon A1, the JUMBO flag enables
+	 * DMA to continue after a PCIe page boundary (and scattering
+	 * is not possible). In Falcon B0 and Siena, it enables
+	 * scatter.
+	 */
+	jumbo_en = !is_b0 || efx->rx_scatter;
 
 	netif_dbg(efx, hw, efx->net_dev,
 		  "RX queue %d ring in special buffers %d-%d\n",
 		  efx_rx_queue_index(rx_queue), rx_queue->rxd.index,
 		  rx_queue->rxd.index + rx_queue->rxd.entries - 1);
 
+	rx_queue->scatter_n = 0;
+
 	/* Pin RX descriptor ring */
 	efx_init_special_buffer(efx, &rx_queue->rxd);
 
@@ -613,8 +623,7 @@ void efx_nic_init_rx(struct efx_rx_queue *rx_queue)
 			      FRF_AZ_RX_DESCQ_SIZE,
 			      __ffs(rx_queue->rxd.entries),
 			      FRF_AZ_RX_DESCQ_TYPE, 0 /* kernel queue */ ,
-			      /* For >=B0 this is scatter so disable */
-			      FRF_AZ_RX_DESCQ_JUMBO, !is_b0,
+			      FRF_AZ_RX_DESCQ_JUMBO, jumbo_en,
			      FRF_AZ_RX_DESCQ_EN, 1);
 	efx_writeo_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base,
 			 efx_rx_queue_index(rx_queue));
@@ -968,13 +977,24 @@ static u16 efx_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
 		 EFX_RX_PKT_DISCARD : 0;
 }
 
-/* Handle receive events that are not in-order. */
-static void
+/* Handle receive events that are not in-order. Return true if this
+ * can be handled as a partial packet discard, false if it's more
+ * serious.
+ */
+static bool
 efx_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index)
 {
+	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
 	struct efx_nic *efx = rx_queue->efx;
 	unsigned expected, dropped;
 
+	if (rx_queue->scatter_n &&
+	    index == ((rx_queue->removed_count + rx_queue->scatter_n - 1) &
+		      rx_queue->ptr_mask)) {
+		++channel->n_rx_nodesc_trunc;
+		return true;
+	}
+
 	expected = rx_queue->removed_count & rx_queue->ptr_mask;
 	dropped = (index - expected) & rx_queue->ptr_mask;
 	netif_info(efx, rx_err, efx->net_dev,
@@ -983,6 +1003,7 @@ efx_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index)
 
 	efx_schedule_reset(efx, EFX_WORKAROUND_5676(efx) ?
 			   RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE);
+	return false;
 }
 
 /* Handle a packet received event
@@ -998,7 +1019,7 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event)
 	unsigned int rx_ev_desc_ptr, rx_ev_byte_cnt;
 	unsigned int rx_ev_hdr_type, rx_ev_mcast_pkt;
 	unsigned expected_ptr;
-	bool rx_ev_pkt_ok;
+	bool rx_ev_pkt_ok, rx_ev_sop, rx_ev_cont;
 	u16 flags;
 	struct efx_rx_queue *rx_queue;
 	struct efx_nic *efx = channel->efx;
@@ -1006,21 +1027,56 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event)
 	if (unlikely(ACCESS_ONCE(efx->reset_pending)))
 		return;
 
-	/* Basic packet information */
-	rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT);
-	rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK);
-	rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE);
-	WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT));
-	WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP) != 1);
+	rx_ev_cont = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT);
+	rx_ev_sop = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP);
 	WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_Q_LABEL) !=
 		channel->channel);
 
 	rx_queue = efx_channel_get_rx_queue(channel);
 
 	rx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_DESC_PTR);
-	expected_ptr = rx_queue->removed_count & rx_queue->ptr_mask;
-	if (unlikely(rx_ev_desc_ptr != expected_ptr))
-		efx_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr);
+	expected_ptr = ((rx_queue->removed_count + rx_queue->scatter_n) &
+			rx_queue->ptr_mask);
+
+	/* Check for partial drops and other errors */
+	if (unlikely(rx_ev_desc_ptr != expected_ptr) ||
+	    unlikely(rx_ev_sop != (rx_queue->scatter_n == 0))) {
+		if (rx_ev_desc_ptr != expected_ptr &&
+		    !efx_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr))
+			return;
+
+		/* Discard all pending fragments */
+		if (rx_queue->scatter_n) {
+			efx_rx_packet(
+				rx_queue,
+				rx_queue->removed_count & rx_queue->ptr_mask,
+				rx_queue->scatter_n, 0, EFX_RX_PKT_DISCARD);
+			rx_queue->removed_count += rx_queue->scatter_n;
+			rx_queue->scatter_n = 0;
+		}
+
+		/* Return if there is no new fragment */
+		if (rx_ev_desc_ptr != expected_ptr)
+			return;
+
+		/* Discard new fragment if not SOP */
+		if (!rx_ev_sop) {
+			efx_rx_packet(
+				rx_queue,
+				rx_queue->removed_count & rx_queue->ptr_mask,
+				1, 0, EFX_RX_PKT_DISCARD);
+			++rx_queue->removed_count;
+			return;
+		}
+	}
+
+	++rx_queue->scatter_n;
+	if (rx_ev_cont)
+		return;
+
+	rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT);
+	rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK);
+	rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE);
 
 	if (likely(rx_ev_pkt_ok)) {
 		/* If packet is marked as OK and packet type is TCP/IP or
@@ -1048,7 +1104,11 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event)
 		channel->irq_mod_score += 2;
 
 	/* Handle received packet */
-	efx_rx_packet(rx_queue, rx_ev_desc_ptr, rx_ev_byte_cnt, flags);
+	efx_rx_packet(rx_queue,
+		      rx_queue->removed_count & rx_queue->ptr_mask,
+		      rx_queue->scatter_n, rx_ev_byte_cnt, flags);
+	rx_queue->removed_count += rx_queue->scatter_n;
+	rx_queue->scatter_n = 0;
 }
 
 /* If this flush done event corresponds to a &struct efx_tx_queue, then
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 04518722ac1d..88aa1ff01e3f 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -39,13 +39,17 @@
 */
 static unsigned int rx_refill_threshold;
 
+/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
+#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
+				      EFX_RX_USR_BUF_SIZE)
+
 /*
 * RX maximum head room required.
 *
- * This must be at least 1 to prevent overflow and at least 2 to allow
- * pipelined receives.
+ * This must be at least 1 to prevent overflow, plus one packet-worth
+ * to allow pipelined receives.
 */
-#define EFX_RXD_HEAD_ROOM 2
+#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
 
 static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
 {
@@ -66,6 +70,15 @@ static inline u32 efx_rx_buf_hash(const u8 *eh)
 #endif
 }
 
+static inline struct efx_rx_buffer *
+efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
+{
+	if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask)))
+		return efx_rx_buffer(rx_queue, 0);
+	else
+		return rx_buf + 1;
+}
+
 /**
 * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
 *
@@ -199,28 +212,34 @@ static void efx_resurrect_rx_buffer(struct efx_rx_queue *rx_queue,
 	++rx_queue->added_count;
 }
 
-/* Recycle the given rx buffer directly back into the rx_queue. There is
- * always room to add this buffer, because we've just popped a buffer. */
-static void efx_recycle_rx_buffer(struct efx_channel *channel,
-				  struct efx_rx_buffer *rx_buf)
+/* Recycle buffers directly back into the rx_queue. There is always
+ * room to add these buffer, because we've just popped them.
+ */
+static void efx_recycle_rx_buffers(struct efx_channel *channel,
+				   struct efx_rx_buffer *rx_buf,
+				   unsigned int n_frags)
 {
 	struct efx_nic *efx = channel->efx;
 	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
 	struct efx_rx_buffer *new_buf;
 	unsigned index;
 
-	rx_buf->flags = 0;
+	do {
+		rx_buf->flags = 0;
 
-	if (efx->rx_dma_len <= EFX_RX_HALF_PAGE &&
-	    page_count(rx_buf->page) == 1)
-		efx_resurrect_rx_buffer(rx_queue, rx_buf);
+		if (efx->rx_dma_len <= EFX_RX_HALF_PAGE &&
+		    page_count(rx_buf->page) == 1)
+			efx_resurrect_rx_buffer(rx_queue, rx_buf);
 
-	index = rx_queue->added_count & rx_queue->ptr_mask;
-	new_buf = efx_rx_buffer(rx_queue, index);
+		index = rx_queue->added_count & rx_queue->ptr_mask;
+		new_buf = efx_rx_buffer(rx_queue, index);
 
-	memcpy(new_buf, rx_buf, sizeof(*new_buf));
-	rx_buf->page = NULL;
-	++rx_queue->added_count;
+		memcpy(new_buf, rx_buf, sizeof(*new_buf));
+		rx_buf->page = NULL;
+		++rx_queue->added_count;
+
+		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+	} while (--n_frags);
 }
 
 /**
@@ -328,46 +347,56 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
 /* Pass a received packet up through GRO. GRO can handle pages
 * regardless of checksum state and skbs with a good checksum.
 */
-static void efx_rx_packet_gro(struct efx_channel *channel,
-			      struct efx_rx_buffer *rx_buf,
-			      const u8 *eh)
+static void
+efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
+		  unsigned int n_frags, u8 *eh)
 {
 	struct napi_struct *napi = &channel->napi_str;
 	gro_result_t gro_result;
 	struct efx_nic *efx = channel->efx;
-	struct page *page = rx_buf->page;
 	struct sk_buff *skb;
 
-	rx_buf->page = NULL;
-
 	skb = napi_get_frags(napi);
-	if (!skb) {
-		put_page(page);
+	if (unlikely(!skb)) {
+		while (n_frags--) {
+			put_page(rx_buf->page);
+			rx_buf->page = NULL;
+			rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
+		}
 		return;
 	}
 
 	if (efx->net_dev->features & NETIF_F_RXHASH)
 		skb->rxhash = efx_rx_buf_hash(eh);
-
-	skb_fill_page_desc(skb, 0, page, rx_buf->page_offset, rx_buf->len);
-
-	skb->len = rx_buf->len;
-	skb->data_len = rx_buf->len;
-	skb->truesize += rx_buf->len;
 	skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
 			  CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
 
-	skb_record_rx_queue(skb, channel->rx_queue.core_index);
+	for (;;) {
+		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+				   rx_buf->page, rx_buf->page_offset,
+				   rx_buf->len);
+		rx_buf->page = NULL;
+		skb->len += rx_buf->len;
+		if (skb_shinfo(skb)->nr_frags == n_frags)
+			break;
 
-	gro_result = napi_gro_frags(napi);
+		rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
+	}
+
+	skb->data_len = skb->len;
+	skb->truesize += n_frags * efx->rx_buffer_truesize;
+
+	skb_record_rx_queue(skb, channel->rx_queue.core_index);
 
+	gro_result = napi_gro_frags(napi);
 	if (gro_result != GRO_DROP)
 		channel->irq_mod_score += 2;
 }
 
-/* Allocate and construct an SKB around a struct page.*/
+/* Allocate and construct an SKB around page fragments */
 static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
 				     struct efx_rx_buffer *rx_buf,
+				     unsigned int n_frags,
 				     u8 *eh, int hdr_len)
 {
 	struct efx_nic *efx = channel->efx;
@@ -381,25 +410,32 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
 	EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len);
 
 	skb_reserve(skb, EFX_PAGE_SKB_ALIGN);
+	memcpy(__skb_put(skb, hdr_len), eh, hdr_len);
 
-	skb->len = rx_buf->len;
-	skb->truesize = rx_buf->len + sizeof(struct sk_buff);
-	memcpy(skb->data, eh, hdr_len);
-	skb->tail += hdr_len;
-
-	/* Append the remaining page onto the frag list */
+	/* Append the remaining page(s) onto the frag list */
 	if (rx_buf->len > hdr_len) {
-		skb->data_len = skb->len - hdr_len;
-		skb_fill_page_desc(skb, 0, rx_buf->page,
-				   rx_buf->page_offset + hdr_len,
-				   skb->data_len);
+		rx_buf->page_offset += hdr_len;
+		rx_buf->len -= hdr_len;
+
+		for (;;) {
+			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+					   rx_buf->page, rx_buf->page_offset,
+					   rx_buf->len);
+			rx_buf->page = NULL;
+			skb->len += rx_buf->len;
+			skb->data_len += rx_buf->len;
+			if (skb_shinfo(skb)->nr_frags == n_frags)
+				break;
+
+			rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
+		}
 	} else {
 		__free_pages(rx_buf->page, efx->rx_buffer_order);
-		skb->data_len = 0;
+		rx_buf->page = NULL;
+		n_frags = 0;
 	}
 
-	/* Ownership has transferred from the rx_buf to skb */
-	rx_buf->page = NULL;
+	skb->truesize += n_frags * efx->rx_buffer_truesize;
 
 	/* Move past the ethernet header */
 	skb->protocol = eth_type_trans(skb, efx->net_dev);
@@ -408,7 +444,7 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
 }
 
 void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
-		   unsigned int len, u16 flags)
+		   unsigned int n_frags, unsigned int len, u16 flags)
 {
 	struct efx_nic *efx = rx_queue->efx;
 	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
@@ -417,35 +453,43 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
 	rx_buf = efx_rx_buffer(rx_queue, index);
 	rx_buf->flags |= flags;
 
-	/* This allows the refill path to post another buffer.
-	 * EFX_RXD_HEAD_ROOM ensures that the slot we are using
-	 * isn't overwritten yet.
-	 */
-	rx_queue->removed_count++;
-
-	/* Validate the length encoded in the event vs the descriptor pushed */
-	efx_rx_packet__check_len(rx_queue, rx_buf, len);
+	/* Validate the number of fragments and completed length */
+	if (n_frags == 1) {
+		efx_rx_packet__check_len(rx_queue, rx_buf, len);
+	} else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) ||
+		   unlikely(len <= (n_frags - 1) * EFX_RX_USR_BUF_SIZE) ||
+		   unlikely(len > n_frags * EFX_RX_USR_BUF_SIZE) ||
+		   unlikely(!efx->rx_scatter)) {
+		/* If this isn't an explicit discard request, either
+		 * the hardware or the driver is broken.
+		 */
+		WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD));
+		rx_buf->flags |= EFX_RX_PKT_DISCARD;
+	}
 
 	netif_vdbg(efx, rx_status, efx->net_dev,
-		   "RX queue %d received id %x at %llx+%x %s%s\n",
+		   "RX queue %d received ids %x-%x len %d %s%s\n",
 		   efx_rx_queue_index(rx_queue), index,
-		   (unsigned long long)rx_buf->dma_addr, len,
+		   (index + n_frags - 1) & rx_queue->ptr_mask, len,
 		   (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "",
 		   (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : "");
 
-	/* Discard packet, if instructed to do so */
+	/* Discard packet, if instructed to do so. Process the
+	 * previous receive first.
+	 */
 	if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) {
-		efx_recycle_rx_buffer(channel, rx_buf);
-
-		/* Don't hold off the previous receive */
-		rx_buf = NULL;
-		goto out;
+		efx_rx_flush_packet(channel);
+		efx_recycle_rx_buffers(channel, rx_buf, n_frags);
+		return;
 	}
 
+	if (n_frags == 1)
+		rx_buf->len = len;
+
 	/* Release and/or sync DMA mapping - assumes all RX buffers
 	 * consumed in-order per RX queue
 	 */
-	efx_unmap_rx_buffer(efx, rx_buf, len);
+	efx_unmap_rx_buffer(efx, rx_buf, rx_buf->len);
 
 	/* Prefetch nice and early so data will (hopefully) be in cache by
 	 * the time we look at it.
@@ -453,23 +497,40 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
 	prefetch(efx_rx_buf_va(rx_buf));
 
 	rx_buf->page_offset += efx->type->rx_buffer_hash_size;
-	rx_buf->len = len - efx->type->rx_buffer_hash_size;
+	rx_buf->len -= efx->type->rx_buffer_hash_size;
+
+	if (n_frags > 1) {
+		/* Release/sync DMA mapping for additional fragments.
+		 * Fix length for last fragment.
+		 */
+		unsigned int tail_frags = n_frags - 1;
+
+		for (;;) {
+			rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+			if (--tail_frags == 0)
+				break;
+			efx_unmap_rx_buffer(efx, rx_buf, EFX_RX_USR_BUF_SIZE);
+		}
+		rx_buf->len = len - (n_frags - 1) * EFX_RX_USR_BUF_SIZE;
+		efx_unmap_rx_buffer(efx, rx_buf, rx_buf->len);
+	}
 
 	/* Pipeline receives so that we give time for packet headers to be
 	 * prefetched into cache.
 	 */
-out:
 	efx_rx_flush_packet(channel);
-	channel->rx_pkt = rx_buf;
+	channel->rx_pkt_n_frags = n_frags;
+	channel->rx_pkt_index = index;
 }
 
 static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
-			   struct efx_rx_buffer *rx_buf)
+			   struct efx_rx_buffer *rx_buf,
+			   unsigned int n_frags)
 {
 	struct sk_buff *skb;
 	u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS);
 
-	skb = efx_rx_mk_skb(channel, rx_buf, eh, hdr_len);
+	skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len);
 	if (unlikely(skb == NULL)) {
 		efx_free_rx_buffer(channel->efx, rx_buf);
 		return;
@@ -488,9 +549,11 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
 }
 
 /* Handle a received packet. Second half: Touches packet payload. */
-void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf)
+void __efx_rx_packet(struct efx_channel *channel)
 {
 	struct efx_nic *efx = channel->efx;
+	struct efx_rx_buffer *rx_buf =
+		efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
 	u8 *eh = efx_rx_buf_va(rx_buf);
 
 	/* If we're in loopback test, then pass the packet directly to the
@@ -499,16 +562,18 @@ void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf)
 	if (unlikely(efx->loopback_selftest)) {
 		efx_loopback_rx_packet(efx, eh, rx_buf->len);
 		efx_free_rx_buffer(efx, rx_buf);
-		return;
+		goto out;
 	}
 
 	if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
 		rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
 
 	if (!channel->type->receive_skb)
-		efx_rx_packet_gro(channel, rx_buf, eh);
+		efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
 	else
-		efx_rx_deliver(channel, eh, rx_buf);
+		efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
+out:
+	channel->rx_pkt_n_frags = 0;
 }
 
 int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index e07ff0d3f26b..51669244d154 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -414,6 +414,8 @@ static int siena_init_nic(struct efx_nic *efx)
 	EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_INSRT_HDR, 1);
 	EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_ALG, 1);
 	EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_IP_HASH, 1);
+	EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_USR_BUF_SIZE,
+			    EFX_RX_USR_BUF_SIZE >> 5);
 	efx_writeo(efx, &temp, FR_AZ_RX_CFG);
 
 	/* Set hash key for IPv4 */
@@ -718,6 +720,7 @@ const struct efx_nic_type siena_a0_nic_type = {
 	.max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
 	.rx_buffer_hash_size = 0x10,
 	.rx_buffer_padding = 0,
+	.can_rx_scatter = true,
 	.max_interrupt_mode = EFX_INT_MODE_MSIX,
 	.phys_addr_channels = 32, /* Hardware limit is 64, but the legacy
 				   * interrupt handler only supports 32