author		Tariq Toukan <tariqt@mellanox.com>	2018-02-07 07:46:36 -0500
committer	Saeed Mahameed <saeedm@mellanox.com>	2018-03-30 19:55:06 -0400
commit		22f4539881944a8acc9c6f5afa7aa7f028898002 (patch)
tree		0c914203d9a0d17238fc8f47add2dc2ba51e608b
parent		121e89275471dccbd5b252e40ad6a823fa0527f7 (diff)
net/mlx5e: Support XDP over Striding RQ
Add XDP support over Striding RQ.
Now that linear SKB is supported over Striding RQ, we can support XDP
by setting stride size to PAGE_SIZE and headroom to XDP_PACKET_HEADROOM.

Upon an MPWQE free, do not release pages that are in flight for XDP
xmit; they will be released upon their TX completions.

Striding RQ is capable of a higher packet rate than the conventional RQ,
so a performance gain is expected for all cases that had a HW
packet-rate bottleneck. This is the case whenever many flows are
distributed across many cores.

Performance testing:
ConnectX-5, 24 rings, default MTU.
CQE compression ON (to reduce completion BW on PCI).

XDP_DROP packet rate:
--------------------------------------------------
| pkt size | XDP rate   | 100GbE linerate | pct% |
--------------------------------------------------
|  64 byte | 126.2 Mpps |      148.0 Mpps |  85% |
| 128 byte |  80.0 Mpps |       84.8 Mpps |  94% |
| 256 byte |  42.7 Mpps |       42.7 Mpps | 100% |
| 512 byte |  23.4 Mpps |       23.4 Mpps | 100% |
--------------------------------------------------

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
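To make the linearity condition concrete (it is what the new
mlx5e_rx_mpwqe_is_linear_skb() test in en_main.c below gates on), here is a
back-of-envelope userspace sketch. It is not part of the patch: the
PAGE_SIZE, skb_shared_info, and wire-overhead constants are assumed x86_64
approximations, not the driver's exact math.

/* linear_fit.c: does headroom + packet + skb_shared_info fit one page? */
#include <stdio.h>

#define PAGE_SIZE           4096
#define XDP_PACKET_HEADROOM  256  /* fixed headroom required by the XDP ABI */
#define SHINFO_OVERHEAD      320  /* assumed SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
#define HARD_HDR              22  /* assumed Ethernet + VLAN + FCS overhead */

int main(void)
{
	int mtus[] = { 1500, 3400, 9000 };

	for (unsigned i = 0; i < sizeof(mtus) / sizeof(mtus[0]); i++) {
		int frag_sz = XDP_PACKET_HEADROOM + HARD_HDR + mtus[i] + SHINFO_OVERHEAD;

		printf("MTU %4d -> linear frag %5d bytes: %s\n", mtus[i], frag_sz,
		       frag_sz <= PAGE_SIZE ?
		       "fits one PAGE_SIZE stride, XDP over Striding RQ possible" :
		       "exceeds one page, Striding RQ is disabled when XDP is on");
	}
	return 0;
}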
-rw-r--r--	drivers/net/ethernet/mellanox/mlx5/core/en.h	1
-rw-r--r--	drivers/net/ethernet/mellanox/mlx5/core/en_main.c	3
-rw-r--r--	drivers/net/ethernet/mellanox/mlx5/core/en_rx.c	30
3 files changed, 26 insertions(+), 8 deletions(-)
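The XDP_DROP rates in the commit message can be approximated with a minimal
drop program. A sketch, assuming clang with the BPF target and the iproute2
loader (file and program names are illustrative):

/* xdp_drop.c: return XDP_DROP for every frame, i.e. drop at the driver. */
#include <linux/bpf.h>

#define __section(NAME) __attribute__((section(NAME), used))

__section("xdp")
int xdp_drop_prog(struct xdp_md *ctx)
{
	return XDP_DROP;
}

char _license[] __section("license") = "GPL";

Build and attach, e.g.:

	clang -O2 -target bpf -c xdp_drop.c -o xdp_drop.o
	ip link set dev <ifname> xdp obj xdp_drop.o sec xdp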
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index a6ca54393bb6..7997d7c159db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -457,6 +457,7 @@ struct mlx5e_mpw_info {
 	struct mlx5e_umr_dma_info umr;
 	u16 consumed_strides;
 	u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE];
+	DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
 };
 
 /* a single cache unit is capable to serve one napi call (for non-striding rq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bba2fa0aa15f..b03a2327356a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -200,7 +200,8 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
 				struct mlx5e_params *params)
 {
 	return mlx5e_check_fragmented_striding_rq_cap(mdev) &&
-		!params->xdp_prog && !MLX5_IPSEC_DEV(mdev);
+		!MLX5_IPSEC_DEV(mdev) &&
+		!(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params));
 }
 
 void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index a827571deb85..1da79cab1838 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -349,13 +349,16 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev,
 
 void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
 {
+	const bool no_xdp_xmit =
+		bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
 	int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
-	struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
+	struct mlx5e_dma_info *dma_info = wi->umr.dma_info;
 	int i;
 
-	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
-		page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]);
-		mlx5e_page_release(rq, dma_info, true);
+	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
+		page_ref_sub(dma_info[i].page, pg_strides - wi->skbs_frags[i]);
+		if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
+			mlx5e_page_release(rq, &dma_info[i], true);
 	}
 }
 
@@ -404,6 +407,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	}
 
 	memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * MLX5_MPWRQ_PAGES_PER_WQE);
+	bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
 	wi->consumed_strides = 0;
 
 	rq->mpwqe.umr_in_progress = true;
@@ -1028,18 +1032,30 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 {
 	struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
 	u16 rx_headroom = rq->buff.headroom;
+	u32 cqe_bcnt32 = cqe_bcnt;
 	struct sk_buff *skb;
 	void *va, *data;
 	u32 frag_size;
+	bool consumed;
 
 	va = page_address(di->page) + head_offset;
 	data = va + rx_headroom;
-	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
+	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
 
 	dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
 				      frag_size, DMA_FROM_DEVICE);
 	prefetch(data);
-	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt);
+
+	rcu_read_lock();
+	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32);
+	rcu_read_unlock();
+	if (consumed) {
+		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+			__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+		return NULL; /* page/packet was consumed by XDP */
+	}
+
+	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32);
 	if (unlikely(!skb))
 		return NULL;
 
@@ -1078,7 +1094,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 
 	skb = rq->mpwqe.skb_from_cqe_mpwrq(rq, wi, cqe_bcnt, head_offset,
 					   page_idx);
-	if (unlikely(!skb))
+	if (!skb)
 		goto mpwrq_cqe_out;
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
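Taken together: the RX path marks any page handed to the XDP transmit queue
in wi->xdp_xmit_bitmap, mlx5e_free_rx_mpwqe() skips those pages at MPWQE
teardown, and the XDP SQ completion path (unchanged by this patch) releases
them once transmission finishes. A small userspace model of the skip rule;
PAGES_PER_WQE and test_bit() are stand-ins for the kernel's
MLX5_MPWRQ_PAGES_PER_WQE and <linux/bitmap.h> helpers:

/* deferred_release.c: model of the release decision in mlx5e_free_rx_mpwqe(). */
#include <stdbool.h>
#include <stdio.h>

#define PAGES_PER_WQE 16  /* stand-in for MLX5_MPWRQ_PAGES_PER_WQE */
#define BITS_PER_LONG (8 * sizeof(unsigned long))

static bool test_bit(int i, const unsigned long *map)
{
	return (map[i / BITS_PER_LONG] >> (i % BITS_PER_LONG)) & 1;
}

static void free_rx_mpwqe(const unsigned long *xdp_xmit_bitmap)
{
	bool no_xdp_xmit = true;
	int i;

	for (i = 0; i < PAGES_PER_WQE; i++)          /* bitmap_empty() */
		no_xdp_xmit &= !test_bit(i, xdp_xmit_bitmap);

	for (i = 0; i < PAGES_PER_WQE; i++) {
		if (no_xdp_xmit || !test_bit(i, xdp_xmit_bitmap))
			printf("page %2d: released at MPWQE free\n", i);
		else
			printf("page %2d: kept, freed on XDP TX completion\n", i);
	}
}

int main(void)
{
	unsigned long bitmap[1] = { (1UL << 3) | (1UL << 7) }; /* pages 3 and 7 were XDP_TX'ed */

	free_rx_mpwqe(bitmap);
	return 0;
}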