author    Alexei Starovoitov <ast@kernel.org>  2018-06-03 11:11:35 -0400
committer Alexei Starovoitov <ast@kernel.org>  2018-06-03 11:11:36 -0400
commit    ea9916ea3ed98d0a1f67f5cbe8ed8ae28e37f8c8
tree      98d1d3475a3b6ca67e1f6eaa65fb4774fa1950cd
parent    69b450789136f70005f8d36315d875158ea430cf
parent    c1ece6b245bd12a57124da78abafbf8a511394d6
Merge branch 'ndo_xdp_xmit-cleanup'
Jesper Dangaard Brouer says:

====================
As I mentioned in merge commit 10f678683e4 ("Merge branch 'xdp_xmit-bulking'")
I plan to change the API for ndo_xdp_xmit once more, by adding a flags
argument, which is done in this patchset.

I know it is late in the cycle (currently at rc7), but it would be nice to
avoid changing NDOs over several kernel releases, as it is annoying to
vendors and distro backporters, but it is not strictly UAPI so it is allowed
(according to Alexei).

The end-goal is getting rid of the ndo_xdp_flush operation, as it will make
it possible for drivers to implement a TXQ synchronization mechanism that is
not necessarily derived from the CPU id (smp_processor_id).

This patchset removes all callers of the ndo_xdp_flush operation, but it
doesn't take the last step of removing it from all drivers. This can be done
later, or I can update the patchset on request.

Micro-benchmarks only show a very small performance improvement, for
map-redirect around ~2 ns, and for non-map redirect ~7 ns. I've not
benchmarked this with CONFIG_RETPOLINE, but the performance benefit should
be more visible given we end-up removing an indirect call.

---
V2: Updated based on feedback from Song Liu <songliubraving@fb.com>
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c     14
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.h      3
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   23
-rw-r--r--  drivers/net/tun.c                               25
-rw-r--r--  drivers/net/virtio_net.c                         9
-rw-r--r--  include/linux/netdevice.h                        7
-rw-r--r--  include/net/xdp.h                                4
-rw-r--r--  kernel/bpf/devmap.c                             19
-rw-r--r--  net/core/filter.c                                3
9 files changed, 72 insertions, 35 deletions
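For orientation before the per-driver diffs: the sketch below illustrates the calling convention this series establishes for ndo_xdp_xmit — reject unknown flag bits via XDP_XMIT_FLAGS_MASK, queue the frames, and only ring the doorbell when the caller passed XDP_XMIT_FLUSH. This is a hypothetical driver skeleton, not code from this merge; the mydrv_* helpers are placeholders for driver-specific ring handling, while the flag handling mirrors the i40e/ixgbe/tun/virtio_net conversions that follow.

/*
 * Hypothetical driver skeleton (not part of this merge) showing the
 * ndo_xdp_xmit contract after the flags argument is added.  The mydrv_*
 * helpers stand in for driver-specific ring handling.
 */
static int mydrv_xdp_xmit(struct net_device *dev, int n,
			  struct xdp_frame **frames, u32 flags)
{
	struct mydrv_ring *ring;
	int drops = 0;
	int i;

	/* Reject flag bits this implementation does not understand. */
	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	/* TX queue selection is still derived from the CPU id for now. */
	ring = mydrv_tx_ring(dev, smp_processor_id());
	if (unlikely(!ring))
		return -ENXIO;

	for (i = 0; i < n; i++) {
		if (mydrv_queue_frame(ring, frames[i])) {
			/* Frames we cannot queue are returned, not leaked. */
			xdp_return_frame_rx_napi(frames[i]);
			drops++;
		}
	}

	/* The doorbell is now requested by the caller via XDP_XMIT_FLUSH
	 * instead of a separate ndo_xdp_flush call.
	 */
	if (flags & XDP_XMIT_FLUSH)
		mydrv_ring_doorbell(ring);

	return n - drops;
}

The callers in kernel/bpf/devmap.c and net/core/filter.c below follow the same split: bq_xmit_all() passes XDP_XMIT_FLUSH when flushing the bulk queue and 0 when merely draining a full queue, and __bpf_tx_xdp() passes XDP_XMIT_FLUSH for its single-frame transmit.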
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 9b698c5acd05..5f01e4ce9c92 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3670,11 +3670,13 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  * For error cases, a negative errno code is returned and no-frames
  * are transmitted (caller must handle freeing frames).
  **/
-int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+		  u32 flags)
 {
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	unsigned int queue_index = smp_processor_id();
 	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_ring *xdp_ring;
 	int drops = 0;
 	int i;
 
@@ -3684,17 +3686,25 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
 	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
 		return -ENXIO;
 
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	xdp_ring = vsi->xdp_rings[queue_index];
+
 	for (i = 0; i < n; i++) {
 		struct xdp_frame *xdpf = frames[i];
 		int err;
 
-		err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
+		err = i40e_xmit_xdp_ring(xdpf, xdp_ring);
 		if (err != I40E_XDP_TX) {
 			xdp_return_frame_rx_napi(xdpf);
 			drops++;
 		}
 	}
 
+	if (unlikely(flags & XDP_XMIT_FLUSH))
+		i40e_xdp_ring_update_tail(xdp_ring);
+
 	return n - drops;
 }
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index eb8804b3d7b6..820f76db251b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -487,7 +487,8 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 void i40e_detect_recover_hung(struct i40e_vsi *vsi);
 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
-int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+		  u32 flags);
 void i40e_xdp_flush(struct net_device *dev);
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 031d65c4178d..4fd77c9067f2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10022,8 +10022,17 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	}
 }
 
+static void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring)
+{
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.
+	 */
+	wmb();
+	writel(ring->next_to_use, ring->tail);
+}
+
 static int ixgbe_xdp_xmit(struct net_device *dev, int n,
-			  struct xdp_frame **frames)
+			  struct xdp_frame **frames, u32 flags)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	struct ixgbe_ring *ring;
@@ -10033,6 +10042,9 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
 	if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
 		return -ENETDOWN;
 
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
 	/* During program transitions its possible adapter->xdp_prog is assigned
 	 * but ring has not been configured yet. In this case simply abort xmit.
 	 */
@@ -10051,6 +10063,9 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
 		}
 	}
 
+	if (unlikely(flags & XDP_XMIT_FLUSH))
+		ixgbe_xdp_ring_update_tail(ring);
+
 	return n - drops;
 }
 
@@ -10069,11 +10084,7 @@ static void ixgbe_xdp_flush(struct net_device *dev)
 	if (unlikely(!ring))
 		return;
 
-	/* Force memory writes to complete before letting h/w know there
-	 * are new descriptors to fetch.
-	 */
-	wmb();
-	writel(ring->next_to_use, ring->tail);
+	ixgbe_xdp_ring_update_tail(ring);
 
 	return;
 }
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2265d2ccea47..d82a05fb0594 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1285,7 +1285,16 @@ static const struct net_device_ops tun_netdev_ops = {
 	.ndo_get_stats64	= tun_net_get_stats64,
 };
 
-static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
+static void __tun_xdp_flush_tfile(struct tun_file *tfile)
+{
+	/* Notify and wake up reader process */
+	if (tfile->flags & TUN_FASYNC)
+		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
+	tfile->socket.sk->sk_data_ready(tfile->socket.sk);
+}
+
+static int tun_xdp_xmit(struct net_device *dev, int n,
+			struct xdp_frame **frames, u32 flags)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 	struct tun_file *tfile;
@@ -1294,6 +1303,9 @@ static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames
 	int cnt = n;
 	int i;
 
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
 	rcu_read_lock();
 
 	numqueues = READ_ONCE(tun->numqueues);
@@ -1321,6 +1333,9 @@ static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames
 	}
 	spin_unlock(&tfile->tx_ring.producer_lock);
 
+	if (flags & XDP_XMIT_FLUSH)
+		__tun_xdp_flush_tfile(tfile);
+
 	rcu_read_unlock();
 	return cnt - drops;
 }
@@ -1332,7 +1347,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
 	if (unlikely(!frame))
 		return -EOVERFLOW;
 
-	return tun_xdp_xmit(dev, 1, &frame);
+	return tun_xdp_xmit(dev, 1, &frame, 0);
 }
 
 static void tun_xdp_flush(struct net_device *dev)
@@ -1349,11 +1364,7 @@ static void tun_xdp_flush(struct net_device *dev)
 
 	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
 					    numqueues]);
-	/* Notify and wake up reader process */
-	if (tfile->flags & TUN_FASYNC)
-		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
-	tfile->socket.sk->sk_data_ready(tfile->socket.sk);
-
+	__tun_xdp_flush_tfile(tfile);
 out:
 	rcu_read_unlock();
 }
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b2647dd5d302..62ba8aadd8e6 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -468,7 +468,7 @@ static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
 }
 
 static int virtnet_xdp_xmit(struct net_device *dev,
-			    int n, struct xdp_frame **frames)
+			    int n, struct xdp_frame **frames, u32 flags)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct receive_queue *rq = vi->rq;
@@ -481,6 +481,9 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 	int err;
 	int i;
 
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
 	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
 	sq = &vi->sq[qp];
 
@@ -504,6 +507,10 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 			drops++;
 		}
 	}
+
+	if (flags & XDP_XMIT_FLUSH)
+		virtqueue_kick(sq->vq);
+
 	return n - drops;
 }
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8452f72087ef..7f17785a59d7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1185,13 +1185,13 @@ struct dev_ifalias {
  *	This function is used to set or query state related to XDP on the
  *	netdevice and manage BPF offload. See definition of
  *	enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
+ * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp,
+ *			u32 flags);
  *	This function is used to submit @n XDP packets for transmit on a
  *	netdevice. Returns number of frames successfully transmitted, frames
  *	that got dropped are freed/returned via xdp_return_frame().
  *	Returns negative number, means general error invoking ndo, meaning
  *	no frames were xmit'ed and core-caller will free all frames.
- *	TODO: Consider add flag to allow sending flush operation.
  * void (*ndo_xdp_flush)(struct net_device *dev);
  *	This function is used to inform the driver to flush a particular
  *	xdp tx queue. Must be called on same CPU as xdp_xmit.
@@ -1380,7 +1380,8 @@ struct net_device_ops {
 	int			(*ndo_bpf)(struct net_device *dev,
 					   struct netdev_bpf *bpf);
 	int			(*ndo_xdp_xmit)(struct net_device *dev, int n,
-						struct xdp_frame **xdp);
+						struct xdp_frame **xdp,
+						u32 flags);
 	void			(*ndo_xdp_flush)(struct net_device *dev);
 };
 
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 7ad779237ae8..a3b71a4dd71d 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -40,6 +40,10 @@ enum xdp_mem_type {
 	MEM_TYPE_MAX,
 };
 
+/* XDP flags for ndo_xdp_xmit */
+#define XDP_XMIT_FLUSH		(1U << 0)	/* doorbell signal consumer */
+#define XDP_XMIT_FLAGS_MASK	XDP_XMIT_FLUSH
+
 struct xdp_mem_info {
 	u32 type; /* enum xdp_mem_type, but known size type */
 	u32 id;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 1fe3fe60508a..a7cc7b3494a9 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -217,7 +217,7 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
 }
 
 static int bq_xmit_all(struct bpf_dtab_netdev *obj,
-		       struct xdp_bulk_queue *bq)
+		       struct xdp_bulk_queue *bq, u32 flags)
 {
 	struct net_device *dev = obj->dev;
 	int sent = 0, drops = 0, err = 0;
@@ -232,7 +232,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
 		prefetch(xdpf);
 	}
 
-	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
+	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
 	if (sent < 0) {
 		err = sent;
 		sent = 0;
@@ -276,7 +276,6 @@ void __dev_map_flush(struct bpf_map *map)
 	for_each_set_bit(bit, bitmap, map->max_entries) {
 		struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
 		struct xdp_bulk_queue *bq;
-		struct net_device *netdev;
 
 		/* This is possible if the dev entry is removed by user space
 		 * between xdp redirect and flush op.
@@ -287,10 +286,7 @@ void __dev_map_flush(struct bpf_map *map)
 		__clear_bit(bit, bitmap);
 
 		bq = this_cpu_ptr(dev->bulkq);
-		bq_xmit_all(dev, bq);
-		netdev = dev->dev;
-		if (likely(netdev->netdev_ops->ndo_xdp_flush))
-			netdev->netdev_ops->ndo_xdp_flush(netdev);
+		bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
 	}
 }
 
296 292
@@ -320,7 +316,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
 	struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
 
 	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
-		bq_xmit_all(obj, bq);
+		bq_xmit_all(obj, bq, 0);
 
 	/* Ingress dev_rx will be the same for all xdp_frame's in
 	 * bulk_queue, because bq stored per-CPU and must be flushed
@@ -359,8 +355,7 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 
 static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
 {
-	if (dev->dev->netdev_ops->ndo_xdp_flush) {
-		struct net_device *fl = dev->dev;
+	if (dev->dev->netdev_ops->ndo_xdp_xmit) {
 		struct xdp_bulk_queue *bq;
 		unsigned long *bitmap;
 
@@ -371,9 +366,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
 			__clear_bit(dev->bit, bitmap);
 
 			bq = per_cpu_ptr(dev->bulkq, cpu);
-			bq_xmit_all(dev, bq);
-
-			fl->netdev_ops->ndo_xdp_flush(dev->dev);
+			bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
 		}
 	}
 }
diff --git a/net/core/filter.c b/net/core/filter.c
index 28e864777c0f..a72ea9f61010 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3056,10 +3056,9 @@ static int __bpf_tx_xdp(struct net_device *dev,
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
 
-	sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf);
+	sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
 	if (sent <= 0)
 		return sent;
-	dev->netdev_ops->ndo_xdp_flush(dev);
 	return 0;
 }
 