author    Tom Herbert <therbert@google.com>    2010-11-21 08:17:27 -0500
committer David S. Miller <davem@davemloft.net>    2010-11-24 14:44:20 -0500
commit    1d24eb4815d1e0e8b451ecc546645f8ef1176d4f (patch)
tree      0172e72b9452dc46c4e1043817005979cec022a7 /include
parent    3853b5841c01a3f492fe137afaad9c209e5162c6 (diff)
xps: Transmit Packet Steering
This patch implements transmit packet steering (XPS) for multiqueue devices. XPS selects a transmit queue during packet transmission based on configuration. This is done by mapping the CPU transmitting the packet to a queue. This is the transmit side analogue to RPS -- where RPS selects a CPU based on the receive queue, XPS selects a queue based on the CPU (previously there was an XPS patch from Eric Dumazet, but that might more appropriately be called transmit completion steering).

Each transmit queue can be associated with a number of CPUs which will use the queue to send packets. This is configured as a CPU mask on a per-queue basis in:

/sys/class/net/eth<n>/queues/tx-<n>/xps_cpus

The mappings are stored per device in an inverted data structure that maps CPUs to queues. In the netdevice structure this is an array of num_possible_cpu structures, where each structure holds an array of queue indexes for the queues that CPU can use.

The benefits of XPS are improved locality in the per-queue data structures. Also, transmit completions are more likely to be done nearer to the sending thread, so this should promote locality back to the socket on free (e.g. UDP). The benefits of XPS depend on cache hierarchy, application load, and other factors. XPS would nominally be configured so that a queue is only shared by CPUs which share a cache; the degenerate configuration would be that each CPU has its own queue.

Below are some benchmark results which show the potential benefit of this patch. The netperf test runs 500 instances of the netperf TCP_RR test with 1 byte requests and responses.

bnx2x on 16 core AMD
    XPS (16 queues, 1 TX queue per CPU)    1234K at 100% CPU
    No XPS (16 queues)                      996K at 100% CPU

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
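As a rough illustration of the inverted CPU-to-queue mapping described above, the sketch below shows how a transmit path could look up a queue for the sending CPU using the structures this patch adds (xps_dev_maps, xps_map). It is a hypothetical, simplified example: the function name xps_pick_queue_sketch is made up, and the RCU read-side protection and per-flow hashing that real queue selection would need are omitted. It is not the actual selection code the full patch adds outside include/.

/* Hypothetical sketch: pick a tx queue for the CPU doing the transmit.
 * Simplified -- no RCU locking, no hashing across multiple queues
 * mapped to the same CPU. */
static u16 xps_pick_queue_sketch(struct net_device *dev)
{
	struct xps_dev_maps *maps = dev->xps_maps;	/* per-device CPU -> queues maps */
	struct xps_map *map;
	int cpu = smp_processor_id();			/* CPU performing the transmit */

	if (!maps)
		return 0;				/* XPS not configured, fall back to queue 0 */

	map = maps->cpu_map[cpu];
	if (!map || !map->len)
		return 0;				/* no queues mapped to this CPU */

	/* If several queues are mapped to this CPU, a per-flow hash could
	 * spread traffic across them; take the first entry for simplicity. */
	return map->queues[0];
}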
Diffstat (limited to 'include')
-rw-r--r--    include/linux/netdevice.h    30
1 file changed, 30 insertions, 0 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b45c1b8b1d19..badf9285fe0d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -503,6 +503,10 @@ struct netdev_queue {
 	struct Qdisc	*qdisc;
 	unsigned long	state;
 	struct Qdisc	*qdisc_sleeping;
+#ifdef CONFIG_RPS
+	struct kobject	kobj;
+#endif
+
 /*
  * write mostly part
  */
@@ -530,6 +534,30 @@ struct rps_map {
 #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
 
 /*
+ * This structure holds an XPS map which can be of variable length.  The
+ * map is an array of queues.
+ */
+struct xps_map {
+	unsigned int len;
+	unsigned int alloc_len;
+	struct rcu_head rcu;
+	u16 queues[0];
+};
+#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16)))
+#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map))	\
+    / sizeof(u16))
+
+/*
+ * This structure holds all XPS maps for device.  Maps are indexed by CPU.
+ */
+struct xps_dev_maps {
+	struct rcu_head rcu;
+	struct xps_map *cpu_map[0];
+};
+#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) +		\
+    (nr_cpu_ids * sizeof(struct xps_map *)))
+
+/*
  * The rps_dev_flow structure contains the mapping of a flow to a CPU and the
  * tail pointer for that CPU's input queue at the time of last enqueue.
  */
@@ -1016,6 +1044,8 @@ struct net_device {
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
 	spinlock_t		tx_global_lock;
 
+	struct xps_dev_maps	*xps_maps;
+
 	/* These may be needed for future network-power-down code. */
 
 	/*
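For reference, a hedged example of how one of the variable-length maps above might be sized and populated using the XPS_MAP_SIZE() and XPS_MIN_MAP_ALLOC macros. This is illustrative only, not the sysfs store handler the full patch adds elsewhere; the helper name and the queue index passed in are arbitrary, and the sketch assumes <linux/slab.h> for kzalloc() plus the definitions from netdevice.h above.

/* Illustrative only: allocate a per-CPU map sized so the initial
 * allocation fills roughly one cache line, then record a single queue. */
static struct xps_map *xps_alloc_map_example(u16 queue_index)
{
	struct xps_map *map;

	map = kzalloc(max_t(unsigned int,
			    XPS_MAP_SIZE(XPS_MIN_MAP_ALLOC), L1_CACHE_BYTES),
		      GFP_KERNEL);
	if (!map)
		return NULL;

	map->alloc_len = XPS_MIN_MAP_ALLOC;	/* capacity of queues[] */
	map->len = 0;				/* entries currently in use */
	map->queues[map->len++] = queue_index;	/* steer this CPU to one queue */

	return map;
}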