-rw-r--r--  include/linux/netdevice.h | 33
-rw-r--r--  net/Kconfig               |  6
-rw-r--r--  net/core/dev.c            | 97
3 files changed, 127 insertions(+), 9 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 371fa8839d51..a335f2022690 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -554,14 +554,16 @@ struct rps_map {
 #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
 
 /*
- * The rps_dev_flow structure contains the mapping of a flow to a CPU and the
- * tail pointer for that CPU's input queue at the time of last enqueue.
+ * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
+ * tail pointer for that CPU's input queue at the time of last enqueue, and
+ * a hardware filter index.
  */
 struct rps_dev_flow {
 	u16 cpu;
-	u16 fill;
+	u16 filter;
 	unsigned int last_qtail;
 };
+#define RPS_NO_FILTER 0xffff
 
 /*
  * The rps_dev_flow_table structure contains a table of flow mappings.
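
Each flow-table entry now carries a hardware filter ID alongside the CPU and the queue-tail snapshot. For illustration, a minimal sketch of allocating such a table with every slot marked as having no CPU and no filter (hypothetical helper, not part of this patch; the in-tree allocation lives in net-sysfs.c):

	#include <linux/netdevice.h>
	#include <linux/vmalloc.h>

	/* Hypothetical helper: allocate a flow table with 'count' entries
	 * (count must be a power of two) and mark every entry as having
	 * no assigned CPU and no hardware filter. */
	static struct rps_dev_flow_table *alloc_dev_flow_table(unsigned int count)
	{
		struct rps_dev_flow_table *table;
		unsigned int i;

		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
		if (!table)
			return NULL;

		table->mask = count - 1;	/* reduces skb->rxhash to an index */
		for (i = 0; i < count; i++) {
			table->flows[i].cpu = RPS_NO_CPU;
			table->flows[i].filter = RPS_NO_FILTER;
		}
		return table;
	}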
@@ -611,6 +613,11 @@ static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table,
 
 extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
 
+#ifdef CONFIG_RFS_ACCEL
+extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
+				u32 flow_id, u16 filter_id);
+#endif
+
 /* This structure contains an instance of an RX queue. */
 struct netdev_rx_queue {
 	struct rps_map __rcu *rps_map;
@@ -769,6 +776,13 @@ struct netdev_tc_txq {
  * is always called from the stack with the rtnl lock held and netif tx
  * queues stopped. This allows the netdevice to perform queue management
  * safely.
+ *
+ * RFS acceleration.
+ * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb,
+ *			    u16 rxq_index, u32 flow_id);
+ *	Set hardware filter for RFS.  rxq_index is the target queue index;
+ *	flow_id is a flow ID to be passed to rps_may_expire_flow() later.
+ *	Return the filter ID on success, or a negative error code.
  */
 #define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
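
To make that contract concrete, here is a hedged sketch of a driver-side ndo_rx_flow_steer implementation. The filter-programming helper, private-state names, and protocol checks are hypothetical (in-tree drivers such as sfc keep their own filter tables); only the signature and return convention come from this patch:

	#ifdef CONFIG_RFS_ACCEL
	static int my_rx_flow_steer(struct net_device *dev,
				    const struct sk_buff *skb,
				    u16 rxq_index, u32 flow_id)
	{
		struct my_priv *priv = netdev_priv(dev); /* hypothetical state */
		const struct iphdr *ip;
		int filter_id;

		/* Only steer IPv4 TCP/UDP flows in this sketch. */
		if (skb->protocol != htons(ETH_P_IP))
			return -EPROTONOSUPPORT;
		ip = ip_hdr(skb);
		if (ip->protocol != IPPROTO_TCP && ip->protocol != IPPROTO_UDP)
			return -EPROTONOSUPPORT;

		/* Hypothetical hardware helper: install a 5-tuple filter that
		 * delivers this flow to RX queue rxq_index, returning a small
		 * non-negative filter ID or a negative errno. */
		filter_id = my_hw_install_filter(priv, ip, skb, rxq_index);
		if (filter_id >= 0)
			priv->rfs_flow_id[filter_id] = flow_id;	/* for expiry */
		return filter_id;
	}
	#endif

Note that the returned filter ID must fit in a u16 and stay stable for the filter's lifetime: the stack stores it in rps_dev_flow::filter and hands it back through rps_may_expire_flow().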
@@ -842,6 +856,12 @@ struct net_device_ops {
 	int			(*ndo_fcoe_get_wwn)(struct net_device *dev,
 						    u64 *wwn, int type);
 #endif
+#ifdef CONFIG_RFS_ACCEL
+	int			(*ndo_rx_flow_steer)(struct net_device *dev,
+						     const struct sk_buff *skb,
+						     u16 rxq_index,
+						     u32 flow_id);
+#endif
 };
 
 /*
@@ -1056,6 +1076,13 @@ struct net_device {
 
 	/* Number of RX queues currently active in device */
 	unsigned int		real_num_rx_queues;
+
+#ifdef CONFIG_RFS_ACCEL
+	/* CPU reverse-mapping for RX completion interrupts, indexed
+	 * by RX queue number.  Assigned by driver.  This must only be
+	 * set if the ndo_rx_flow_steer operation is defined. */
+	struct cpu_rmap		*rx_cpu_rmap;
+#endif
 #endif
 
 	rx_handler_func_t __rcu	*rx_handler;
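
A driver that defines ndo_rx_flow_steer is expected to populate rx_cpu_rmap itself, using the cpu_rmap API introduced by the companion lib/cpu_rmap change. A minimal setup sketch (error handling trimmed, function name hypothetical):

	#include <linux/cpu_rmap.h>

	/* During probe/open: build a reverse map from CPUs to RX-queue IRQs
	 * so the stack can ask "which queue's IRQ is nearest to CPU n?". */
	static int my_setup_rfs_rmap(struct net_device *dev, int n_rx_queues,
				     const int *rxq_irqs) /* per-queue IRQs */
	{
		int i, rc;

		dev->rx_cpu_rmap = alloc_irq_cpu_rmap(n_rx_queues);
		if (!dev->rx_cpu_rmap)
			return -ENOMEM;

		for (i = 0; i < n_rx_queues; i++) {
			/* Track each queue's IRQ affinity; the rmap keeps
			 * itself current via IRQ affinity notifiers. */
			rc = irq_cpu_rmap_add(dev->rx_cpu_rmap, rxq_irqs[i]);
			if (rc) {
				free_irq_cpu_rmap(dev->rx_cpu_rmap);
				dev->rx_cpu_rmap = NULL;
				return rc;
			}
		}
		return 0;
	}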
diff --git a/net/Kconfig b/net/Kconfig
index 72840626284b..79cabf1ee68b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -221,6 +221,12 @@ config RPS
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
 	default y
 
+config RFS_ACCEL
+	boolean
+	depends on RPS && GENERIC_HARDIRQS
+	select CPU_RMAP
+	default y
+
 config XPS
 	boolean
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
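
RFS_ACCEL has no user-visible prompt: it is enabled automatically whenever RPS is available on an architecture with generic hardirqs, and it pulls in the CPU_RMAP library. Driver code then wires up the hook conditionally, along these lines (sketch, hypothetical driver):

	static const struct net_device_ops my_netdev_ops = {
		.ndo_open		= my_open,
		.ndo_stop		= my_stop,
		/* ... */
	#ifdef CONFIG_RFS_ACCEL
		.ndo_rx_flow_steer	= my_rx_flow_steer,
	#endif
	};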
diff --git a/net/core/dev.c b/net/core/dev.c
index d162ba8d622d..aa761472f9e2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
 #include <trace/events/skb.h>
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
+#include <linux/cpu_rmap.h>
 
 #include "net-sysfs.h"
 
@@ -2588,6 +2589,53 @@ EXPORT_SYMBOL(__skb_get_rxhash);
 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
+static struct rps_dev_flow *
+set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+	    struct rps_dev_flow *rflow, u16 next_cpu)
+{
+	u16 tcpu;
+
+	tcpu = rflow->cpu = next_cpu;
+	if (tcpu != RPS_NO_CPU) {
+#ifdef CONFIG_RFS_ACCEL
+		struct netdev_rx_queue *rxqueue;
+		struct rps_dev_flow_table *flow_table;
+		struct rps_dev_flow *old_rflow;
+		u32 flow_id;
+		u16 rxq_index;
+		int rc;
+
+		/* Should we steer this flow to a different hardware queue? */
+		if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap)
+			goto out;
+		rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
+		if (rxq_index == skb_get_rx_queue(skb))
+			goto out;
+
+		rxqueue = dev->_rx + rxq_index;
+		flow_table = rcu_dereference(rxqueue->rps_flow_table);
+		if (!flow_table)
+			goto out;
+		flow_id = skb->rxhash & flow_table->mask;
+		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
+							rxq_index, flow_id);
+		if (rc < 0)
+			goto out;
+		old_rflow = rflow;
+		rflow = &flow_table->flows[flow_id];
+		rflow->cpu = next_cpu;
+		rflow->filter = rc;
+		if (old_rflow->filter == rflow->filter)
+			old_rflow->filter = RPS_NO_FILTER;
+	out:
+#endif
+		rflow->last_qtail =
+			per_cpu(softnet_data, tcpu).input_queue_head;
+	}
+
+	return rflow;
+}
+
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
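
One subtle step in set_rps_cpu() is the old_rflow bookkeeping: if the driver reused the hardware filter ID that the caller's old table entry recorded (i.e. it replaced the filter in place), the old entry's filter is cleared to RPS_NO_FILTER so that at most one entry claims a given filter; otherwise a later rps_may_expire_flow() call could match the stale entry and report a live filter as expirable. A standalone illustration of that invariant (plain C, simplified hypothetical types):

	#include <assert.h>
	#include <stdint.h>

	#define NO_FILTER 0xffff	/* stands in for RPS_NO_FILTER */

	struct ent { uint16_t filter; };

	int main(void)
	{
		struct ent table[2] = { { .filter = 7 }, { .filter = NO_FILTER } };
		struct ent *old_rflow = &table[0]; /* entry the flow hashed to before */
		struct ent *rflow = &table[1];	   /* entry for the new flow_id */
		uint16_t rc = 7;		   /* driver reused hw filter 7 */

		/* Mirrors the patch: record the filter on the new entry, then
		 * drop the duplicate reference from the old entry. */
		rflow->filter = rc;
		if (old_rflow->filter == rflow->filter)
			old_rflow->filter = NO_FILTER;

		assert(table[0].filter == NO_FILTER && table[1].filter == 7);
		return 0;
	}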
@@ -2658,12 +2706,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		if (unlikely(tcpu != next_cpu) &&
 		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
-		      rflow->last_qtail)) >= 0)) {
-			tcpu = rflow->cpu = next_cpu;
-			if (tcpu != RPS_NO_CPU)
-				rflow->last_qtail = per_cpu(softnet_data,
-				    tcpu).input_queue_head;
-		}
+		      rflow->last_qtail)) >= 0))
+			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+
 		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
 			*rflowp = rflow;
 			cpu = tcpu;
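
The (int)(head - last_qtail) >= 0 test above (and the similar test in rps_may_expire_flow() below) relies on signed evaluation of an unsigned difference, so it remains correct when the per-CPU queue-head counter wraps around. A tiny standalone illustration:

	#include <assert.h>

	int main(void)
	{
		unsigned int head, last_qtail;

		/* Normal case: head has advanced past the recorded tail, so
		 * all packets enqueued for the old CPU have been drained. */
		head = 1010; last_qtail = 1000;
		assert((int)(head - last_qtail) >= 0);

		/* Wraparound case: head wrapped past UINT_MAX but is still
		 * "ahead" of last_qtail; the signed difference handles it. */
		head = 5; last_qtail = 0xfffffff0u;
		assert((int)(head - last_qtail) >= 0);

		/* Outstanding packets: head is behind the recorded tail, so
		 * the flow must not be moved yet (out-of-order risk). */
		head = 990; last_qtail = 1000;
		assert((int)(head - last_qtail) < 0);
		return 0;
	}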
@@ -2684,6 +2729,46 @@ done:
 	return cpu;
 }
 
+#ifdef CONFIG_RFS_ACCEL
+
+/**
+ * rps_may_expire_flow - check whether an RFS hardware filter may be removed
+ * @dev: Device on which the filter was set
+ * @rxq_index: RX queue index
+ * @flow_id: Flow ID passed to ndo_rx_flow_steer()
+ * @filter_id: Filter ID returned by ndo_rx_flow_steer()
+ *
+ * Drivers that implement ndo_rx_flow_steer() should periodically call
+ * this function for each installed filter and remove the filters for
+ * which it returns %true.
+ */
+bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
+			 u32 flow_id, u16 filter_id)
+{
+	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_dev_flow *rflow;
+	bool expire = true;
+	int cpu;
+
+	rcu_read_lock();
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	if (flow_table && flow_id <= flow_table->mask) {
+		rflow = &flow_table->flows[flow_id];
+		cpu = ACCESS_ONCE(rflow->cpu);
+		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
+			   rflow->last_qtail) <
+		     (int)(10 * flow_table->mask)))
+			expire = false;
+	}
+	rcu_read_unlock();
+	return expire;
+}
+EXPORT_SYMBOL(rps_may_expire_flow);
+
+#endif /* CONFIG_RFS_ACCEL */
+
 /* Called from hardirq (IPI) context */
 static void rps_trigger_softirq(void *data)
 {
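
Putting the driver half together, a hedged sketch of the periodic expiry scan a driver might run; the per-filter bookkeeping (active flags, saved queue and flow IDs) and helper names are hypothetical, with sfc's later in-tree implementation being the reference:

	#ifdef CONFIG_RFS_ACCEL
	/* Called periodically (e.g. from a workqueue): for every installed
	 * RFS filter, ask the stack whether the flow has gone idle, and if
	 * so free the hardware filter slot. */
	static void my_rfs_expire_filters(struct my_priv *priv)
	{
		unsigned int filter_id;

		for (filter_id = 0; filter_id < priv->n_rfs_filters; filter_id++) {
			if (!priv->rfs_filter_active[filter_id])
				continue;
			if (rps_may_expire_flow(priv->netdev,
						priv->rfs_rxq[filter_id],
						priv->rfs_flow_id[filter_id],
						filter_id)) {
				my_hw_remove_filter(priv, filter_id);
				priv->rfs_filter_active[filter_id] = false;
			}
		}
	}
	#endif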