diff options
-rw-r--r-- | include/linux/netdevice.h | 33 | ||||
-rw-r--r-- | net/Kconfig | 6 | ||||
-rw-r--r-- | net/core/dev.c | 97 |
3 files changed, 127 insertions, 9 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 371fa8839d51..a335f2022690 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -554,14 +554,16 @@ struct rps_map { | |||
554 | #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16))) | 554 | #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16))) |
555 | 555 | ||
556 | /* | 556 | /* |
557 | * The rps_dev_flow structure contains the mapping of a flow to a CPU and the | 557 | * The rps_dev_flow structure contains the mapping of a flow to a CPU, the |
558 | * tail pointer for that CPU's input queue at the time of last enqueue. | 558 | * tail pointer for that CPU's input queue at the time of last enqueue, and |
559 | * a hardware filter index. | ||
559 | */ | 560 | */ |
560 | struct rps_dev_flow { | 561 | struct rps_dev_flow { |
561 | u16 cpu; | 562 | u16 cpu; |
562 | u16 fill; | 563 | u16 filter; |
563 | unsigned int last_qtail; | 564 | unsigned int last_qtail; |
564 | }; | 565 | }; |
566 | #define RPS_NO_FILTER 0xffff | ||
565 | 567 | ||
566 | /* | 568 | /* |
567 | * The rps_dev_flow_table structure contains a table of flow mappings. | 569 | * The rps_dev_flow_table structure contains a table of flow mappings. |
@@ -611,6 +613,11 @@ static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table, | |||
611 | 613 | ||
612 | extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; | 614 | extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; |
613 | 615 | ||
616 | #ifdef CONFIG_RFS_ACCEL | ||
617 | extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, | ||
618 | u32 flow_id, u16 filter_id); | ||
619 | #endif | ||
620 | |||
614 | /* This structure contains an instance of an RX queue. */ | 621 | /* This structure contains an instance of an RX queue. */ |
615 | struct netdev_rx_queue { | 622 | struct netdev_rx_queue { |
616 | struct rps_map __rcu *rps_map; | 623 | struct rps_map __rcu *rps_map; |
@@ -769,6 +776,13 @@ struct netdev_tc_txq { | |||
769 | * is always called from the stack with the rtnl lock held and netif tx | 776 | * is always called from the stack with the rtnl lock held and netif tx |
770 | * queues stopped. This allows the netdevice to perform queue management | 777 | * queues stopped. This allows the netdevice to perform queue management |
771 | * safely. | 778 | * safely. |
779 | * | ||
780 | * RFS acceleration. | ||
781 | * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb, | ||
782 | * u16 rxq_index, u32 flow_id); | ||
783 | * Set hardware filter for RFS. rxq_index is the target queue index; | ||
784 | * flow_id is a flow ID to be passed to rps_may_expire_flow() later. | ||
785 | * Return the filter ID on success, or a negative error code. | ||
772 | */ | 786 | */ |
773 | #define HAVE_NET_DEVICE_OPS | 787 | #define HAVE_NET_DEVICE_OPS |
774 | struct net_device_ops { | 788 | struct net_device_ops { |
@@ -842,6 +856,12 @@ struct net_device_ops { | |||
842 | int (*ndo_fcoe_get_wwn)(struct net_device *dev, | 856 | int (*ndo_fcoe_get_wwn)(struct net_device *dev, |
843 | u64 *wwn, int type); | 857 | u64 *wwn, int type); |
844 | #endif | 858 | #endif |
859 | #ifdef CONFIG_RFS_ACCEL | ||
860 | int (*ndo_rx_flow_steer)(struct net_device *dev, | ||
861 | const struct sk_buff *skb, | ||
862 | u16 rxq_index, | ||
863 | u32 flow_id); | ||
864 | #endif | ||
845 | }; | 865 | }; |
846 | 866 | ||
847 | /* | 867 | /* |
@@ -1056,6 +1076,13 @@ struct net_device { | |||
1056 | 1076 | ||
1057 | /* Number of RX queues currently active in device */ | 1077 | /* Number of RX queues currently active in device */ |
1058 | unsigned int real_num_rx_queues; | 1078 | unsigned int real_num_rx_queues; |
1079 | |||
1080 | #ifdef CONFIG_RFS_ACCEL | ||
1081 | /* CPU reverse-mapping for RX completion interrupts, indexed | ||
1082 | * by RX queue number. Assigned by driver. This must only be | ||
1083 | * set if the ndo_rx_flow_steer operation is defined. */ | ||
1084 | struct cpu_rmap *rx_cpu_rmap; | ||
1085 | #endif | ||
1059 | #endif | 1086 | #endif |
1060 | 1087 | ||
1061 | rx_handler_func_t __rcu *rx_handler; | 1088 | rx_handler_func_t __rcu *rx_handler; |
diff --git a/net/Kconfig b/net/Kconfig index 72840626284b..79cabf1ee68b 100644 --- a/net/Kconfig +++ b/net/Kconfig | |||
@@ -221,6 +221,12 @@ config RPS | |||
221 | depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS | 221 | depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS |
222 | default y | 222 | default y |
223 | 223 | ||
224 | config RFS_ACCEL | ||
225 | boolean | ||
226 | depends on RPS && GENERIC_HARDIRQS | ||
227 | select CPU_RMAP | ||
228 | default y | ||
229 | |||
224 | config XPS | 230 | config XPS |
225 | boolean | 231 | boolean |
226 | depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS | 232 | depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS |
diff --git a/net/core/dev.c b/net/core/dev.c index d162ba8d622d..aa761472f9e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -132,6 +132,7 @@ | |||
132 | #include <trace/events/skb.h> | 132 | #include <trace/events/skb.h> |
133 | #include <linux/pci.h> | 133 | #include <linux/pci.h> |
134 | #include <linux/inetdevice.h> | 134 | #include <linux/inetdevice.h> |
135 | #include <linux/cpu_rmap.h> | ||
135 | 136 | ||
136 | #include "net-sysfs.h" | 137 | #include "net-sysfs.h" |
137 | 138 | ||
@@ -2588,6 +2589,53 @@ EXPORT_SYMBOL(__skb_get_rxhash); | |||
2588 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; | 2589 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; |
2589 | EXPORT_SYMBOL(rps_sock_flow_table); | 2590 | EXPORT_SYMBOL(rps_sock_flow_table); |
2590 | 2591 | ||
2592 | static struct rps_dev_flow * | ||
2593 | set_rps_cpu(struct net_device *dev, struct sk_buff *skb, | ||
2594 | struct rps_dev_flow *rflow, u16 next_cpu) | ||
2595 | { | ||
2596 | u16 tcpu; | ||
2597 | |||
2598 | tcpu = rflow->cpu = next_cpu; | ||
2599 | if (tcpu != RPS_NO_CPU) { | ||
2600 | #ifdef CONFIG_RFS_ACCEL | ||
2601 | struct netdev_rx_queue *rxqueue; | ||
2602 | struct rps_dev_flow_table *flow_table; | ||
2603 | struct rps_dev_flow *old_rflow; | ||
2604 | u32 flow_id; | ||
2605 | u16 rxq_index; | ||
2606 | int rc; | ||
2607 | |||
2608 | /* Should we steer this flow to a different hardware queue? */ | ||
2609 | if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap) | ||
2610 | goto out; | ||
2611 | rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); | ||
2612 | if (rxq_index == skb_get_rx_queue(skb)) | ||
2613 | goto out; | ||
2614 | |||
2615 | rxqueue = dev->_rx + rxq_index; | ||
2616 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | ||
2617 | if (!flow_table) | ||
2618 | goto out; | ||
2619 | flow_id = skb->rxhash & flow_table->mask; | ||
2620 | rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, | ||
2621 | rxq_index, flow_id); | ||
2622 | if (rc < 0) | ||
2623 | goto out; | ||
2624 | old_rflow = rflow; | ||
2625 | rflow = &flow_table->flows[flow_id]; | ||
2626 | rflow->cpu = next_cpu; | ||
2627 | rflow->filter = rc; | ||
2628 | if (old_rflow->filter == rflow->filter) | ||
2629 | old_rflow->filter = RPS_NO_FILTER; | ||
2630 | out: | ||
2631 | #endif | ||
2632 | rflow->last_qtail = | ||
2633 | per_cpu(softnet_data, tcpu).input_queue_head; | ||
2634 | } | ||
2635 | |||
2636 | return rflow; | ||
2637 | } | ||
2638 | |||
2591 | /* | 2639 | /* |
2592 | * get_rps_cpu is called from netif_receive_skb and returns the target | 2640 | * get_rps_cpu is called from netif_receive_skb and returns the target |
2593 | * CPU from the RPS map of the receiving queue for a given skb. | 2641 | * CPU from the RPS map of the receiving queue for a given skb. |
@@ -2658,12 +2706,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | |||
2658 | if (unlikely(tcpu != next_cpu) && | 2706 | if (unlikely(tcpu != next_cpu) && |
2659 | (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || | 2707 | (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || |
2660 | ((int)(per_cpu(softnet_data, tcpu).input_queue_head - | 2708 | ((int)(per_cpu(softnet_data, tcpu).input_queue_head - |
2661 | rflow->last_qtail)) >= 0)) { | 2709 | rflow->last_qtail)) >= 0)) |
2662 | tcpu = rflow->cpu = next_cpu; | 2710 | rflow = set_rps_cpu(dev, skb, rflow, next_cpu); |
2663 | if (tcpu != RPS_NO_CPU) | 2711 | |
2664 | rflow->last_qtail = per_cpu(softnet_data, | ||
2665 | tcpu).input_queue_head; | ||
2666 | } | ||
2667 | if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { | 2712 | if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { |
2668 | *rflowp = rflow; | 2713 | *rflowp = rflow; |
2669 | cpu = tcpu; | 2714 | cpu = tcpu; |
@@ -2684,6 +2729,46 @@ done: | |||
2684 | return cpu; | 2729 | return cpu; |
2685 | } | 2730 | } |
2686 | 2731 | ||
2732 | #ifdef CONFIG_RFS_ACCEL | ||
2733 | |||
2734 | /** | ||
2735 | * rps_may_expire_flow - check whether an RFS hardware filter may be removed | ||
2736 | * @dev: Device on which the filter was set | ||
2737 | * @rxq_index: RX queue index | ||
2738 | * @flow_id: Flow ID passed to ndo_rx_flow_steer() | ||
2739 | * @filter_id: Filter ID returned by ndo_rx_flow_steer() | ||
2740 | * | ||
2741 | * Drivers that implement ndo_rx_flow_steer() should periodically call | ||
2742 | * this function for each installed filter and remove the filters for | ||
2743 | * which it returns %true. | ||
2744 | */ | ||
2745 | bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, | ||
2746 | u32 flow_id, u16 filter_id) | ||
2747 | { | ||
2748 | struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; | ||
2749 | struct rps_dev_flow_table *flow_table; | ||
2750 | struct rps_dev_flow *rflow; | ||
2751 | bool expire = true; | ||
2752 | int cpu; | ||
2753 | |||
2754 | rcu_read_lock(); | ||
2755 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | ||
2756 | if (flow_table && flow_id <= flow_table->mask) { | ||
2757 | rflow = &flow_table->flows[flow_id]; | ||
2758 | cpu = ACCESS_ONCE(rflow->cpu); | ||
2759 | if (rflow->filter == filter_id && cpu != RPS_NO_CPU && | ||
2760 | ((int)(per_cpu(softnet_data, cpu).input_queue_head - | ||
2761 | rflow->last_qtail) < | ||
2762 | (int)(10 * flow_table->mask))) | ||
2763 | expire = false; | ||
2764 | } | ||
2765 | rcu_read_unlock(); | ||
2766 | return expire; | ||
2767 | } | ||
2768 | EXPORT_SYMBOL(rps_may_expire_flow); | ||
2769 | |||
2770 | #endif /* CONFIG_RFS_ACCEL */ | ||
2771 | |||
2687 | /* Called from hardirq (IPI) context */ | 2772 | /* Called from hardirq (IPI) context */ |
2688 | static void rps_trigger_softirq(void *data) | 2773 | static void rps_trigger_softirq(void *data) |
2689 | { | 2774 | { |