From a2b12f3c7ac1ea43ae646db74faf0b56c2bba563 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 12 Jan 2015 17:00:37 -0800 Subject: udp: pass udp_offload struct to UDP gro callbacks This patch introduces udp_offload_callbacks which has the same GRO functions (but not a GSO function) as offload_callbacks, except there is an argument to a udp_offload struct passed to gro_receive and gro_complete functions. This additional argument can be used to retrieve the per port structure of the encapsulation for use in gro processing (mostly by doing container_of on the structure). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 679e6e90aa4c..47921c291dd6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1969,7 +1969,7 @@ struct offload_callbacks { struct sk_buff *(*gso_segment)(struct sk_buff *skb, netdev_features_t features); struct sk_buff **(*gro_receive)(struct sk_buff **head, - struct sk_buff *skb); + struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb, int nhoff); }; @@ -1979,10 +1979,21 @@ struct packet_offload { struct list_head list; }; +struct udp_offload; + +struct udp_offload_callbacks { + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb, + struct udp_offload *uoff); + int (*gro_complete)(struct sk_buff *skb, + int nhoff, + struct udp_offload *uoff); +}; + struct udp_offload { __be16 port; u8 ipproto; - struct offload_callbacks callbacks; + struct udp_offload_callbacks callbacks; }; /* often modified stats are per cpu, other are shared (netdev->stats) */ -- cgit v1.2.2 From 7866a621043fbaca3d7389e9b9f69dd1a2e5a855 Mon Sep 17 00:00:00 2001 From: Salam Noureddine Date: Tue, 27 Jan 2015 11:35:48 -0800 Subject: dev: add per net_device packet type chains When many pf_packet listeners are created on a lot of interfaces the current implementation using global packet type lists scales poorly. This patch adds per net_device packet type lists to fix this problem. The patch was originally written by Eric Biederman for linux-2.6.29. Tested on linux-3.16. Signed-off-by: "Eric W. Biederman" Signed-off-by: Salam Noureddine Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 642d426a668f..3d37c6eb1732 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1514,6 +1514,8 @@ struct net_device { struct list_head napi_list; struct list_head unreg_list; struct list_head close_list; + struct list_head ptype_all; + struct list_head ptype_specific; struct { struct list_head upper; -- cgit v1.2.2 From add511b38266aa10c1079f9248854e6a415c4dc2 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 29 Jan 2015 22:40:12 -0800 Subject: bridge: add flags argument to ndo_bridge_setlink and ndo_bridge_dellink bridge flags are needed inside ndo_bridge_setlink/dellink handlers to avoid another call to parse IFLA_AF_SPEC inside these handlers This is used later in this series Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3d37c6eb1732..16251e96e6aa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1154,13 +1154,15 @@ struct net_device_ops { int idx); int (*ndo_bridge_setlink)(struct net_device *dev, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, + u16 flags); int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask); int (*ndo_bridge_dellink)(struct net_device *dev, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, + u16 flags); int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, -- cgit v1.2.2 From dcdc8994697faa789669c3fdaca1a8bc27a8f356 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 2 Feb 2015 16:07:34 -0800 Subject: net: add skb functions to process remote checksum offload This patch adds skb_remcsum_process and skb_gro_remcsum_process to perform the appropriate adjustments to the skb when receiving remote checksum offload. Updated vxlan and gue to use these functions. Tested: Ran TCP_RR and TCP_STREAM netperf for VXLAN and GUE, did not see any change in performance. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 16251e96e6aa..1347ac50d2af 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2318,6 +2318,21 @@ do { \ compute_pseudo(skb, proto)); \ } while (0) +static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, + int start, int offset) +{ + __wsum delta; + + BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); + + delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum, start, offset); + + /* Adjust skb->csum since we changed the packet */ + skb->csum = csum_add(skb->csum, delta); + NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); +} + + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, -- cgit v1.2.2 From 61bd3857ff2c7daf756d49b41e6277bbdaa8f789 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:29 +0200 Subject: net/core: Add event for a change in slave state Add event which provides an indication on a change in the state of a bonding slave. The event handler should cast the pointer to the appropriate type (struct netdev_bonding_info) in order to get the full info about the slave. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1347ac50d2af..ce784d5018e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -51,6 +51,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -2056,6 +2057,7 @@ struct pcpu_sw_netstats { #define NETDEV_RESEND_IGMP 0x0016 #define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ #define NETDEV_CHANGEINFODATA 0x0018 +#define NETDEV_BONDING_INFO 0x0019 int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); @@ -3494,6 +3496,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, netdev_features_t features); +struct netdev_bonding_info { + ifslave slave; + ifbond master; +}; + +struct netdev_notifier_bonding_info { + struct netdev_notifier_info info; /* must be first */ + struct netdev_bonding_info bonding_info; +}; + +void netdev_bonding_info_change(struct net_device *dev, + struct netdev_bonding_info *bonding_info); + static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { -- cgit v1.2.2 From 567e4b79731c352a17d73c483959f795d3593e03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Feb 2015 12:59:01 -0800 Subject: net: rfs: add hash collision detection Receive Flow Steering is a nice solution but suffers from hash collisions when a mix of connected and unconnected traffic is received on the host, when flow hash table is populated. Also, clearing flow in inet_release() makes RFS not very good for short lived flows, as many packets can follow close(). (FIN , ACK packets, ...) This patch extends the information stored into global hash table to not only include cpu number, but upper part of the hash value. I use a 32bit value, and dynamically split it in two parts. For host with less than 64 possible cpus, this gives 6 bits for the cpu number, and 26 (32-6) bits for the upper part of the hash. Since hash bucket selection use low order bits of the hash, we have a full hash match, if /proc/sys/net/core/rps_sock_flow_entries is big enough. If the hash found in flow table does not match, we fallback to RPS (if it is enabled for the rxqueue). This means that a packet for an non connected flow can avoid the IPI through a unrelated/victim CPU. This also means we no longer have to clear the table at socket close time, and this helps short lived flows performance. Signed-off-by: Eric Dumazet Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ce784d5018e0..ab3b7cef4638 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -644,39 +644,39 @@ struct rps_dev_flow_table { /* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). + * Each entry is a 32bit value. Upper part is the high order bits + * of flow hash, lower part is cpu number. + * rps_cpu_mask is used to partition the space, depending on number of + * possible cpus : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 + * For example, if 64 cpus are possible, rps_cpu_mask = 0x3f, + * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { - unsigned int mask; - u16 ents[0]; + u32 mask; + u32 ents[0]; }; -#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \ - ((_num) * sizeof(u16))) +#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) #define RPS_NO_CPU 0xffff +extern u32 rps_cpu_mask; +extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; + static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, u32 hash) { if (table && hash) { - unsigned int cpu, index = hash & table->mask; + unsigned int index = hash & table->mask; + u32 val = hash & ~rps_cpu_mask; /* We only give a hint, preemption can change cpu under us */ - cpu = raw_smp_processor_id(); + val |= raw_smp_processor_id(); - if (table->ents[index] != cpu) - table->ents[index] = cpu; + if (table->ents[index] != val) + table->ents[index] = val; } } -static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table, - u32 hash) -{ - if (table && hash) - table->ents[hash & table->mask] = RPS_NO_CPU; -} - -extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; - #ifdef CONFIG_RFS_ACCEL bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id); -- cgit v1.2.2 From 93c1af6ca94c1e763efba76a127b5c135e3d23a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Feb 2015 20:39:13 -0800 Subject: net:rfs: adjust table size checking Make sure root user does not try something stupid. Also make sure mask field in struct rps_sock_flow_table does not share a cache line with the potentially often dirtied flow table. Signed-off-by: Eric Dumazet Fixes: 567e4b79731c ("net: rfs: add hash collision detection") Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ab3b7cef4638..d115256ed5a2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -653,7 +653,8 @@ struct rps_dev_flow_table { */ struct rps_sock_flow_table { u32 mask; - u32 ents[0]; + + u32 ents[0] ____cacheline_aligned_in_smp; }; #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) -- cgit v1.2.2