diff options
author | Herbert Xu <herbert@gondor.apana.org.au> | 2009-02-08 13:00:37 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-02-08 23:22:18 -0500 |
commit | aa4b9f533ed5a22952e038b9fac2447ccc682124 (patch) | |
tree | 91722b13a63dcd0e49695388e633cfa91b856b80 | |
parent | 4ae5544f9a33e4ae306e337f96951eb3ff2df6d9 (diff) |
gro: Optimise Ethernet header comparison
This patch optimises the Ethernet header comparison by using 2-byte
and 4-byte XORs instead of memcmp. To make this possible, the
comparison is now carried out by the callers of the shared
dev_gro_receive function rather than inside it.
This has a significant performance impact when receiving 1500-byte
packets over 10GbE.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/etherdevice.h | 21 | ||||
-rw-r--r-- | include/linux/netdevice.h | 7 | ||||
-rw-r--r-- | net/8021q/vlan_core.c | 4 | ||||
-rw-r--r-- | net/core/dev.c | 23 |
4 files changed, 33 insertions, 22 deletions
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 1cb0f0b90926..a1f17abba7dc 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h | |||
@@ -184,4 +184,25 @@ static inline unsigned compare_ether_addr_64bits(const u8 addr1[6+2], | |||
184 | } | 184 | } |
185 | #endif /* __KERNEL__ */ | 185 | #endif /* __KERNEL__ */ |
186 | 186 | ||
187 | /** | ||
188 | * compare_ether_header - Compare two Ethernet headers | ||
189 | * @a: Pointer to Ethernet header | ||
190 | * @b: Pointer to Ethernet header | ||
191 | * | ||
192 | * Compare two Ethernet headers; returns 0 if they are equal and non-zero otherwise. | ||
193 | * This assumes that the network header (i.e., IP header) is 4-byte | ||
194 | * aligned OR the platform can handle unaligned access. This is the | ||
195 | * case for all packets coming into netif_receive_skb or similar | ||
196 | * entry points. | ||
197 | */ | ||
198 | |||
199 | static inline int compare_ether_header(const void *a, const void *b) | ||
200 | { | ||
201 | u32 *a32 = (u32 *)((u8 *)a + 2); | ||
202 | u32 *b32 = (u32 *)((u8 *)b + 2); | ||
203 | |||
204 | return (*(u16 *)a ^ *(u16 *)b) | (a32[0] ^ b32[0]) | | ||
205 | (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]); | ||
206 | } | ||
207 | |||
187 | #endif /* _LINUX_ETHERDEVICE_H */ | 208 | #endif /* _LINUX_ETHERDEVICE_H */ |
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9ee344bc6c13..355662aac940 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -1117,6 +1117,13 @@ static inline void skb_gro_reset_offset(struct sk_buff *skb) | |||
1117 | NAPI_GRO_CB(skb)->data_offset = 0; | 1117 | NAPI_GRO_CB(skb)->data_offset = 0; |
1118 | } | 1118 | } |
1119 | 1119 | ||
1120 | static inline void *skb_gro_mac_header(struct sk_buff *skb) | ||
1121 | { | ||
1122 | return skb_mac_header(skb) < skb->data ? skb_mac_header(skb) : | ||
1123 | page_address(skb_shinfo(skb)->frags[0].page) + | ||
1124 | skb_shinfo(skb)->frags[0].page_offset; | ||
1125 | } | ||
1126 | |||
1120 | static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, | 1127 | static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, |
1121 | unsigned short type, | 1128 | unsigned short type, |
1122 | const void *daddr, const void *saddr, | 1129 | const void *daddr, const void *saddr, |
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 378fa69d625a..70435af153f2 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c | |||
@@ -85,7 +85,9 @@ static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, | |||
85 | goto drop; | 85 | goto drop; |
86 | 86 | ||
87 | for (p = napi->gro_list; p; p = p->next) { | 87 | for (p = napi->gro_list; p; p = p->next) { |
88 | NAPI_GRO_CB(p)->same_flow = p->dev == skb->dev; | 88 | NAPI_GRO_CB(p)->same_flow = |
89 | p->dev == skb->dev && !compare_ether_header( | ||
90 | skb_mac_header(p), skb_gro_mac_header(skb)); | ||
89 | NAPI_GRO_CB(p)->flush = 0; | 91 | NAPI_GRO_CB(p)->flush = 0; |
90 | } | 92 | } |
91 | 93 | ||
diff --git a/net/core/dev.c b/net/core/dev.c index ae0b66936abe..1e27a67df242 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -215,13 +215,6 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) | |||
215 | return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; | 215 | return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; |
216 | } | 216 | } |
217 | 217 | ||
218 | static inline void *skb_gro_mac_header(struct sk_buff *skb) | ||
219 | { | ||
220 | return skb_mac_header(skb) < skb->data ? skb_mac_header(skb) : | ||
221 | page_address(skb_shinfo(skb)->frags[0].page) + | ||
222 | skb_shinfo(skb)->frags[0].page_offset; | ||
223 | } | ||
224 | |||
225 | /* Device list insertion */ | 218 | /* Device list insertion */ |
226 | static int list_netdevice(struct net_device *dev) | 219 | static int list_netdevice(struct net_device *dev) |
227 | { | 220 | { |
@@ -2415,29 +2408,16 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
2415 | 2408 | ||
2416 | rcu_read_lock(); | 2409 | rcu_read_lock(); |
2417 | list_for_each_entry_rcu(ptype, head, list) { | 2410 | list_for_each_entry_rcu(ptype, head, list) { |
2418 | struct sk_buff *p; | ||
2419 | void *mac; | ||
2420 | |||
2421 | if (ptype->type != type || ptype->dev || !ptype->gro_receive) | 2411 | if (ptype->type != type || ptype->dev || !ptype->gro_receive) |
2422 | continue; | 2412 | continue; |
2423 | 2413 | ||
2424 | skb_set_network_header(skb, skb_gro_offset(skb)); | 2414 | skb_set_network_header(skb, skb_gro_offset(skb)); |
2425 | mac = skb_gro_mac_header(skb); | ||
2426 | mac_len = skb->network_header - skb->mac_header; | 2415 | mac_len = skb->network_header - skb->mac_header; |
2427 | skb->mac_len = mac_len; | 2416 | skb->mac_len = mac_len; |
2428 | NAPI_GRO_CB(skb)->same_flow = 0; | 2417 | NAPI_GRO_CB(skb)->same_flow = 0; |
2429 | NAPI_GRO_CB(skb)->flush = 0; | 2418 | NAPI_GRO_CB(skb)->flush = 0; |
2430 | NAPI_GRO_CB(skb)->free = 0; | 2419 | NAPI_GRO_CB(skb)->free = 0; |
2431 | 2420 | ||
2432 | for (p = napi->gro_list; p; p = p->next) { | ||
2433 | if (!NAPI_GRO_CB(p)->same_flow) | ||
2434 | continue; | ||
2435 | |||
2436 | if (p->mac_len != mac_len || | ||
2437 | memcmp(skb_mac_header(p), mac, mac_len)) | ||
2438 | NAPI_GRO_CB(p)->same_flow = 0; | ||
2439 | } | ||
2440 | |||
2441 | pp = ptype->gro_receive(&napi->gro_list, skb); | 2421 | pp = ptype->gro_receive(&napi->gro_list, skb); |
2442 | break; | 2422 | break; |
2443 | } | 2423 | } |
@@ -2492,7 +2472,8 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
2492 | struct sk_buff *p; | 2472 | struct sk_buff *p; |
2493 | 2473 | ||
2494 | for (p = napi->gro_list; p; p = p->next) { | 2474 | for (p = napi->gro_list; p; p = p->next) { |
2495 | NAPI_GRO_CB(p)->same_flow = 1; | 2475 | NAPI_GRO_CB(p)->same_flow = !compare_ether_header( |
2476 | skb_mac_header(p), skb_gro_mac_header(skb)); | ||
2496 | NAPI_GRO_CB(p)->flush = 0; | 2477 | NAPI_GRO_CB(p)->flush = 0; |
2497 | } | 2478 | } |
2498 | 2479 | ||