aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Herbert Xu <herbert@gondor.apana.org.au> 2009-02-08 13:00:37 -0500
committer David S. Miller <davem@davemloft.net> 2009-02-08 23:22:18 -0500
commit aa4b9f533ed5a22952e038b9fac2447ccc682124 (patch)
tree 91722b13a63dcd0e49695388e633cfa91b856b80
parent 4ae5544f9a33e4ae306e337f96951eb3ff2df6d9 (diff)
gro: Optimise Ethernet header comparison
This patch optimises the Ethernet header comparison to use 2-byte and 4-byte xors instead of memcmp. In order to facilitate this, the actual comparison is now carried out by the callers of the shared dev_gro_receive function.

This has a significant impact when receiving 1500B packets through 10GbE.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/etherdevice.h 21
-rw-r--r-- include/linux/netdevice.h 7
-rw-r--r-- net/8021q/vlan_core.c 4
-rw-r--r-- net/core/dev.c 23
4 files changed, 33 insertions, 22 deletions
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 1cb0f0b90926..a1f17abba7dc 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -184,4 +184,25 @@ static inline unsigned compare_ether_addr_64bits(const u8 addr1[6+2],
184} 184}
185#endif /* __KERNEL__ */ 185#endif /* __KERNEL__ */
186 186
187/**
188 * compare_ether_header - Compare two Ethernet headers
189 * @a: Pointer to Ethernet header
190 * @b: Pointer to Ethernet header
191 *
192 * Compare two ethernet headers, returns 0 if equal.
193 * This assumes that the network header (i.e., IP header) is 4-byte
194 * aligned OR the platform can handle unaligned access. This is the
195 * case for all packets coming into netif_receive_skb or similar
196 * entry points.
197 */
198
199static inline int compare_ether_header(const void *a, const void *b)
200{
201 u32 *a32 = (u32 *)((u8 *)a + 2);
202 u32 *b32 = (u32 *)((u8 *)b + 2);
203
204 return (*(u16 *)a ^ *(u16 *)b) | (a32[0] ^ b32[0]) |
205 (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]);
206}
207
187#endif /* _LINUX_ETHERDEVICE_H */ 208#endif /* _LINUX_ETHERDEVICE_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9ee344bc6c13..355662aac940 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1117,6 +1117,13 @@ static inline void skb_gro_reset_offset(struct sk_buff *skb)
1117 NAPI_GRO_CB(skb)->data_offset = 0; 1117 NAPI_GRO_CB(skb)->data_offset = 0;
1118} 1118}
1119 1119
1120static inline void *skb_gro_mac_header(struct sk_buff *skb)
1121{
1122 return skb_mac_header(skb) < skb->data ? skb_mac_header(skb) :
1123 page_address(skb_shinfo(skb)->frags[0].page) +
1124 skb_shinfo(skb)->frags[0].page_offset;
1125}
1126
1120static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, 1127static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
1121 unsigned short type, 1128 unsigned short type,
1122 const void *daddr, const void *saddr, 1129 const void *daddr, const void *saddr,
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 378fa69d625a..70435af153f2 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -85,7 +85,9 @@ static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
85 goto drop; 85 goto drop;
86 86
87 for (p = napi->gro_list; p; p = p->next) { 87 for (p = napi->gro_list; p; p = p->next) {
88 NAPI_GRO_CB(p)->same_flow = p->dev == skb->dev; 88 NAPI_GRO_CB(p)->same_flow =
89 p->dev == skb->dev && !compare_ether_header(
90 skb_mac_header(p), skb_gro_mac_header(skb));
89 NAPI_GRO_CB(p)->flush = 0; 91 NAPI_GRO_CB(p)->flush = 0;
90 } 92 }
91 93
diff --git a/net/core/dev.c b/net/core/dev.c
index ae0b66936abe..1e27a67df242 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -215,13 +215,6 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
215 return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; 215 return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
216} 216}
217 217
218static inline void *skb_gro_mac_header(struct sk_buff *skb)
219{
220 return skb_mac_header(skb) < skb->data ? skb_mac_header(skb) :
221 page_address(skb_shinfo(skb)->frags[0].page) +
222 skb_shinfo(skb)->frags[0].page_offset;
223}
224
225/* Device list insertion */ 218/* Device list insertion */
226static int list_netdevice(struct net_device *dev) 219static int list_netdevice(struct net_device *dev)
227{ 220{
@@ -2415,29 +2408,16 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2415 2408
2416 rcu_read_lock(); 2409 rcu_read_lock();
2417 list_for_each_entry_rcu(ptype, head, list) { 2410 list_for_each_entry_rcu(ptype, head, list) {
2418 struct sk_buff *p;
2419 void *mac;
2420
2421 if (ptype->type != type || ptype->dev || !ptype->gro_receive) 2411 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2422 continue; 2412 continue;
2423 2413
2424 skb_set_network_header(skb, skb_gro_offset(skb)); 2414 skb_set_network_header(skb, skb_gro_offset(skb));
2425 mac = skb_gro_mac_header(skb);
2426 mac_len = skb->network_header - skb->mac_header; 2415 mac_len = skb->network_header - skb->mac_header;
2427 skb->mac_len = mac_len; 2416 skb->mac_len = mac_len;
2428 NAPI_GRO_CB(skb)->same_flow = 0; 2417 NAPI_GRO_CB(skb)->same_flow = 0;
2429 NAPI_GRO_CB(skb)->flush = 0; 2418 NAPI_GRO_CB(skb)->flush = 0;
2430 NAPI_GRO_CB(skb)->free = 0; 2419 NAPI_GRO_CB(skb)->free = 0;
2431 2420
2432 for (p = napi->gro_list; p; p = p->next) {
2433 if (!NAPI_GRO_CB(p)->same_flow)
2434 continue;
2435
2436 if (p->mac_len != mac_len ||
2437 memcmp(skb_mac_header(p), mac, mac_len))
2438 NAPI_GRO_CB(p)->same_flow = 0;
2439 }
2440
2441 pp = ptype->gro_receive(&napi->gro_list, skb); 2421 pp = ptype->gro_receive(&napi->gro_list, skb);
2442 break; 2422 break;
2443 } 2423 }
@@ -2492,7 +2472,8 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2492 struct sk_buff *p; 2472 struct sk_buff *p;
2493 2473
2494 for (p = napi->gro_list; p; p = p->next) { 2474 for (p = napi->gro_list; p; p = p->next) {
2495 NAPI_GRO_CB(p)->same_flow = 1; 2475 NAPI_GRO_CB(p)->same_flow = !compare_ether_header(
2476 skb_mac_header(p), skb_gro_mac_header(skb));
2496 NAPI_GRO_CB(p)->flush = 0; 2477 NAPI_GRO_CB(p)->flush = 0;
2497 } 2478 }
2498 2479