author	Herbert Xu <herbert@gondor.apana.org.au>	2008-12-16 02:38:52 -0500
committer	David S. Miller <davem@davemloft.net>	2008-12-16 02:38:52 -0500
commit	d565b0a1a9b6ee7dff46e1f68b26b526ac11ae50 (patch)
tree	3526815ab2b60c37f474c25ad1d8fb207644efcc /include/linux
parent	1a881f27c50b4fbd6858a8696a189263621136b0 (diff)
net: Add Generic Receive Offload infrastructure
This patch adds the top-level GRO (Generic Receive Offload) infrastructure. This is pretty similar to LRO except that this is protocol-independent. Instead of holding packets in an lro_mgr structure, they're now held in napi_struct.

For drivers that intend to use this, they can set the NETIF_F_GRO bit and call napi_gro_receive instead of netif_receive_skb, or just call netif_rx. The latter will call napi_gro_receive automatically. When napi_gro_receive is used, the driver must either call napi_complete/netif_rx_complete, or call napi_gro_flush in softirq context if the driver uses the primitives __napi_complete/__netif_rx_complete.

Protocols will set the gro_receive and gro_complete function pointers in order to participate in this scheme.

In addition to the packet, gro_receive will get a list of currently held packets. Each packet in the list has a same_flow field which is non-zero if it is a potential match for the new packet. Each packet that may match also has a flush field which is non-zero if the held packet must not be merged with the new packet.

Once gro_receive has determined that the new skb matches a held packet, the held packet may be processed immediately if the new skb cannot be merged with it. In this case gro_receive should return the pointer to the existing skb in gro_list. Otherwise the new skb should be merged into the existing packet and NULL should be returned, unless the new skb makes it impossible for any further merges to be made (e.g., a FIN packet), in which case the merged skb should be returned.

Whenever the skb is merged into an existing entry, the gro_receive function should set NAPI_GRO_CB(skb)->same_flow. Note that if an skb merely matches an existing entry but can't be merged with it, this shouldn't be set.

If gro_receive finds it pointless to hold the new skb for future merging, it should set NAPI_GRO_CB(skb)->flush.

Held packets will be flushed by napi_gro_flush, which is called by napi_complete and netif_rx_complete.

Currently held packets are stored in a singly linked list, just like LRO. The list is limited to a maximum of 8 entries. In the future, this may be expanded to use a hash table to allow more flows to be held for merging.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
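As a rough illustration of the driver-side contract described above, here is a minimal sketch of a NAPI poll routine that feeds packets through GRO. It is not taken from the patch: struct my_priv and my_dev_fetch_skb() are hypothetical stand-ins for real driver state and RX-ring handling; only napi_gro_receive(), napi_complete() and NETIF_F_GRO come from this infrastructure.

#include <linux/netdevice.h>
#include <linux/skbuff.h>

struct my_priv {
	struct napi_struct napi;
	/* ... hypothetical device state ... */
};

/* Hypothetical helper: pull the next completed RX skb off the ring. */
static struct sk_buff *my_dev_fetch_skb(struct my_priv *priv);

/* Hypothetical GRO-aware poll routine; assumes the driver set
 * dev->features |= NETIF_F_GRO at probe time.
 */
static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_priv *priv = container_of(napi, struct my_priv, napi);
	int work_done = 0;

	while (work_done < budget) {
		struct sk_buff *skb = my_dev_fetch_skb(priv);

		if (!skb)
			break;
		/* Instead of netif_receive_skb(): give GRO a chance to
		 * merge this skb with a held packet of the same flow.
		 */
		napi_gro_receive(napi, skb);
		work_done++;
	}

	if (work_done < budget)
		/* napi_complete() calls napi_gro_flush(), so any packets
		 * still held on napi->gro_list are delivered here.
		 */
		napi_complete(napi);

	return work_done;
}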
Diffstat (limited to 'include/linux')
-rw-r--r--	include/linux/netdevice.h	80
-rw-r--r--	include/linux/netpoll.h	5
2 files changed, 28 insertions(+), 57 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bdf5465deb91..58856b6737fb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -314,8 +314,9 @@ struct napi_struct {
 	spinlock_t		poll_lock;
 	int			poll_owner;
 	struct net_device	*dev;
-	struct list_head	dev_list;
 #endif
+	struct list_head	dev_list;
+	struct sk_buff		*gro_list;
 };
 
 enum
@@ -376,22 +377,8 @@ static inline int napi_reschedule(struct napi_struct *napi)
  *
  * Mark NAPI processing as complete.
  */
-static inline void __napi_complete(struct napi_struct *n)
-{
-	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
-	list_del(&n->poll_list);
-	smp_mb__before_clear_bit();
-	clear_bit(NAPI_STATE_SCHED, &n->state);
-}
-
-static inline void napi_complete(struct napi_struct *n)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__napi_complete(n);
-	local_irq_restore(flags);
-}
+extern void __napi_complete(struct napi_struct *n);
+extern void napi_complete(struct napi_struct *n);
 
 /**
  * napi_disable - prevent NAPI from scheduling
@@ -640,9 +627,7 @@ struct net_device
 	unsigned long		state;
 
 	struct list_head	dev_list;
-#ifdef CONFIG_NETPOLL
 	struct list_head	napi_list;
-#endif
 
 	/* Net device features */
 	unsigned long		features;
@@ -661,6 +646,7 @@ struct net_device
 #define NETIF_F_LLTX		4096	/* LockLess TX - deprecated. Please */
 					/* do not use LLTX in new drivers */
 #define NETIF_F_NETNS_LOCAL	8192	/* Does not change network namespaces */
+#define NETIF_F_GRO		16384	/* Generic receive offload */
 #define NETIF_F_LRO		32768	/* large receive offload */
 
 	/* Segmentation offload features */
@@ -984,22 +970,8 @@ static inline void *netdev_priv(const struct net_device *dev)
  * netif_napi_add() must be used to initialize a napi context prior to calling
  * *any* of the other napi related functions.
  */
-static inline void netif_napi_add(struct net_device *dev,
-				  struct napi_struct *napi,
-				  int (*poll)(struct napi_struct *, int),
-				  int weight)
-{
-	INIT_LIST_HEAD(&napi->poll_list);
-	napi->poll = poll;
-	napi->weight = weight;
-#ifdef CONFIG_NETPOLL
-	napi->dev = dev;
-	list_add(&napi->dev_list, &dev->napi_list);
-	spin_lock_init(&napi->poll_lock);
-	napi->poll_owner = -1;
-#endif
-	set_bit(NAPI_STATE_SCHED, &napi->state);
-}
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+		    int (*poll)(struct napi_struct *, int), int weight);
 
 /**
  * netif_napi_del - remove a napi context
@@ -1007,12 +979,20 @@ static inline void netif_napi_add(struct net_device *dev,
  *
  * netif_napi_del() removes a napi context from the network device napi list
  */
-static inline void netif_napi_del(struct napi_struct *napi)
-{
-#ifdef CONFIG_NETPOLL
-	list_del(&napi->dev_list);
-#endif
-}
+void netif_napi_del(struct napi_struct *napi);
+
+struct napi_gro_cb {
+	/* This is non-zero if the packet may be of the same flow. */
+	int same_flow;
+
+	/* This is non-zero if the packet cannot be merged with the new skb. */
+	int flush;
+
+	/* Number of segments aggregated. */
+	int count;
+};
+
+#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
 
 struct packet_type {
 	__be16			type;	/* This is really htons(ether_type). */
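To make the napi_gro_cb contract from the commit message concrete, the following is a hedged skeleton of a protocol's gro_receive callback. flow_match(), can_merge(), merge(), ends_flow() and not_worth_holding() are stand-in names for real header comparison and coalescing logic; only NAPI_GRO_CB and its same_flow/flush/count fields come from the patch.

/* Sketch of a protocol gro_receive callback.  Returns NULL to keep
 * holding packets, or a pointer into gro_list to have the core flush
 * the packet it points at.
 */
static struct sk_buff **my_gro_receive(struct sk_buff **head,
				       struct sk_buff *skb)
{
	struct sk_buff **pp = NULL;
	struct sk_buff **p;

	for (p = head; *p; p = &(*p)->next) {
		if (!NAPI_GRO_CB(*p)->same_flow)
			continue;		/* ruled out by lower layers */

		if (!flow_match(*p, skb)) {
			/* Matched below, but not at this layer. */
			NAPI_GRO_CB(*p)->same_flow = 0;
			continue;
		}

		if (NAPI_GRO_CB(*p)->flush || !can_merge(*p, skb)) {
			/* Same flow, but the held packet must not be
			 * merged: return it so it is processed now.
			 */
			pp = p;
			break;
		}

		merge(*p, skb);			/* fold skb into held packet */
		NAPI_GRO_CB(skb)->same_flow = 1;
		NAPI_GRO_CB(*p)->count++;

		if (ends_flow(skb))		/* e.g. FIN: no more merges */
			pp = p;			/* flush the merged skb */
		break;
	}

	if (!pp && !NAPI_GRO_CB(skb)->same_flow && not_worth_holding(skb))
		NAPI_GRO_CB(skb)->flush = 1;	/* don't hold for merging */

	return pp;
}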
@@ -1024,6 +1004,9 @@ struct packet_type {
 	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
 						int features);
 	int			(*gso_send_check)(struct sk_buff *skb);
+	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
+					       struct sk_buff *skb);
+	int			(*gro_complete)(struct sk_buff *skb);
 	void			*af_packet_priv;
 	struct list_head	list;
 };
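A protocol opts in by filling in these two new hooks in its packet_type. Sketched below for an imaginary protocol: ETH_P_MYPROTO and the myproto_* functions are hypothetical, while the gro_receive and gro_complete fields are exactly the ones added here.

/* Hypothetical protocol registration using the new GRO hooks. */
static struct packet_type myproto_packet_type = {
	.type		= __constant_htons(ETH_P_MYPROTO),	/* made-up ethertype */
	.func		= myproto_rcv,
	.gro_receive	= myproto_gro_receive,
	.gro_complete	= myproto_gro_complete,
};

static int __init myproto_init(void)
{
	dev_add_pack(&myproto_packet_type);
	return 0;
}

gro_complete runs when a held packet is finally flushed, giving the protocol a chance to finalize the headers of the merged super-packet before it is delivered up the stack.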
@@ -1377,6 +1360,9 @@ extern int netif_rx(struct sk_buff *skb);
 extern int		netif_rx_ni(struct sk_buff *skb);
 #define HAVE_NETIF_RECEIVE_SKB 1
 extern int		netif_receive_skb(struct sk_buff *skb);
+extern void		napi_gro_flush(struct napi_struct *napi);
+extern int		napi_gro_receive(struct napi_struct *napi,
+					 struct sk_buff *skb);
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
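Per the commit message, a driver that completes polling with the low-level __napi_complete()/__netif_rx_complete() primitives must flush held packets itself from softirq context. A minimal sketch of that path; the surrounding driver is hypothetical, and the IRQ-disable pattern mirrors the inline napi_complete() removed above:

/* Sketch: completion path for a driver using the low-level primitive.
 * __napi_complete() does not flush, so napi_gro_flush() is called
 * explicitly; this runs in softirq (poll) context.
 */
static void my_poll_done(struct napi_struct *napi)
{
	unsigned long flags;

	napi_gro_flush(napi);		/* deliver packets held on gro_list */

	local_irq_save(flags);
	__napi_complete(napi);		/* clears NAPI_STATE_SCHED */
	local_irq_restore(flags);
}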
@@ -1621,17 +1607,7 @@ static inline void __netif_rx_complete(struct net_device *dev,
 static inline void netif_rx_complete(struct net_device *dev,
 				     struct napi_struct *napi)
 {
-	unsigned long flags;
-
-	/*
-	 * don't let napi dequeue from the cpu poll list
-	 * just in case its running on a different cpu
-	 */
-	if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state)))
-		return;
-	local_irq_save(flags);
-	__netif_rx_complete(dev, napi);
-	local_irq_restore(flags);
+	napi_complete(napi);
 }
 
 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index e3d79593fb3a..e38d3c9dccda 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -94,11 +94,6 @@ static inline void netpoll_poll_unlock(void *have)
 	rcu_read_unlock();
 }
 
-static inline void netpoll_netdev_init(struct net_device *dev)
-{
-	INIT_LIST_HEAD(&dev->napi_list);
-}
-
 #else
 static inline int netpoll_rx(struct sk_buff *skb)
 {