aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOr Gerlitz <ogerlitz@mellanox.com>2014-01-20 06:59:19 -0500
committerDavid S. Miller <davem@davemloft.net>2014-01-21 21:05:04 -0500
commitb582ef0990d457f7ce8ccf827af51a575ca0b4a6 (patch)
tree2893cba0f3c386795a7324c71851d165a68d891e
parent2618abb73c8953f0848511fc13f68da4d8337574 (diff)
net: Add GRO support for UDP encapsulating protocols
Add GRO handlers for protocols that do UDP encapsulation, with the intent of being able to coalesce packets which encapsulate packets belonging to the same TCP session. For GRO purposes, the destination UDP port takes the role of the ether type field in the ethernet header or the next protocol in the IP header. The UDP GRO handler will only attempt to coalesce packets whose destination port is registered to have a GRO handler. Use a mark on the skb GRO CB data to disallow (flush) running the UDP GRO receive code twice on a packet. This solves the problem of UDP-encapsulated packets whose inner VM packet is UDP and happens to carry a port which has registered offloads. Signed-off-by: Shlomo Pongratz <shlomop@mellanox.com> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/netdevice.h10
-rw-r--r--include/net/protocol.h3
-rw-r--r--net/core/dev.c1
-rw-r--r--net/ipv4/udp_offload.c143
4 files changed, 156 insertions, 1 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 83ce2aee65e6..c31022980e18 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1675,7 +1675,10 @@ struct napi_gro_cb {
1675 unsigned long age; 1675 unsigned long age;
1676 1676
1677 /* Used in ipv6_gro_receive() */ 1677 /* Used in ipv6_gro_receive() */
1678 int proto; 1678 u16 proto;
1679
1680 /* Used in udp_gro_receive */
1681 u16 udp_mark;
1679 1682
1680 /* used to support CHECKSUM_COMPLETE for tunneling protocols */ 1683 /* used to support CHECKSUM_COMPLETE for tunneling protocols */
1681 __wsum csum; 1684 __wsum csum;
@@ -1714,6 +1717,11 @@ struct packet_offload {
1714 struct list_head list; 1717 struct list_head list;
1715}; 1718};
1716 1719
1720struct udp_offload {
1721 __be16 port;
1722 struct offload_callbacks callbacks;
1723};
1724
1717/* often modified stats are per cpu, other are shared (netdev->stats) */ 1725/* often modified stats are per cpu, other are shared (netdev->stats) */
1718struct pcpu_sw_netstats { 1726struct pcpu_sw_netstats {
1719 u64 rx_packets; 1727 u64 rx_packets;
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 0e5f8665d7fb..a7e986b08147 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -108,6 +108,9 @@ int inet_del_offload(const struct net_offload *prot, unsigned char num);
108void inet_register_protosw(struct inet_protosw *p); 108void inet_register_protosw(struct inet_protosw *p);
109void inet_unregister_protosw(struct inet_protosw *p); 109void inet_unregister_protosw(struct inet_protosw *p);
110 110
111int udp_add_offload(struct udp_offload *prot);
112void udp_del_offload(struct udp_offload *prot);
113
111#if IS_ENABLED(CONFIG_IPV6) 114#if IS_ENABLED(CONFIG_IPV6)
112int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char num); 115int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char num);
113int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char num); 116int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char num);
diff --git a/net/core/dev.c b/net/core/dev.c
index a578af589198..da92305c344f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3893,6 +3893,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
3893 NAPI_GRO_CB(skb)->same_flow = 0; 3893 NAPI_GRO_CB(skb)->same_flow = 0;
3894 NAPI_GRO_CB(skb)->flush = 0; 3894 NAPI_GRO_CB(skb)->flush = 0;
3895 NAPI_GRO_CB(skb)->free = 0; 3895 NAPI_GRO_CB(skb)->free = 0;
3896 NAPI_GRO_CB(skb)->udp_mark = 0;
3896 3897
3897 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); 3898 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
3898 break; 3899 break;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 79c62bdcd3c5..ee853c55deea 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -14,6 +14,15 @@
14#include <net/udp.h> 14#include <net/udp.h>
15#include <net/protocol.h> 15#include <net/protocol.h>
16 16
17static DEFINE_SPINLOCK(udp_offload_lock);
18static struct udp_offload_priv *udp_offload_base __read_mostly;
19
/* Internal node linking one registered udp_offload into the RCU-protected
 * udp_offload_base list.  Writers are serialized by udp_offload_lock;
 * readers traverse under rcu_read_lock().
 */
struct udp_offload_priv {
	struct udp_offload *offload;		/* caller-owned registration */
	struct rcu_head rcu;			/* deferred free via call_rcu() */
	struct udp_offload_priv __rcu *next;	/* next registered port, or NULL */
};
25
17static int udp4_ufo_send_check(struct sk_buff *skb) 26static int udp4_ufo_send_check(struct sk_buff *skb)
18{ 27{
19 if (!pskb_may_pull(skb, sizeof(struct udphdr))) 28 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
@@ -89,10 +98,144 @@ out:
89 return segs; 98 return segs;
90} 99}
91 100
101int udp_add_offload(struct udp_offload *uo)
102{
103 struct udp_offload_priv **head = &udp_offload_base;
104 struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL);
105
106 if (!new_offload)
107 return -ENOMEM;
108
109 new_offload->offload = uo;
110
111 spin_lock(&udp_offload_lock);
112 rcu_assign_pointer(new_offload->next, rcu_dereference(*head));
113 rcu_assign_pointer(*head, rcu_dereference(new_offload));
114 spin_unlock(&udp_offload_lock);
115
116 return 0;
117}
118EXPORT_SYMBOL(udp_add_offload);
119
120static void udp_offload_free_routine(struct rcu_head *head)
121{
122 struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu);
123 kfree(ou_priv);
124}
125
126void udp_del_offload(struct udp_offload *uo)
127{
128 struct udp_offload_priv __rcu **head = &udp_offload_base;
129 struct udp_offload_priv *uo_priv;
130
131 spin_lock(&udp_offload_lock);
132
133 uo_priv = rcu_dereference(*head);
134 for (; uo_priv != NULL;
135 uo_priv = rcu_dereference(*head)) {
136
137 if (uo_priv->offload == uo) {
138 rcu_assign_pointer(*head, rcu_dereference(uo_priv->next));
139 goto unlock;
140 }
141 head = &uo_priv->next;
142 }
143 pr_warn("udp_del_offload: didn't find offload for port %d\n", htons(uo->port));
144unlock:
145 spin_unlock(&udp_offload_lock);
146 if (uo_priv != NULL)
147 call_rcu(&uo_priv->rcu, udp_offload_free_routine);
148}
149EXPORT_SYMBOL(udp_del_offload);
150
/* GRO receive handler for UDP: if the destination port belongs to a
 * registered encapsulation protocol, delegate coalescing to that
 * protocol's gro_receive callback; otherwise flush the packet out of
 * the GRO engine untouched.
 */
static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	struct udp_offload_priv *uo_priv;
	struct sk_buff *p, **pp = NULL;
	struct udphdr *uh, *uh2;
	unsigned int hlen, off;
	int flush = 1;	/* default: do not hold this skb for merging */

	/* Run at most once per skb: udp_mark blocks a second pass when the
	 * encapsulated inner packet is itself UDP on a registered port.
	 * Also require a fully verified checksum on the outer header.
	 */
	if (NAPI_GRO_CB(skb)->udp_mark ||
	    (!skb->encapsulation && skb->ip_summed != CHECKSUM_COMPLETE))
		goto out;

	/* mark that this skb passed once through the udp gro layer */
	NAPI_GRO_CB(skb)->udp_mark = 1;

	/* Make sure the UDP header is available in the GRO header area. */
	off = skb_gro_offset(skb);
	hlen = off + sizeof(*uh);
	uh = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, hlen)) {
		uh = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!uh))
			goto out;
	}

	/* Is any encapsulation handler registered for this dest port? */
	rcu_read_lock();
	uo_priv = rcu_dereference(udp_offload_base);
	for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
		if (uo_priv->offload->port == uh->dest &&
		    uo_priv->offload->callbacks.gro_receive)
			goto unflush;
	}
	goto out_unlock;

unflush:
	flush = 0;

	/* Drop held packets whose port pair differs from ours: the single
	 * u32 load compares both 16-bit source and dest ports at once.
	 */
	for (p = *head; p; p = p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		uh2 = (struct udphdr *)(p->data + off);
		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
	/* Hand the inner packet to the per-port handler; still inside the
	 * RCU read section, so the registration cannot vanish under us.
	 */
	pp = uo_priv->offload->callbacks.gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;
	return pp;
}
207
208static int udp_gro_complete(struct sk_buff *skb, int nhoff)
209{
210 struct udp_offload_priv *uo_priv;
211 __be16 newlen = htons(skb->len - nhoff);
212 struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
213 int err = -ENOSYS;
214
215 uh->len = newlen;
216
217 rcu_read_lock();
218
219 uo_priv = rcu_dereference(udp_offload_base);
220 for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
221 if (uo_priv->offload->port == uh->dest &&
222 uo_priv->offload->callbacks.gro_complete)
223 break;
224 }
225
226 if (uo_priv != NULL)
227 err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr));
228
229 rcu_read_unlock();
230 return err;
231}
232
92static const struct net_offload udpv4_offload = { 233static const struct net_offload udpv4_offload = {
93 .callbacks = { 234 .callbacks = {
94 .gso_send_check = udp4_ufo_send_check, 235 .gso_send_check = udp4_ufo_send_check,
95 .gso_segment = udp4_ufo_fragment, 236 .gso_segment = udp4_ufo_fragment,
237 .gro_receive = udp_gro_receive,
238 .gro_complete = udp_gro_complete,
96 }, 239 },
97}; 240};
98 241