 include/linux/netdevice.h |  18
 net/core/dev.c            |  26
 net/ipv4/af_inet.c        |  10
 net/ipv4/gre_offload.c    | 160
 net/ipv4/tcp_offload.c    |   7
 net/ipv6/ip6_offload.c    |   2
 6 files changed, 216 insertions(+), 7 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d9c961aa6a7f..a2a70cc70e7b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1632,7 +1632,10 @@ struct napi_gro_cb {
 	int	data_offset;
 
 	/* This is non-zero if the packet cannot be merged with the new skb. */
-	int	flush;
+	u16	flush;
+
+	/* Save the IP ID here and check when we get to the transport layer */
+	u16	flush_id;
 
 	/* Number of segments aggregated. */
 	u16	count;
@@ -1651,6 +1654,9 @@ struct napi_gro_cb {
 	/* Used in ipv6_gro_receive() */
 	int	proto;
 
+	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
+	__wsum	csum;
+
 	/* used in skb_gro_receive() slow path */
 	struct sk_buff *last;
 };
@@ -1900,6 +1906,14 @@ static inline void *skb_gro_network_header(struct sk_buff *skb)
 	       skb_network_offset(skb);
 }
 
+static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
+					  const void *start, unsigned int len)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum,
+						  csum_partial(start, len, 0));
+}
+
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				  unsigned short type,
 				  const void *daddr, const void *saddr,
@@ -2440,6 +2454,8 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
 void napi_gro_flush(struct napi_struct *napi, bool flush_old);
 struct sk_buff *napi_get_frags(struct napi_struct *napi);
 gro_result_t napi_gro_frags(struct napi_struct *napi);
+struct packet_offload *gro_find_receive_by_type(__be16 type);
+struct packet_offload *gro_find_complete_by_type(__be16 type);
 
 static inline void napi_free_frags(struct napi_struct *napi)
 {
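A note on the new helper: skb_gro_postpull_rcsum() keeps a CHECKSUM_COMPLETE value consistent as headers are pulled, because subtracting the pulled header's partial sum from the running ones-complement sum leaves exactly the sum over the remaining bytes. The following is a minimal user-space sketch of that arithmetic, not kernel code; ones_sum() and csum_sub() are simplified stand-ins for the kernel's csum_partial() and csum_sub(), and the packet bytes are invented for illustration.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Fold a buffer into a 16-bit ones-complement sum (stand-in for
 * csum_partial() + csum_fold()-style folding).
 */
static uint32_t ones_sum(const uint8_t *data, size_t len, uint32_t sum)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)data[i] << 8 | data[i + 1];
	if (len & 1)
		sum += (uint32_t)data[len - 1] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

/* Ones-complement subtraction: a + ~b, then fold the carry back in. */
static uint32_t csum_sub(uint32_t a, uint32_t b)
{
	uint32_t sum = a + (~b & 0xffff);

	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

int main(void)
{
	uint8_t pkt[] = { 0x45, 0x00, 0x00, 0x1c, 0xde, 0xad,
			  0xbe, 0xef, 0x11, 0x22, 0x33, 0x44 };
	size_t hdr = 4;	/* pretend the first 4 bytes are a pulled header */
	uint32_t whole = ones_sum(pkt, sizeof(pkt), 0);
	uint32_t hdrsum = ones_sum(pkt, hdr, 0);
	uint32_t rest = ones_sum(pkt + hdr, sizeof(pkt) - hdr, 0);

	/* Subtracting the header's partial sum from the whole-packet sum
	 * leaves the sum over the remainder, which is how
	 * skb_gro_postpull_rcsum() keeps NAPI_GRO_CB(skb)->csum valid
	 * after skb_gro_pull().
	 */
	printf("whole-hdr = %#x, rest = %#x\n", csum_sub(whole, hdrsum), rest);
	return 0;
}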
diff --git a/net/core/dev.c b/net/core/dev.c
index b3c574a88026..ce01847793c0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3846,6 +3846,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 
 	skb_gro_reset_offset(skb);
 	gro_list_prepare(napi, skb);
+	NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
@@ -3922,6 +3923,31 @@ normal:
 	goto pull;
 }
 
+struct packet_offload *gro_find_receive_by_type(__be16 type)
+{
+	struct list_head *offload_head = &offload_base;
+	struct packet_offload *ptype;
+
+	list_for_each_entry_rcu(ptype, offload_head, list) {
+		if (ptype->type != type || !ptype->callbacks.gro_receive)
+			continue;
+		return ptype;
+	}
+	return NULL;
+}
+
+struct packet_offload *gro_find_complete_by_type(__be16 type)
+{
+	struct list_head *offload_head = &offload_base;
+	struct packet_offload *ptype;
+
+	list_for_each_entry_rcu(ptype, offload_head, list) {
+		if (ptype->type != type || !ptype->callbacks.gro_complete)
+			continue;
+		return ptype;
+	}
+	return NULL;
+}
 
 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
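Both helpers walk the shared offload_base list with list_for_each_entry_rcu() but take no RCU lock themselves, so callers must hold rcu_read_lock() across the lookup and any callback invocation. A minimal sketch of the intended call pattern, mirroring gre_gro_receive() later in this patch (head, skb and type are assumed to be in scope; type would be the encapsulated protocol, e.g. htons(ETH_P_IP)):

	struct packet_offload *ptype;
	struct sk_buff **pp = NULL;

	rcu_read_lock();
	ptype = gro_find_receive_by_type(type);
	if (ptype)
		pp = ptype->callbacks.gro_receive(head, skb);
	rcu_read_unlock();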
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b8bc1a3d5cf1..6268a4751e64 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1391,9 +1391,15 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 		NAPI_GRO_CB(p)->flush |=
 			(iph->ttl ^ iph2->ttl) |
 			(iph->tos ^ iph2->tos) |
-			(__force int)((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) |
-			((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
+			((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));
 
+		/* Save the IP ID check to be included later when we get to
+		 * the transport layer so only the inner most IP ID is checked.
+		 * This is because some GSO/TSO implementations do not
+		 * correctly increment the IP ID for the outer hdrs.
+		 */
+		NAPI_GRO_CB(p)->flush_id =
+			((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
 		NAPI_GRO_CB(p)->flush |= flush;
 	}
 
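The flush_id expression is zero exactly when the new segment's IP ID is the next one expected after the count segments already merged into p, and non-zero otherwise (for instance when a GSO/TSO implementation emits a fixed outer ID). A small user-space model of the check, with invented ID values:

#include <stdio.h>
#include <stdint.h>

/* Mirrors ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id) */
static uint16_t flush_id(uint16_t held_id, uint16_t count, uint16_t new_id)
{
	return (uint16_t)(held_id + count) ^ new_id;
}

int main(void)
{
	/* Held packet started at ID 1000 and already aggregated 3 segments,
	 * so an in-order segment must carry ID 1003: check yields 0.
	 */
	printf("in order: %u\n", flush_id(1000, 3, 1003));
	/* A non-incrementing outer ID makes the check non-zero, which will
	 * force a flush once the transport layer folds it in.
	 */
	printf("fixed ID: %u\n", flush_id(1000, 3, 1000));
	return 0;
}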
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 9138cfb10140..746a7b10d434 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -116,10 +116,170 @@ out:
 	return segs;
 }
 
+/* Compute the whole skb csum in s/w and store it, then verify GRO csum
+ * starting from gro_offset.
+ */
+static __sum16 gro_skb_checksum(struct sk_buff *skb)
+{
+	__sum16 sum;
+
+	skb->csum = skb_checksum(skb, 0, skb->len, 0);
+	NAPI_GRO_CB(skb)->csum = csum_sub(skb->csum,
+		csum_partial(skb->data, skb_gro_offset(skb), 0));
+	sum = csum_fold(NAPI_GRO_CB(skb)->csum);
+	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) {
+		if (unlikely(!sum))
+			netdev_rx_csum_fault(skb->dev);
+	} else
+		skb->ip_summed = CHECKSUM_COMPLETE;
+
+	return sum;
+}
+
+static struct sk_buff **gre_gro_receive(struct sk_buff **head,
+					struct sk_buff *skb)
+{
+	struct sk_buff **pp = NULL;
+	struct sk_buff *p;
+	const struct gre_base_hdr *greh;
+	unsigned int hlen, grehlen;
+	unsigned int off;
+	int flush = 1;
+	struct packet_offload *ptype;
+	__be16 type;
+
+	off = skb_gro_offset(skb);
+	hlen = off + sizeof(*greh);
+	greh = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, hlen)) {
+		greh = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!greh))
+			goto out;
+	}
+
+	/* Only support version 0 and K (key), C (csum) flags. Note that
+	 * although the support for the S (seq#) flag can be added easily
+	 * for GRO, this is problematic for GSO hence can not be enabled
+	 * here because a GRO pkt may end up in the forwarding path, thus
+	 * requiring GSO support to break it up correctly.
+	 */
+	if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0)
+		goto out;
+
+	type = greh->protocol;
+
+	rcu_read_lock();
+	ptype = gro_find_receive_by_type(type);
+	if (ptype == NULL)
+		goto out_unlock;
+
+	grehlen = GRE_HEADER_SECTION;
+
+	if (greh->flags & GRE_KEY)
+		grehlen += GRE_HEADER_SECTION;
+
+	if (greh->flags & GRE_CSUM)
+		grehlen += GRE_HEADER_SECTION;
+
+	hlen = off + grehlen;
+	if (skb_gro_header_hard(skb, hlen)) {
+		greh = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!greh))
+			goto out_unlock;
+	}
+	if (greh->flags & GRE_CSUM) { /* Need to verify GRE csum first */
+		__sum16 csum = 0;
+
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			csum = csum_fold(NAPI_GRO_CB(skb)->csum);
+		/* Don't trust csum error calculated/reported by h/w */
+		if (skb->ip_summed == CHECKSUM_NONE || csum != 0)
+			csum = gro_skb_checksum(skb);
+
+		/* GRE CSUM is the 1's complement of the 1's complement sum
+		 * of the GRE hdr plus payload so it should add up to 0xffff
+		 * (and 0 after csum_fold()) just like the IPv4 hdr csum.
+		 */
+		if (csum)
+			goto out_unlock;
+	}
+	flush = 0;
+
+	for (p = *head; p; p = p->next) {
+		const struct gre_base_hdr *greh2;
+
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		/* The following checks are needed to ensure only pkts
+		 * from the same tunnel are considered for aggregation.
+		 * The criteria for "the same tunnel" includes:
+		 * 1) same version (we only support version 0 here)
+		 * 2) same protocol (we only support ETH_P_IP for now)
+		 * 3) same set of flags
+		 * 4) same key if the key field is present.
+		 */
+		greh2 = (struct gre_base_hdr *)(p->data + off);
+
+		if (greh2->flags != greh->flags ||
+		    greh2->protocol != greh->protocol) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+		if (greh->flags & GRE_KEY) {
+			/* compare keys */
+			if (*(__be32 *)(greh2+1) != *(__be32 *)(greh+1)) {
+				NAPI_GRO_CB(p)->same_flow = 0;
+				continue;
+			}
+		}
+	}
+
+	skb_gro_pull(skb, grehlen);
+
+	/* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull() */
+	skb_gro_postpull_rcsum(skb, greh, grehlen);
+
+	pp = ptype->callbacks.gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+
+int gre_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	struct gre_base_hdr *greh = (struct gre_base_hdr *)(skb->data + nhoff);
+	struct packet_offload *ptype;
+	unsigned int grehlen = sizeof(*greh);
+	int err = -ENOENT;
+	__be16 type;
+
+	type = greh->protocol;
+	if (greh->flags & GRE_KEY)
+		grehlen += GRE_HEADER_SECTION;
+
+	if (greh->flags & GRE_CSUM)
+		grehlen += GRE_HEADER_SECTION;
+
+	rcu_read_lock();
+	ptype = gro_find_complete_by_type(type);
+	if (ptype != NULL)
+		err = ptype->callbacks.gro_complete(skb, nhoff + grehlen);
+
+	rcu_read_unlock();
+	return err;
+}
+
 static const struct net_offload gre_offload = {
 	.callbacks = {
 		.gso_send_check = gre_gso_send_check,
 		.gso_segment = gre_gso_segment,
+		.gro_receive = gre_gro_receive,
+		.gro_complete = gre_gro_complete,
 	},
 };
 
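The comment in gre_gro_receive() notes that a correct GRE checksum makes the ones-complement sum over header plus payload come out to 0xffff, i.e. zero after csum_fold(). Below is a user-space model of that property, not kernel code; ones_sum() is a simplified stand-in for csum_partial()/csum_fold(), and the packet bytes are invented (a version-0 header with only the C flag set and protocol 0x0800):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Fold a buffer into a 16-bit ones-complement sum. */
static uint16_t ones_sum(const uint8_t *data, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)data[i] << 8 | data[i + 1];
	if (len & 1)
		sum += (uint32_t)data[len - 1] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

int main(void)
{
	/* 4-byte base header + 4-byte checksum section + payload; the
	 * checksum field (bytes 4-5) starts as zero, as on the sender.
	 */
	uint8_t pkt[] = { 0x80, 0x00, 0x08, 0x00,	/* C flag, proto IP */
			  0x00, 0x00, 0x00, 0x00,	/* csum + reserved */
			  0xde, 0xad, 0xbe, 0xef };	/* payload */
	uint16_t csum = ~ones_sum(pkt, sizeof(pkt)) & 0xffff;

	pkt[4] = csum >> 8;
	pkt[5] = csum & 0xff;
	/* Receiver side: with the checksum in place the sum over header
	 * plus payload is 0xffff, so this prints 0, mirroring the
	 * "if (csum) goto out_unlock;" test above.
	 */
	printf("folded receiver csum: %#x\n", ~ones_sum(pkt, sizeof(pkt)) & 0xffff);
	return 0;
}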
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 2658a27f540d..771a3950d87a 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -197,7 +197,8 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 		goto out_check_final;
 
 found:
-	flush = NAPI_GRO_CB(p)->flush;
+	/* Include the IP ID check below from the inner most IP hdr */
+	flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id;
 	flush |= (__force int)(flags & TCP_FLAG_CWR);
 	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
 			       ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
@@ -230,7 +231,7 @@ out_check_final:
 		pp = head;
 
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
+	NAPI_GRO_CB(skb)->flush |= (flush != 0);
 
 	return pp;
 }
@@ -280,7 +281,7 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *
 	if (NAPI_GRO_CB(skb)->flush)
 		goto skip_csum;
 
-	wsum = skb->csum;
+	wsum = NAPI_GRO_CB(skb)->csum;
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_NONE:
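The (flush != 0) form matters because ->flush shrank from int to u16 in this patch, while the local flush stays an int that accumulates 32-bit-wide values (flag-word and sequence-number XORs) whose only set bits may sit above bit 15. Collapsing to 0/1 before the |= avoids silently losing a flush to truncation; a small user-space illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int flush = 0x00010000;			/* non-zero, only bit 16 set */
	uint16_t truncated = (uint16_t)flush;	/* plain |= would keep this */
	uint16_t collapsed = (flush != 0);	/* what the patch stores */

	printf("truncated: %u (flush lost)\n", truncated);	/* 0 */
	printf("collapsed: %u (flush kept)\n", collapsed);	/* 1 */
	return 0;
}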
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 6fb4162fa785..1e8683b135bb 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -190,7 +190,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 	unsigned int nlen;
 	unsigned int hlen;
 	unsigned int off;
-	int flush = 1;
+	u16 flush = 1;
 	int proto;
 	__wsum csum;
 