author	Eric Dumazet <edumazet@google.com>	2013-03-17 13:15:55 -0400
committer	Pablo Neira Ayuso <pablo@netfilter.org>	2013-03-19 12:02:24 -0400
commit	ae08ce0021087a5d812d2714fb2a326ef9f8c450 (patch)
tree	800cbac2e233a3ec3936e3baa6f38e65f4b3ea1d /net
parent	e844a928431fa8f1359d1f4f2cef53d9b446bf52 (diff)
netfilter: nfnetlink_queue: zero copy support
nfqnl_build_packet_message() currently copies the packet into the netlink
message, while it can instead use zero copy. Make sure the skb 'copy' is the
last component of the cooked netlink message, as we can't add anything after
it.

Patch cooked in Copenhagen at the Netfilter Workshop ;)

Still to be addressed in separate patches:

- GRO/GSO packets are segmented in nf_queue() and checksummed in
  nfqnl_build_packet_message(). Proper support for GSO/GRO packets
  (no segmentation, and no checksumming) needs application cooperation,
  if we want no regressions.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'net')
-rw-r--r--  net/netfilter/nfnetlink_queue_core.c  94
1 file changed, 72 insertions(+), 22 deletions(-)
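The change is transparent to queue consumers: the payload still arrives as the NFQA_PAYLOAD attribute, only the way the kernel assembles the message differs. As a rough illustration of the application side mentioned in the commit message (the side that would have to cooperate for future GSO/GRO support), here is a minimal, hypothetical libnetfilter_queue reader. It is a sketch, not part of this patch, and assumes the stock libnetfilter_queue API (nfq_open(), nfq_create_queue(), nfq_set_mode(), nfq_get_payload(), ...).

	/* Sketch only: a minimal nfnetlink_queue consumer, not part of this patch. */
	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <arpa/inet.h>
	#include <sys/socket.h>
	#include <linux/netfilter.h>		/* NF_ACCEPT */
	#include <libnetfilter_queue/libnetfilter_queue.h>

	/* Per-packet callback: read the NFQA_PAYLOAD contents and accept the
	 * packet. Whether the kernel built that attribute by copying or by
	 * attaching page frags is invisible here. */
	static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
		      struct nfq_data *nfa, void *data)
	{
		struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
		uint32_t id = ph ? ntohl(ph->packet_id) : 0;
		unsigned char *payload;
		int len = nfq_get_payload(nfa, &payload);

		if (len >= 0)
			printf("packet id=%u payload_len=%d\n", id, len);

		return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL);
	}

	int main(void)
	{
		struct nfq_handle *h = nfq_open();
		struct nfq_q_handle *qh;
		char buf[65536];
		int fd, n;

		if (!h)
			exit(EXIT_FAILURE);

		qh = nfq_create_queue(h, 0, cb, NULL);	/* queue number 0 */
		if (!qh)
			exit(EXIT_FAILURE);

		/* Ask for full packet payloads in NFQA_PAYLOAD. */
		nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);

		fd = nfq_fd(h);
		while ((n = recv(fd, buf, sizeof(buf), 0)) >= 0)
			nfq_handle_packet(h, buf, n);

		nfq_destroy_queue(qh);
		nfq_close(h);
		return 0;
	}

With NFQNL_COPY_PACKET selected, the kernel goes through the copy-mode branch of nfqnl_build_packet_message() touched by the hunks below, where data_len and hlen are computed.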
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 350c50fbfd4d..da91b8676ddb 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -217,14 +217,59 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
 	spin_unlock_bh(&queue->lock);
 }
 
+static void
+nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
+{
+	int i, j = 0;
+	int plen = 0; /* length of skb->head fragment */
+	struct page *page;
+	unsigned int offset;
+
+	/* dont bother with small payloads */
+	if (len <= skb_tailroom(to)) {
+		skb_copy_bits(from, 0, skb_put(to, len), len);
+		return;
+	}
+
+	if (hlen) {
+		skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
+		len -= hlen;
+	} else {
+		plen = min_t(int, skb_headlen(from), len);
+		if (plen) {
+			page = virt_to_head_page(from->head);
+			offset = from->data - (unsigned char *)page_address(page);
+			__skb_fill_page_desc(to, 0, page, offset, plen);
+			get_page(page);
+			j = 1;
+			len -= plen;
+		}
+	}
+
+	to->truesize += len + plen;
+	to->len += len + plen;
+	to->data_len += len + plen;
+
+	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
+		if (!len)
+			break;
+		skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
+		skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
+		len -= skb_shinfo(to)->frags[j].size;
+		skb_frag_ref(to, j);
+		j++;
+	}
+	skb_shinfo(to)->nr_frags = j;
+}
+
 static struct sk_buff *
 nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			   struct nf_queue_entry *entry,
 			   __be32 **packet_id_ptr)
 {
-	sk_buff_data_t old_tail;
 	size_t size;
 	size_t data_len = 0, cap_len = 0;
+	int hlen = 0;
 	struct sk_buff *skb;
 	struct nlattr *nla;
 	struct nfqnl_msg_packet_hdr *pmsg;
@@ -246,8 +291,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 #endif
 		+ nla_total_size(sizeof(u_int32_t))	/* mark */
 		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
-		+ nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)
-		+ nla_total_size(sizeof(u_int32_t)));	/* cap_len */
+		+ nla_total_size(sizeof(u_int32_t));	/* cap_len */
+
+	if (entskb->tstamp.tv64)
+		size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
 
 	outdev = entry->outdev;
 
@@ -265,7 +312,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		if (data_len == 0 || data_len > entskb->len)
 			data_len = entskb->len;
 
-		size += nla_total_size(data_len);
+
+		if (!entskb->head_frag ||
+		    skb_headlen(entskb) < L1_CACHE_BYTES ||
+		    skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS)
+			hlen = skb_headlen(entskb);
+
+		if (skb_has_frag_list(entskb))
+			hlen = entskb->len;
+		hlen = min_t(int, data_len, hlen);
+		size += sizeof(struct nlattr) + hlen;
 		cap_len = entskb->len;
 		break;
 	}
@@ -277,7 +333,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	if (!skb)
 		return NULL;
 
-	old_tail = skb->tail;
 	nlh = nlmsg_put(skb, 0, 0,
 			NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
 			sizeof(struct nfgenmsg), 0);
@@ -382,31 +437,26 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			goto nla_put_failure;
 	}
 
+	if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
+		goto nla_put_failure;
+
+	if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
+		goto nla_put_failure;
+
 	if (data_len) {
 		struct nlattr *nla;
-		int sz = nla_attr_size(data_len);
 
-		if (skb_tailroom(skb) < nla_total_size(data_len)) {
-			printk(KERN_WARNING "nf_queue: no tailroom!\n");
-			kfree_skb(skb);
-			return NULL;
-		}
+		if (skb_tailroom(skb) < sizeof(*nla) + hlen)
+			goto nla_put_failure;
 
-		nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len));
+		nla = (struct nlattr *)skb_put(skb, sizeof(*nla));
 		nla->nla_type = NFQA_PAYLOAD;
-		nla->nla_len = sz;
+		nla->nla_len = nla_attr_size(data_len);
 
-		if (skb_copy_bits(entskb, 0, nla_data(nla), data_len))
-			BUG();
+		nfqnl_zcopy(skb, entskb, data_len, hlen);
 	}
 
-	if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
-		goto nla_put_failure;
-
-	if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
-		goto nla_put_failure;
-
-	nlh->nlmsg_len = skb->tail - old_tail;
+	nlh->nlmsg_len = skb->len;
 	return skb;
 
 nla_put_failure: