author     Florian Westphal <fw@strlen.de>          2013-04-19 00:58:25 -0400
committer  Pablo Neira Ayuso <pablo@netfilter.org>  2013-04-29 14:09:05 -0400
commit     a5fedd43d5f6c94c71053a66e4c3d2e35f1731a2 (patch)
tree       05d4736fcb475aba8b7031f3f41d1954c1255a32 /net
parent     4bd60443cc44c93ff37d483d69674647a0c48e4e (diff)
netfilter: move skb_gso_segment into nfnetlink_queue module
skb_gso_segment is expensive, so it would be nice if we could avoid it in the future. However, userspace needs to be prepared to receive larger-than-mtu packets (which will also have incorrect l3/l4 checksums), so we cannot simply remove it.

The plan is to add a per-queue feature flag that userspace can set when binding the queue. The problem is that in nf_queue we only have a queue number, not the queue context/configuration settings.

This patch should have no impact other than moving the skb_gso_segment call into a function that has access to the queue config data. A new size attribute in nf_queue_entry is needed so that nfnetlink_queue can duplicate the queue entry of the gso skb for each segment, while also copying the route key stored behind the entry.

A follow-up patch adds a switch to disable skb_gso_segment when the queue config says so.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
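Note on the new size attribute: the queue entry is allocated with the address-family route key stored directly behind the struct, so recording the combined length lets nfnetlink_queue clone the entry (route key included) for every GSO segment with a single kmemdup(). A minimal sketch of that pattern, assuming the usual nf_queue_entry/nf_afinfo layout (simplified, not the verbatim kernel code):

        /* allocation side (nf_queue core): entry plus per-family route key */
        entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
        entry->size = sizeof(*entry) + afinfo->route_key_size;
        afinfo->saveroute(skb, entry);  /* fills the bytes behind *entry */

        /* duplication side (nfnetlink_queue), one copy per GSO segment */
        dup = kmemdup(entry, entry->size, GFP_ATOMIC);  /* route key copied too */
        if (dup && !nf_queue_entry_get_refs(dup)) {
                kfree(dup);
                dup = NULL;
        }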
Diffstat (limited to 'net')
-rw-r--r--   net/netfilter/nf_queue.c               |  96
-rw-r--r--   net/netfilter/nfnetlink_queue_core.c   | 154
2 files changed, 146 insertions(+), 104 deletions(-)
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 1d91e77ba4c2..5d24b1fdb593 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -45,7 +45,7 @@ void nf_unregister_queue_handler(void)
 }
 EXPORT_SYMBOL(nf_unregister_queue_handler);
 
-static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
+void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
 {
         /* Release those devices we held, or Alexey will kill me. */
         if (entry->indev)
@@ -65,9 +65,10 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
         /* Drop reference to owner of hook which queued us. */
         module_put(entry->elem->owner);
 }
+EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
 
 /* Bump dev refs so they don't vanish while packet is out */
-static bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
 {
         if (!try_module_get(entry->elem->owner))
                 return false;
@@ -92,12 +93,13 @@ static bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
 
         return true;
 }
+EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
 
 /*
  * Any packet that leaves via this function must come back
  * through nf_reinject().
  */
-static int __nf_queue(struct sk_buff *skb,
-                      struct nf_hook_ops *elem,
-                      u_int8_t pf, unsigned int hook,
-                      struct net_device *indev,
+int nf_queue(struct sk_buff *skb,
+             struct nf_hook_ops *elem,
+             u_int8_t pf, unsigned int hook,
+             struct net_device *indev,
@@ -137,6 +139,7 @@ static int __nf_queue(struct sk_buff *skb,
                 .indev   = indev,
                 .outdev  = outdev,
                 .okfn    = okfn,
+                .size    = sizeof(*entry) + afinfo->route_key_size,
         };
 
         if (!nf_queue_entry_get_refs(entry)) {
@@ -163,87 +166,6 @@ err:
         return status;
 }
 
-#ifdef CONFIG_BRIDGE_NETFILTER
-/* When called from bridge netfilter, skb->data must point to MAC header
- * before calling skb_gso_segment(). Else, original MAC header is lost
- * and segmented skbs will be sent to wrong destination.
- */
-static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
-{
-        if (skb->nf_bridge)
-                __skb_push(skb, skb->network_header - skb->mac_header);
-}
-
-static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
-{
-        if (skb->nf_bridge)
-                __skb_pull(skb, skb->network_header - skb->mac_header);
-}
-#else
-#define nf_bridge_adjust_skb_data(s) do {} while (0)
-#define nf_bridge_adjust_segmented_data(s) do {} while (0)
-#endif
-
-int nf_queue(struct sk_buff *skb,
-             struct nf_hook_ops *elem,
-             u_int8_t pf, unsigned int hook,
-             struct net_device *indev,
-             struct net_device *outdev,
-             int (*okfn)(struct sk_buff *),
-             unsigned int queuenum)
-{
-        struct sk_buff *segs;
-        int err = -EINVAL;
-        unsigned int queued;
-
-        if (!skb_is_gso(skb))
-                return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
-                                  queuenum);
-
-        switch (pf) {
-        case NFPROTO_IPV4:
-                skb->protocol = htons(ETH_P_IP);
-                break;
-        case NFPROTO_IPV6:
-                skb->protocol = htons(ETH_P_IPV6);
-                break;
-        }
-
-        nf_bridge_adjust_skb_data(skb);
-        segs = skb_gso_segment(skb, 0);
-        /* Does not use PTR_ERR to limit the number of error codes that can be
-         * returned by nf_queue. For instance, callers rely on -ECANCELED to mean
-         * 'ignore this hook'.
-         */
-        if (IS_ERR(segs))
-                goto out_err;
-        queued = 0;
-        err = 0;
-        do {
-                struct sk_buff *nskb = segs->next;
-
-                segs->next = NULL;
-                if (err == 0) {
-                        nf_bridge_adjust_segmented_data(segs);
-                        err = __nf_queue(segs, elem, pf, hook, indev,
-                                         outdev, okfn, queuenum);
-                }
-                if (err == 0)
-                        queued++;
-                else
-                        kfree_skb(segs);
-                segs = nskb;
-        } while (segs);
-
-        if (queued) {
-                kfree_skb(skb);
-                return 0;
-        }
- out_err:
-        nf_bridge_adjust_segmented_data(skb);
-        return err;
-}
-
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
         struct sk_buff *skb = entry->skb;
@@ -283,9 +205,9 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
                 local_bh_enable();
                 break;
         case NF_QUEUE:
-                err = __nf_queue(skb, elem, entry->pf, entry->hook,
-                                 entry->indev, entry->outdev, entry->okfn,
-                                 verdict >> NF_VERDICT_QBITS);
+                err = nf_queue(skb, elem, entry->pf, entry->hook,
+                               entry->indev, entry->outdev, entry->okfn,
+                               verdict >> NF_VERDICT_QBITS);
                 if (err < 0) {
                         if (err == -ECANCELED)
                                 goto next_hook;
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index ef3cdb4bfeea..edbae4caf753 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -477,28 +477,13 @@ nla_put_failure:
 }
 
 static int
-nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
+                       struct nf_queue_entry *entry)
 {
         struct sk_buff *nskb;
-        struct nfqnl_instance *queue;
         int err = -ENOBUFS;
         __be32 *packet_id_ptr;
         int failopen = 0;
-        struct net *net = dev_net(entry->indev ?
-                                  entry->indev : entry->outdev);
-        struct nfnl_queue_net *q = nfnl_queue_pernet(net);
-
-        /* rcu_read_lock()ed by nf_hook_slow() */
-        queue = instance_lookup(q, queuenum);
-        if (!queue) {
-                err = -ESRCH;
-                goto err_out;
-        }
-
-        if (queue->copy_mode == NFQNL_COPY_NONE) {
-                err = -EINVAL;
-                goto err_out;
-        }
 
         nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);
         if (nskb == NULL) {
@@ -547,6 +532,141 @@ err_out:
         return err;
 }
 
+static struct nf_queue_entry *
+nf_queue_entry_dup(struct nf_queue_entry *e)
+{
+        struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
+        if (entry) {
+                if (nf_queue_entry_get_refs(entry))
+                        return entry;
+                kfree(entry);
+        }
+        return NULL;
+}
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+/* When called from bridge netfilter, skb->data must point to MAC header
+ * before calling skb_gso_segment(). Else, original MAC header is lost
+ * and segmented skbs will be sent to wrong destination.
+ */
+static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
+{
+        if (skb->nf_bridge)
+                __skb_push(skb, skb->network_header - skb->mac_header);
+}
+
+static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
+{
+        if (skb->nf_bridge)
+                __skb_pull(skb, skb->network_header - skb->mac_header);
+}
+#else
+#define nf_bridge_adjust_skb_data(s) do {} while (0)
+#define nf_bridge_adjust_segmented_data(s) do {} while (0)
+#endif
+
+static void free_entry(struct nf_queue_entry *entry)
+{
+        nf_queue_entry_release_refs(entry);
+        kfree(entry);
+}
+
+static int
+__nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
+                           struct sk_buff *skb, struct nf_queue_entry *entry)
+{
+        int ret = -ENOMEM;
+        struct nf_queue_entry *entry_seg;
+
+        nf_bridge_adjust_segmented_data(skb);
+
+        if (skb->next == NULL) { /* last packet, no need to copy entry */
+                struct sk_buff *gso_skb = entry->skb;
+                entry->skb = skb;
+                ret = __nfqnl_enqueue_packet(net, queue, entry);
+                if (ret)
+                        entry->skb = gso_skb;
+                return ret;
+        }
+
+        skb->next = NULL;
+
+        entry_seg = nf_queue_entry_dup(entry);
+        if (entry_seg) {
+                entry_seg->skb = skb;
+                ret = __nfqnl_enqueue_packet(net, queue, entry_seg);
+                if (ret)
+                        free_entry(entry_seg);
+        }
+        return ret;
+}
+
+static int
+nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+{
+        unsigned int queued;
+        struct nfqnl_instance *queue;
+        struct sk_buff *skb, *segs;
+        int err = -ENOBUFS;
+        struct net *net = dev_net(entry->indev ?
+                                  entry->indev : entry->outdev);
+        struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+
+        /* rcu_read_lock()ed by nf_hook_slow() */
+        queue = instance_lookup(q, queuenum);
+        if (!queue)
+                return -ESRCH;
+
+        if (queue->copy_mode == NFQNL_COPY_NONE)
+                return -EINVAL;
+
+        if (!skb_is_gso(entry->skb))
+                return __nfqnl_enqueue_packet(net, queue, entry);
+
+        skb = entry->skb;
+
+        switch (entry->pf) {
+        case NFPROTO_IPV4:
+                skb->protocol = htons(ETH_P_IP);
+                break;
+        case NFPROTO_IPV6:
+                skb->protocol = htons(ETH_P_IPV6);
+                break;
+        }
+
+        nf_bridge_adjust_skb_data(skb);
+        segs = skb_gso_segment(skb, 0);
+        /* Does not use PTR_ERR to limit the number of error codes that can be
+         * returned by nf_queue. For instance, callers rely on -ECANCELED to
+         * mean 'ignore this hook'.
+         */
+        if (IS_ERR(segs))
+                goto out_err;
+        queued = 0;
+        err = 0;
+        do {
+                struct sk_buff *nskb = segs->next;
+                if (err == 0)
+                        err = __nfqnl_enqueue_packet_gso(net, queue,
+                                                         segs, entry);
+                if (err == 0)
+                        queued++;
+                else
+                        kfree_skb(segs);
+                segs = nskb;
+        } while (segs);
+
+        if (queued) {
+                if (err) /* some segments are already queued */
+                        free_entry(entry);
+                kfree_skb(skb);
+                return 0;
+        }
+out_err:
+        nf_bridge_adjust_segmented_data(skb);
+        return err;
+}
+
 static int
 nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
 {