author     Florian Westphal <fw@strlen.de>            2013-04-19 00:58:25 -0400
committer  Pablo Neira Ayuso <pablo@netfilter.org>    2013-04-29 14:09:05 -0400
commit     a5fedd43d5f6c94c71053a66e4c3d2e35f1731a2 (patch)
tree       05d4736fcb475aba8b7031f3f41d1954c1255a32 /net
parent     4bd60443cc44c93ff37d483d69674647a0c48e4e (diff)
netfilter: move skb_gso_segment into nfnetlink_queue module
skb_gso_segment is expensive, so it would be nice if we could
avoid it in the future. However, userspace needs to be prepared
to receive larger-than-MTU packets (which will also have incorrect
L3/L4 checksums), so we cannot simply remove it.
The plan is to add a per-queue feature flag that userspace can
set when binding the queue.
The problem is that in nf_queue, we only have a queue number,
not the queue context/configuration settings.
This patch should have no impact other than the skb_gso_segment
call now being in a function that has access to the queue config
data.
A new size member in nf_queue_entry is needed so that, when
segmenting the skb, nfnetlink_queue can duplicate the GSO skb's
queue entry while also copying the route key stored behind it.
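
For illustration, here is a minimal, hedged userspace model of the layout
that the new size member describes: the per-family route key sits directly
behind the entry in the same allocation, so one kmemdup()-style copy of
e->size bytes duplicates the entry and the route key together. The
toy_entry structure and the sizes below are hypothetical stand-ins, not
the kernel definitions.

/* Toy model (userspace, not kernel code): the route key is stored
 * immediately after the entry, and 'size' records the total allocation,
 * so one flat copy of 'size' bytes duplicates entry + route key. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_entry {
	void *skb;		/* stand-in for struct sk_buff * */
	unsigned int size;	/* sizeof(*entry) + route_key_size */
	/* route key bytes follow in the same allocation */
};

static struct toy_entry *toy_entry_dup(const struct toy_entry *e)
{
	struct toy_entry *copy = malloc(e->size);	/* kmemdup() analogue */

	if (copy)
		memcpy(copy, e, e->size);	/* copies entry and route key */
	return copy;
}

int main(void)
{
	unsigned int route_key_size = 16;	/* e.g. afinfo->route_key_size */
	struct toy_entry *e = calloc(1, sizeof(*e) + route_key_size);
	struct toy_entry *seg;

	if (!e)
		return 1;
	e->size = sizeof(*e) + route_key_size;
	memset(e + 1, 0xab, route_key_size);	/* pretend saveroute() filled it */

	seg = toy_entry_dup(e);		/* one duplicate per extra GSO segment */
	if (seg)
		printf("duplicated %u bytes (entry + route key)\n", seg->size);
	free(seg);
	free(e);
	return 0;
}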
A follow-up patch adds a switch to disable skb_gso_segment when the
queue configuration says so.
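
For context, a hedged sketch of how userspace might eventually opt in to
receiving unsegmented GSO packets once that switch exists. The
NFQA_CFG_F_GSO flag and the nfq_set_queue_flags() helper shown here come
from the later follow-up work and libnetfilter_queue, not from this patch;
treat both as assumptions.

/* Hedged sketch: bind queue 0 and, assuming the follow-up flag exists,
 * ask the kernel not to software-segment GSO packets before queueing.
 * NFQA_CFG_F_GSO / nfq_set_queue_flags() are not part of this patch. */
#include <stdio.h>
#include <libnetfilter_queue/libnetfilter_queue.h>
#include <linux/netfilter/nfnetlink_queue.h>

static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
	      struct nfq_data *nfa, void *data)
{
	(void)qh; (void)nfmsg; (void)nfa; (void)data;
	return 0;	/* a real callback would issue a verdict here */
}

int main(void)
{
	struct nfq_handle *h = nfq_open();
	struct nfq_q_handle *qh;

	if (!h)
		return 1;
	qh = nfq_create_queue(h, 0, &cb, NULL);		/* bind queue number 0 */
	if (!qh) {
		nfq_close(h);
		return 1;
	}
	nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);	/* full packet copies */
	/* opt in to larger-than-MTU GSO skbs (assumed follow-up flag) */
	if (nfq_set_queue_flags(qh, NFQA_CFG_F_GSO, NFQA_CFG_F_GSO) < 0)
		fprintf(stderr, "NFQA_CFG_F_GSO not supported here\n");
	nfq_destroy_queue(qh);
	nfq_close(h);
	return 0;
}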
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'net')
-rw-r--r--  net/netfilter/nf_queue.c              |  96
-rw-r--r--  net/netfilter/nfnetlink_queue_core.c  | 154
2 files changed, 146 insertions(+), 104 deletions(-)
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 1d91e77ba4c2..5d24b1fdb593 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -45,7 +45,7 @@ void nf_unregister_queue_handler(void)
 }
 EXPORT_SYMBOL(nf_unregister_queue_handler);
 
-static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
+void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
 {
 	/* Release those devices we held, or Alexey will kill me. */
 	if (entry->indev)
@@ -65,9 +65,10 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
 	/* Drop reference to owner of hook which queued us. */
 	module_put(entry->elem->owner);
 }
+EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
 
 /* Bump dev refs so they don't vanish while packet is out */
-static bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
 {
 	if (!try_module_get(entry->elem->owner))
 		return false;
@@ -92,12 +93,13 @@ static bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
 
 	return true;
 }
+EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
 
 /*
  * Any packet that leaves via this function must come back
  * through nf_reinject().
  */
-static int __nf_queue(struct sk_buff *skb,
+int nf_queue(struct sk_buff *skb,
 		      struct nf_hook_ops *elem,
 		      u_int8_t pf, unsigned int hook,
 		      struct net_device *indev,
@@ -137,6 +139,7 @@ static int __nf_queue(struct sk_buff *skb,
 		.indev	= indev,
 		.outdev	= outdev,
 		.okfn	= okfn,
+		.size	= sizeof(*entry) + afinfo->route_key_size,
 	};
 
 	if (!nf_queue_entry_get_refs(entry)) {
@@ -163,87 +166,6 @@ err:
 	return status;
 }
 
-#ifdef CONFIG_BRIDGE_NETFILTER
-/* When called from bridge netfilter, skb->data must point to MAC header
- * before calling skb_gso_segment(). Else, original MAC header is lost
- * and segmented skbs will be sent to wrong destination.
- */
-static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
-{
-	if (skb->nf_bridge)
-		__skb_push(skb, skb->network_header - skb->mac_header);
-}
-
-static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
-{
-	if (skb->nf_bridge)
-		__skb_pull(skb, skb->network_header - skb->mac_header);
-}
-#else
-#define nf_bridge_adjust_skb_data(s) do {} while (0)
-#define nf_bridge_adjust_segmented_data(s) do {} while (0)
-#endif
-
-int nf_queue(struct sk_buff *skb,
-	     struct nf_hook_ops *elem,
-	     u_int8_t pf, unsigned int hook,
-	     struct net_device *indev,
-	     struct net_device *outdev,
-	     int (*okfn)(struct sk_buff *),
-	     unsigned int queuenum)
-{
-	struct sk_buff *segs;
-	int err = -EINVAL;
-	unsigned int queued;
-
-	if (!skb_is_gso(skb))
-		return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
-				  queuenum);
-
-	switch (pf) {
-	case NFPROTO_IPV4:
-		skb->protocol = htons(ETH_P_IP);
-		break;
-	case NFPROTO_IPV6:
-		skb->protocol = htons(ETH_P_IPV6);
-		break;
-	}
-
-	nf_bridge_adjust_skb_data(skb);
-	segs = skb_gso_segment(skb, 0);
-	/* Does not use PTR_ERR to limit the number of error codes that can be
-	 * returned by nf_queue. For instance, callers rely on -ECANCELED to mean
-	 * 'ignore this hook'.
-	 */
-	if (IS_ERR(segs))
-		goto out_err;
-	queued = 0;
-	err = 0;
-	do {
-		struct sk_buff *nskb = segs->next;
-
-		segs->next = NULL;
-		if (err == 0) {
-			nf_bridge_adjust_segmented_data(segs);
-			err = __nf_queue(segs, elem, pf, hook, indev,
-					 outdev, okfn, queuenum);
-		}
-		if (err == 0)
-			queued++;
-		else
-			kfree_skb(segs);
-		segs = nskb;
-	} while (segs);
-
-	if (queued) {
-		kfree_skb(skb);
-		return 0;
-	}
- out_err:
-	nf_bridge_adjust_segmented_data(skb);
-	return err;
-}
-
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
 	struct sk_buff *skb = entry->skb;
@@ -283,9 +205,9 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 		local_bh_enable();
 		break;
 	case NF_QUEUE:
-		err = __nf_queue(skb, elem, entry->pf, entry->hook,
-				 entry->indev, entry->outdev, entry->okfn,
-				 verdict >> NF_VERDICT_QBITS);
+		err = nf_queue(skb, elem, entry->pf, entry->hook,
+			       entry->indev, entry->outdev, entry->okfn,
+			       verdict >> NF_VERDICT_QBITS);
 		if (err < 0) {
 			if (err == -ECANCELED)
 				goto next_hook;
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index ef3cdb4bfeea..edbae4caf753 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -477,28 +477,13 @@ nla_put_failure:
 }
 
 static int
-nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
+			struct nf_queue_entry *entry)
 {
 	struct sk_buff *nskb;
-	struct nfqnl_instance *queue;
 	int err = -ENOBUFS;
 	__be32 *packet_id_ptr;
 	int failopen = 0;
-	struct net *net = dev_net(entry->indev ?
-				  entry->indev : entry->outdev);
-	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
-
-	/* rcu_read_lock()ed by nf_hook_slow() */
-	queue = instance_lookup(q, queuenum);
-	if (!queue) {
-		err = -ESRCH;
-		goto err_out;
-	}
-
-	if (queue->copy_mode == NFQNL_COPY_NONE) {
-		err = -EINVAL;
-		goto err_out;
-	}
 
 	nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);
 	if (nskb == NULL) {
@@ -547,6 +532,141 @@ err_out:
 	return err;
 }
 
+static struct nf_queue_entry *
+nf_queue_entry_dup(struct nf_queue_entry *e)
+{
+	struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
+	if (entry) {
+		if (nf_queue_entry_get_refs(entry))
+			return entry;
+		kfree(entry);
+	}
+	return NULL;
+}
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+/* When called from bridge netfilter, skb->data must point to MAC header
+ * before calling skb_gso_segment(). Else, original MAC header is lost
+ * and segmented skbs will be sent to wrong destination.
+ */
+static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
+{
+	if (skb->nf_bridge)
+		__skb_push(skb, skb->network_header - skb->mac_header);
+}
+
+static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
+{
+	if (skb->nf_bridge)
+		__skb_pull(skb, skb->network_header - skb->mac_header);
+}
+#else
+#define nf_bridge_adjust_skb_data(s) do {} while (0)
+#define nf_bridge_adjust_segmented_data(s) do {} while (0)
+#endif
+
+static void free_entry(struct nf_queue_entry *entry)
+{
+	nf_queue_entry_release_refs(entry);
+	kfree(entry);
+}
+
+static int
+__nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
+			   struct sk_buff *skb, struct nf_queue_entry *entry)
+{
+	int ret = -ENOMEM;
+	struct nf_queue_entry *entry_seg;
+
+	nf_bridge_adjust_segmented_data(skb);
+
+	if (skb->next == NULL) { /* last packet, no need to copy entry */
+		struct sk_buff *gso_skb = entry->skb;
+		entry->skb = skb;
+		ret = __nfqnl_enqueue_packet(net, queue, entry);
+		if (ret)
+			entry->skb = gso_skb;
+		return ret;
+	}
+
+	skb->next = NULL;
+
+	entry_seg = nf_queue_entry_dup(entry);
+	if (entry_seg) {
+		entry_seg->skb = skb;
+		ret = __nfqnl_enqueue_packet(net, queue, entry_seg);
+		if (ret)
+			free_entry(entry_seg);
+	}
+	return ret;
+}
+
+static int
+nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+{
+	unsigned int queued;
+	struct nfqnl_instance *queue;
+	struct sk_buff *skb, *segs;
+	int err = -ENOBUFS;
+	struct net *net = dev_net(entry->indev ?
+				  entry->indev : entry->outdev);
+	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+
+	/* rcu_read_lock()ed by nf_hook_slow() */
+	queue = instance_lookup(q, queuenum);
+	if (!queue)
+		return -ESRCH;
+
+	if (queue->copy_mode == NFQNL_COPY_NONE)
+		return -EINVAL;
+
+	if (!skb_is_gso(entry->skb))
+		return __nfqnl_enqueue_packet(net, queue, entry);
+
+	skb = entry->skb;
+
+	switch (entry->pf) {
+	case NFPROTO_IPV4:
+		skb->protocol = htons(ETH_P_IP);
+		break;
+	case NFPROTO_IPV6:
+		skb->protocol = htons(ETH_P_IPV6);
+		break;
+	}
+
+	nf_bridge_adjust_skb_data(skb);
+	segs = skb_gso_segment(skb, 0);
+	/* Does not use PTR_ERR to limit the number of error codes that can be
+	 * returned by nf_queue. For instance, callers rely on -ECANCELED to
+	 * mean 'ignore this hook'.
+	 */
+	if (IS_ERR(segs))
+		goto out_err;
+	queued = 0;
+	err = 0;
+	do {
+		struct sk_buff *nskb = segs->next;
+		if (err == 0)
+			err = __nfqnl_enqueue_packet_gso(net, queue,
+							segs, entry);
+		if (err == 0)
+			queued++;
+		else
+			kfree_skb(segs);
+		segs = nskb;
+	} while (segs);
+
+	if (queued) {
+		if (err) /* some segments are already queued */
+			free_entry(entry);
+		kfree_skb(skb);
+		return 0;
+	}
+out_err:
+	nf_bridge_adjust_segmented_data(skb);
+	return err;
+}
+
 static int
 nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
 {