Diffstat (limited to 'net/packet/af_packet.c')
-rw-r--r--	net/packet/af_packet.c	438
1 file changed, 374 insertions(+), 64 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c0c3cda19712..c698cec0a445 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -187,9 +187,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
 
 static void packet_flush_mclist(struct sock *sk);
 
+struct packet_fanout;
 struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock sk;
+	struct packet_fanout *fanout;
	struct tpacket_stats stats;
	struct packet_ring_buffer rx_ring;
	struct packet_ring_buffer tx_ring;
@@ -212,6 +214,24 @@ struct packet_sock {
	struct packet_type prot_hook ____cacheline_aligned_in_smp;
 };
 
+#define PACKET_FANOUT_MAX 256
+
+struct packet_fanout {
+#ifdef CONFIG_NET_NS
+	struct net *net;
+#endif
+	unsigned int num_members;
+	u16 id;
+	u8 type;
+	u8 defrag;
+	atomic_t rr_cur;
+	struct list_head list;
+	struct sock *arr[PACKET_FANOUT_MAX];
+	spinlock_t lock;
+	atomic_t sk_ref;
+	struct packet_type prot_hook ____cacheline_aligned_in_smp;
+};
+
 struct packet_skb_cb {
	unsigned int origlen;
	union {
@@ -222,6 +242,64 @@ struct packet_skb_cb {
 
 #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
 
+static inline struct packet_sock *pkt_sk(struct sock *sk)
+{
+	return (struct packet_sock *)sk;
+}
+
+static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
+static void __fanout_link(struct sock *sk, struct packet_sock *po);
+
+/* register_prot_hook must be invoked with the po->bind_lock held,
+ * or from a context in which asynchronous accesses to the packet
+ * socket is not possible (packet_create()).
+ */
+static void register_prot_hook(struct sock *sk)
+{
+	struct packet_sock *po = pkt_sk(sk);
+	if (!po->running) {
+		if (po->fanout)
+			__fanout_link(sk, po);
+		else
+			dev_add_pack(&po->prot_hook);
+		sock_hold(sk);
+		po->running = 1;
+	}
+}
+
+/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
+ * held.  If the sync parameter is true, we will temporarily drop
+ * the po->bind_lock and do a synchronize_net to make sure no
+ * asynchronous packet processing paths still refer to the elements
+ * of po->prot_hook.  If the sync parameter is false, it is the
+ * callers responsibility to take care of this.
+ */
+static void __unregister_prot_hook(struct sock *sk, bool sync)
+{
+	struct packet_sock *po = pkt_sk(sk);
+
+	po->running = 0;
+	if (po->fanout)
+		__fanout_unlink(sk, po);
+	else
+		__dev_remove_pack(&po->prot_hook);
+	__sock_put(sk);
+
+	if (sync) {
+		spin_unlock(&po->bind_lock);
+		synchronize_net();
+		spin_lock(&po->bind_lock);
+	}
+}
+
+static void unregister_prot_hook(struct sock *sk, bool sync)
+{
+	struct packet_sock *po = pkt_sk(sk);
+
+	if (po->running)
+		__unregister_prot_hook(sk, sync);
+}
+
 static inline __pure struct page *pgv_to_page(void *addr)
 {
	if (is_vmalloc_addr(addr))
@@ -324,11 +402,6 @@ static inline void packet_increment_head(struct packet_ring_buffer *buff)
	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
 }
 
-static inline struct packet_sock *pkt_sk(struct sock *sk)
-{
-	return (struct packet_sock *)sk;
-}
-
 static void packet_sock_destruct(struct sock *sk)
 {
	skb_queue_purge(&sk->sk_error_queue);
@@ -344,6 +417,240 @@ static void packet_sock_destruct(struct sock *sk)
	sk_refcnt_debug_dec(sk);
 }
 
+static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
+{
+	int x = atomic_read(&f->rr_cur) + 1;
+
+	if (x >= num)
+		x = 0;
+
+	return x;
+}
+
+static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
+{
+	u32 idx, hash = skb->rxhash;
+
+	idx = ((u64)hash * num) >> 32;
+
+	return f->arr[idx];
+}
+
+static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
+{
+	int cur, old;
+
+	cur = atomic_read(&f->rr_cur);
+	while ((old = atomic_cmpxchg(&f->rr_cur, cur,
+				     fanout_rr_next(f, num))) != cur)
+		cur = old;
+	return f->arr[cur];
+}
+
+static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
+{
+	unsigned int cpu = smp_processor_id();
+
+	return f->arr[cpu % num];
+}
+
+static struct sk_buff *fanout_check_defrag(struct sk_buff *skb)
+{
+#ifdef CONFIG_INET
+	const struct iphdr *iph;
+	u32 len;
+
+	if (skb->protocol != htons(ETH_P_IP))
+		return skb;
+
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		return skb;
+
+	iph = ip_hdr(skb);
+	if (iph->ihl < 5 || iph->version != 4)
+		return skb;
+	if (!pskb_may_pull(skb, iph->ihl*4))
+		return skb;
+	iph = ip_hdr(skb);
+	len = ntohs(iph->tot_len);
+	if (skb->len < len || len < (iph->ihl * 4))
+		return skb;
+
+	if (ip_is_fragment(ip_hdr(skb))) {
+		skb = skb_share_check(skb, GFP_ATOMIC);
+		if (skb) {
+			if (pskb_trim_rcsum(skb, len))
+				return skb;
+			memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+			if (ip_defrag(skb, IP_DEFRAG_AF_PACKET))
+				return NULL;
+			skb->rxhash = 0;
+		}
+	}
+#endif
+	return skb;
+}
+
+static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
+			     struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct packet_fanout *f = pt->af_packet_priv;
+	unsigned int num = f->num_members;
+	struct packet_sock *po;
+	struct sock *sk;
+
+	if (!net_eq(dev_net(dev), read_pnet(&f->net)) ||
+	    !num) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	switch (f->type) {
+	case PACKET_FANOUT_HASH:
+	default:
+		if (f->defrag) {
+			skb = fanout_check_defrag(skb);
+			if (!skb)
+				return 0;
+		}
+		skb_get_rxhash(skb);
+		sk = fanout_demux_hash(f, skb, num);
+		break;
+	case PACKET_FANOUT_LB:
+		sk = fanout_demux_lb(f, skb, num);
+		break;
+	case PACKET_FANOUT_CPU:
+		sk = fanout_demux_cpu(f, skb, num);
+		break;
+	}
+
+	po = pkt_sk(sk);
+
+	return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
+}
+
+static DEFINE_MUTEX(fanout_mutex);
+static LIST_HEAD(fanout_list);
+
+static void __fanout_link(struct sock *sk, struct packet_sock *po)
+{
+	struct packet_fanout *f = po->fanout;
+
+	spin_lock(&f->lock);
+	f->arr[f->num_members] = sk;
+	smp_wmb();
+	f->num_members++;
+	spin_unlock(&f->lock);
+}
+
+static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
+{
+	struct packet_fanout *f = po->fanout;
+	int i;
+
+	spin_lock(&f->lock);
+	for (i = 0; i < f->num_members; i++) {
+		if (f->arr[i] == sk)
+			break;
+	}
+	BUG_ON(i >= f->num_members);
+	f->arr[i] = f->arr[f->num_members - 1];
+	f->num_members--;
+	spin_unlock(&f->lock);
+}
+
+static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
+{
+	struct packet_sock *po = pkt_sk(sk);
+	struct packet_fanout *f, *match;
+	u8 type = type_flags & 0xff;
+	u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0;
+	int err;
+
+	switch (type) {
+	case PACKET_FANOUT_HASH:
+	case PACKET_FANOUT_LB:
+	case PACKET_FANOUT_CPU:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!po->running)
+		return -EINVAL;
+
+	if (po->fanout)
+		return -EALREADY;
+
+	mutex_lock(&fanout_mutex);
+	match = NULL;
+	list_for_each_entry(f, &fanout_list, list) {
+		if (f->id == id &&
+		    read_pnet(&f->net) == sock_net(sk)) {
+			match = f;
+			break;
+		}
+	}
+	err = -EINVAL;
+	if (match && match->defrag != defrag)
+		goto out;
+	if (!match) {
+		err = -ENOMEM;
+		match = kzalloc(sizeof(*match), GFP_KERNEL);
+		if (!match)
+			goto out;
+		write_pnet(&match->net, sock_net(sk));
+		match->id = id;
+		match->type = type;
+		match->defrag = defrag;
+		atomic_set(&match->rr_cur, 0);
+		INIT_LIST_HEAD(&match->list);
+		spin_lock_init(&match->lock);
+		atomic_set(&match->sk_ref, 0);
+		match->prot_hook.type = po->prot_hook.type;
+		match->prot_hook.dev = po->prot_hook.dev;
+		match->prot_hook.func = packet_rcv_fanout;
+		match->prot_hook.af_packet_priv = match;
+		dev_add_pack(&match->prot_hook);
+		list_add(&match->list, &fanout_list);
+	}
+	err = -EINVAL;
+	if (match->type == type &&
+	    match->prot_hook.type == po->prot_hook.type &&
+	    match->prot_hook.dev == po->prot_hook.dev) {
+		err = -ENOSPC;
+		if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
+			__dev_remove_pack(&po->prot_hook);
+			po->fanout = match;
+			atomic_inc(&match->sk_ref);
+			__fanout_link(sk, po);
+			err = 0;
+		}
+	}
+out:
+	mutex_unlock(&fanout_mutex);
+	return err;
+}
+
+static void fanout_release(struct sock *sk)
+{
+	struct packet_sock *po = pkt_sk(sk);
+	struct packet_fanout *f;
+
+	f = po->fanout;
+	if (!f)
+		return;
+
+	po->fanout = NULL;
+
+	mutex_lock(&fanout_mutex);
+	if (atomic_dec_and_test(&f->sk_ref)) {
+		list_del(&f->list);
+		dev_remove_pack(&f->prot_hook);
+		kfree(f);
+	}
+	mutex_unlock(&fanout_mutex);
+}
 
 static const struct proto_ops packet_ops;
 
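Note: fanout_demux_hash() above scales the 32-bit skb->rxhash into the range [0, num) with a multiply-and-shift rather than a modulo. A minimal standalone sketch of that index calculation follows; the helper name and sample hash values are illustrative only, not part of the kernel source.

#include <stdint.h>
#include <stdio.h>

/* Same mapping as fanout_demux_hash(): scale a 32-bit hash into [0, num). */
static unsigned int fanout_hash_index(uint32_t hash, unsigned int num)
{
	return ((uint64_t)hash * num) >> 32;
}

int main(void)
{
	/* Illustrative hashes; in the kernel they come from skb_get_rxhash(). */
	uint32_t hashes[] = { 0x00000000u, 0x40000000u, 0x80000000u, 0xffffffffu };
	unsigned int num = 4;	/* number of sockets in the fanout group */
	unsigned int i;

	for (i = 0; i < 4; i++)
		printf("hash %08x -> socket index %u\n", hashes[i],
		       fanout_hash_index(hashes[i], num));
	return 0;
}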
@@ -822,7 +1129,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
	else
		sll->sll_ifindex = dev->ifindex;
 
-	__packet_set_status(po, h.raw, status);
	smp_mb();
 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	{
@@ -831,8 +1137,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
		for (start = h.raw; start < end; start += PAGE_SIZE)
			flush_dcache_page(pgv_to_page(start));
+		smp_wmb();
	}
 #endif
+	__packet_set_status(po, h.raw, status);
 
	sk->sk_data_ready(sk, 0);
 
@@ -975,7 +1283,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
-	int ifindex, err, reserve = 0;
+	bool need_rls_dev = false;
+	int err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
@@ -987,7 +1296,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 
	err = -EBUSY;
	if (saddr == NULL) {
-		ifindex = po->ifindex;
+		dev = po->prot_hook.dev;
		proto = po->num;
		addr = NULL;
	} else {
@@ -998,12 +1307,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
					+ offsetof(struct sockaddr_ll,
					sll_addr)))
			goto out;
-		ifindex = saddr->sll_ifindex;
		proto = saddr->sll_protocol;
		addr = saddr->sll_addr;
+		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
+		need_rls_dev = true;
	}
 
-	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;
@@ -1089,7 +1398,8 @@ out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
 out_put:
-	dev_put(dev);
+	if (need_rls_dev)
+		dev_put(dev);
 out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
@@ -1127,8 +1437,9 @@ static int packet_snd(struct socket *sock,
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
+	bool need_rls_dev = false;
	unsigned char *addr;
-	int ifindex, err, reserve = 0;
+	int err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
@@ -1140,7 +1451,7 @@ static int packet_snd(struct socket *sock,
	 */
 
	if (saddr == NULL) {
-		ifindex = po->ifindex;
+		dev = po->prot_hook.dev;
		proto = po->num;
		addr = NULL;
	} else {
@@ -1149,13 +1460,12 @@ static int packet_snd(struct socket *sock,
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
-		ifindex = saddr->sll_ifindex;
		proto = saddr->sll_protocol;
		addr = saddr->sll_addr;
+		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
+		need_rls_dev = true;
	}
 
-
-	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
@@ -1286,14 +1596,15 @@ static int packet_snd(struct socket *sock,
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;
 
-	dev_put(dev);
+	if (need_rls_dev)
+		dev_put(dev);
 
	return len;
 
 out_free:
	kfree_skb(skb);
 out_unlock:
-	if (dev)
+	if (dev && need_rls_dev)
		dev_put(dev);
 out:
	return err;
@@ -1334,14 +1645,10 @@ static int packet_release(struct socket *sock)
	spin_unlock_bh(&net->packet.sklist_lock);
 
	spin_lock(&po->bind_lock);
-	if (po->running) {
-		/*
-		 *	Remove from protocol table
-		 */
-		po->running = 0;
-		po->num = 0;
-		__dev_remove_pack(&po->prot_hook);
-		__sock_put(sk);
+	unregister_prot_hook(sk, false);
+	if (po->prot_hook.dev) {
+		dev_put(po->prot_hook.dev);
+		po->prot_hook.dev = NULL;
	}
	spin_unlock(&po->bind_lock);
 
@@ -1355,6 +1662,8 @@ static int packet_release(struct socket *sock)
	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);
 
+	fanout_release(sk);
+
	synchronize_net();
	/*
	 *	Now the socket is dead. No more input will appear.
@@ -1378,24 +1687,18 @@ static int packet_release(struct socket *sock)
 static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
 {
	struct packet_sock *po = pkt_sk(sk);
-	/*
-	 *	Detach an existing hook if present.
-	 */
+
+	if (po->fanout)
+		return -EINVAL;
 
	lock_sock(sk);
 
	spin_lock(&po->bind_lock);
-	if (po->running) {
-		__sock_put(sk);
-		po->running = 0;
-		po->num = 0;
-		spin_unlock(&po->bind_lock);
-		dev_remove_pack(&po->prot_hook);
-		spin_lock(&po->bind_lock);
-	}
-
+	unregister_prot_hook(sk, true);
	po->num = protocol;
	po->prot_hook.type = protocol;
+	if (po->prot_hook.dev)
+		dev_put(po->prot_hook.dev);
	po->prot_hook.dev = dev;
 
	po->ifindex = dev ? dev->ifindex : 0;
@@ -1404,9 +1707,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc
		goto out_unlock;
 
	if (!dev || (dev->flags & IFF_UP)) {
-		dev_add_pack(&po->prot_hook);
-		sock_hold(sk);
-		po->running = 1;
+		register_prot_hook(sk);
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
@@ -1440,10 +1741,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
	strlcpy(name, uaddr->sa_data, sizeof(name));
 
	dev = dev_get_by_name(sock_net(sk), name);
-	if (dev) {
+	if (dev)
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
-		dev_put(dev);
-	}
	return err;
 }
 
@@ -1471,8 +1770,6 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
		goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
-	if (dev)
-		dev_put(dev);
 
 out:
	return err;
@@ -1537,9 +1834,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 
	if (proto) {
		po->prot_hook.type = proto;
-		dev_add_pack(&po->prot_hook);
-		sock_hold(sk);
-		po->running = 1;
+		register_prot_hook(sk);
	}
 
	spin_lock_bh(&net->packet.sklist_lock);
@@ -1681,6 +1976,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb_checksum_start_offset(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
+		} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+			vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
		} /* else everything is zero */
 
		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
@@ -2102,6 +2399,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
		po->tp_tstamp = val;
		return 0;
	}
+	case PACKET_FANOUT:
+	{
+		int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+
+		return fanout_add(sk, val & 0xffff, val >> 16);
+	}
	default:
		return -ENOPROTOOPT;
	}
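Note: from userspace, a socket joins a fanout group through this new PACKET_FANOUT option, packing the group id into the low 16 bits and the mode into the high 16 bits, mirroring fanout_add(sk, val & 0xffff, val >> 16) above. A minimal sketch follows, assuming PACKET_FANOUT and the PACKET_FANOUT_* constants are exported via <linux/if_packet.h>; the helper name and group id are illustrative.

#include <stdio.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>

/* Join fanout group 'group_id' with the given mode (PACKET_FANOUT_HASH/LB/CPU). */
static int join_fanout(int fd, unsigned short group_id, unsigned short mode)
{
	int val = group_id | (mode << 16);	/* low 16 bits: id, high 16 bits: mode */

	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val)) < 0) {
		perror("setsockopt(PACKET_FANOUT)");
		return -1;
	}
	return 0;
}

int main(void)
{
	/* A nonzero protocol registers the hook at create time, which fanout_add() requires. */
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (fd < 0 || join_fanout(fd, 42, PACKET_FANOUT_HASH) < 0)
		return 1;
	/* ... receive as usual; the kernel spreads flows across the group members ... */
	return 0;
}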
@@ -2200,6 +2508,15 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
		val = po->tp_tstamp;
		data = &val;
		break;
+	case PACKET_FANOUT:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = (po->fanout ?
+		       ((u32)po->fanout->id |
+			((u32)po->fanout->type << 16)) :
+		       0);
+		data = &val;
+		break;
	default:
		return -ENOPROTOOPT;
	}
@@ -2233,15 +2550,15 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
-					__dev_remove_pack(&po->prot_hook);
-					__sock_put(sk);
-					po->running = 0;
+					__unregister_prot_hook(sk, false);
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
+					if (po->prot_hook.dev)
+						dev_put(po->prot_hook.dev);
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
@@ -2250,11 +2567,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
		case NETDEV_UP:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
-				if (po->num && !po->running) {
-					dev_add_pack(&po->prot_hook);
-					sock_hold(sk);
-					po->running = 1;
-				}
+				if (po->num)
+					register_prot_hook(sk);
				spin_unlock(&po->bind_lock);
			}
			break;
@@ -2521,10 +2835,8 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
	was_running = po->running;
	num = po->num;
	if (was_running) {
-		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
-		po->running = 0;
-		__sock_put(sk);
+		__unregister_prot_hook(sk, false);
	}
	spin_unlock(&po->bind_lock);
 
@@ -2555,11 +2867,9 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
	mutex_unlock(&po->pg_vec_lock);
 
	spin_lock(&po->bind_lock);
-	if (was_running && !po->running) {
-		sock_hold(sk);
-		po->running = 1;
+	if (was_running) {
		po->num = num;
-		dev_add_pack(&po->prot_hook);
+		register_prot_hook(sk);
	}
	spin_unlock(&po->bind_lock);
 