diff options
Diffstat (limited to 'net/packet/af_packet.c')
-rw-r--r-- | net/packet/af_packet.c | 443 |
1 files changed, 378 insertions, 65 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c0c3cda1971..fabb4fafa28 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -187,9 +187,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); | |||
187 | 187 | ||
188 | static void packet_flush_mclist(struct sock *sk); | 188 | static void packet_flush_mclist(struct sock *sk); |
189 | 189 | ||
190 | struct packet_fanout; | ||
190 | struct packet_sock { | 191 | struct packet_sock { |
191 | /* struct sock has to be the first member of packet_sock */ | 192 | /* struct sock has to be the first member of packet_sock */ |
192 | struct sock sk; | 193 | struct sock sk; |
194 | struct packet_fanout *fanout; | ||
193 | struct tpacket_stats stats; | 195 | struct tpacket_stats stats; |
194 | struct packet_ring_buffer rx_ring; | 196 | struct packet_ring_buffer rx_ring; |
195 | struct packet_ring_buffer tx_ring; | 197 | struct packet_ring_buffer tx_ring; |
@@ -212,6 +214,24 @@ struct packet_sock { | |||
212 | struct packet_type prot_hook ____cacheline_aligned_in_smp; | 214 | struct packet_type prot_hook ____cacheline_aligned_in_smp; |
213 | }; | 215 | }; |
214 | 216 | ||
217 | #define PACKET_FANOUT_MAX 256 | ||
218 | |||
219 | struct packet_fanout { | ||
220 | #ifdef CONFIG_NET_NS | ||
221 | struct net *net; | ||
222 | #endif | ||
223 | unsigned int num_members; | ||
224 | u16 id; | ||
225 | u8 type; | ||
226 | u8 defrag; | ||
227 | atomic_t rr_cur; | ||
228 | struct list_head list; | ||
229 | struct sock *arr[PACKET_FANOUT_MAX]; | ||
230 | spinlock_t lock; | ||
231 | atomic_t sk_ref; | ||
232 | struct packet_type prot_hook ____cacheline_aligned_in_smp; | ||
233 | }; | ||
234 | |||
215 | struct packet_skb_cb { | 235 | struct packet_skb_cb { |
216 | unsigned int origlen; | 236 | unsigned int origlen; |
217 | union { | 237 | union { |
@@ -222,6 +242,64 @@ struct packet_skb_cb { | |||
222 | 242 | ||
223 | #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) | 243 | #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) |
224 | 244 | ||
/*
 * Convert a struct sock pointer into the packet_sock that contains it.
 * This is a plain cast, valid because struct sock is the first member
 * of struct packet_sock.
 */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	struct packet_sock *po = (struct packet_sock *)sk;

	return po;
}
249 | |||
250 | static void __fanout_unlink(struct sock *sk, struct packet_sock *po); | ||
251 | static void __fanout_link(struct sock *sk, struct packet_sock *po); | ||
252 | |||
253 | /* register_prot_hook must be invoked with the po->bind_lock held, | ||
254 | * or from a context in which asynchronous accesses to the packet | ||
255 | * socket is not possible (packet_create()). | ||
256 | */ | ||
257 | static void register_prot_hook(struct sock *sk) | ||
258 | { | ||
259 | struct packet_sock *po = pkt_sk(sk); | ||
260 | if (!po->running) { | ||
261 | if (po->fanout) | ||
262 | __fanout_link(sk, po); | ||
263 | else | ||
264 | dev_add_pack(&po->prot_hook); | ||
265 | sock_hold(sk); | ||
266 | po->running = 1; | ||
267 | } | ||
268 | } | ||
269 | |||
270 | /* {,__}unregister_prot_hook() must be invoked with the po->bind_lock | ||
271 | * held. If the sync parameter is true, we will temporarily drop | ||
272 | * the po->bind_lock and do a synchronize_net to make sure no | ||
273 | * asynchronous packet processing paths still refer to the elements | ||
274 | * of po->prot_hook. If the sync parameter is false, it is the | ||
275 | * callers responsibility to take care of this. | ||
276 | */ | ||
277 | static void __unregister_prot_hook(struct sock *sk, bool sync) | ||
278 | { | ||
279 | struct packet_sock *po = pkt_sk(sk); | ||
280 | |||
281 | po->running = 0; | ||
282 | if (po->fanout) | ||
283 | __fanout_unlink(sk, po); | ||
284 | else | ||
285 | __dev_remove_pack(&po->prot_hook); | ||
286 | __sock_put(sk); | ||
287 | |||
288 | if (sync) { | ||
289 | spin_unlock(&po->bind_lock); | ||
290 | synchronize_net(); | ||
291 | spin_lock(&po->bind_lock); | ||
292 | } | ||
293 | } | ||
294 | |||
295 | static void unregister_prot_hook(struct sock *sk, bool sync) | ||
296 | { | ||
297 | struct packet_sock *po = pkt_sk(sk); | ||
298 | |||
299 | if (po->running) | ||
300 | __unregister_prot_hook(sk, sync); | ||
301 | } | ||
302 | |||
225 | static inline __pure struct page *pgv_to_page(void *addr) | 303 | static inline __pure struct page *pgv_to_page(void *addr) |
226 | { | 304 | { |
227 | if (is_vmalloc_addr(addr)) | 305 | if (is_vmalloc_addr(addr)) |
@@ -324,11 +402,6 @@ static inline void packet_increment_head(struct packet_ring_buffer *buff) | |||
324 | buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; | 402 | buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; |
325 | } | 403 | } |
326 | 404 | ||
327 | static inline struct packet_sock *pkt_sk(struct sock *sk) | ||
328 | { | ||
329 | return (struct packet_sock *)sk; | ||
330 | } | ||
331 | |||
332 | static void packet_sock_destruct(struct sock *sk) | 405 | static void packet_sock_destruct(struct sock *sk) |
333 | { | 406 | { |
334 | skb_queue_purge(&sk->sk_error_queue); | 407 | skb_queue_purge(&sk->sk_error_queue); |
@@ -344,6 +417,240 @@ static void packet_sock_destruct(struct sock *sk) | |||
344 | sk_refcnt_debug_dec(sk); | 417 | sk_refcnt_debug_dec(sk); |
345 | } | 418 | } |
346 | 419 | ||
420 | static int fanout_rr_next(struct packet_fanout *f, unsigned int num) | ||
421 | { | ||
422 | int x = atomic_read(&f->rr_cur) + 1; | ||
423 | |||
424 | if (x >= num) | ||
425 | x = 0; | ||
426 | |||
427 | return x; | ||
428 | } | ||
429 | |||
430 | static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) | ||
431 | { | ||
432 | u32 idx, hash = skb->rxhash; | ||
433 | |||
434 | idx = ((u64)hash * num) >> 32; | ||
435 | |||
436 | return f->arr[idx]; | ||
437 | } | ||
438 | |||
439 | static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) | ||
440 | { | ||
441 | int cur, old; | ||
442 | |||
443 | cur = atomic_read(&f->rr_cur); | ||
444 | while ((old = atomic_cmpxchg(&f->rr_cur, cur, | ||
445 | fanout_rr_next(f, num))) != cur) | ||
446 | cur = old; | ||
447 | return f->arr[cur]; | ||
448 | } | ||
449 | |||
450 | static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) | ||
451 | { | ||
452 | unsigned int cpu = smp_processor_id(); | ||
453 | |||
454 | return f->arr[cpu % num]; | ||
455 | } | ||
456 | |||
/* Pass IPv4 fragments to ip_defrag() before the fanout hash is taken.
 *
 * Returns @skb untouched when CONFIG_INET is off, when the packet is
 * not ETH_P_IP, when the IP header cannot be pulled or fails the basic
 * version/length checks, when the packet is not a fragment, or when
 * trimming to the IP total length fails.
 *
 * Returns NULL when skb_share_check() yields no skb or when
 * ip_defrag() returns non-zero; the caller then stops processing.
 *
 * Otherwise returns the skb after ip_defrag() with skb->rxhash reset
 * to 0.
 */
static struct sk_buff *fanout_check_defrag(struct sk_buff *skb)
{
#ifdef CONFIG_INET
	const struct iphdr *iph;
	u32 len;

	if (skb->protocol != htons(ETH_P_IP) ||
	    !pskb_may_pull(skb, sizeof(struct iphdr)))
		return skb;

	iph = ip_hdr(skb);
	if (iph->ihl < 5 || iph->version != 4 ||
	    !pskb_may_pull(skb, iph->ihl*4))
		return skb;

	/* Re-read the header pointer after the second pull. */
	iph = ip_hdr(skb);
	len = ntohs(iph->tot_len);
	if (skb->len < len || len < (iph->ihl * 4))
		return skb;

	if (!ip_is_fragment(ip_hdr(skb)))
		return skb;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb)
		return NULL;

	if (pskb_trim_rcsum(skb, len))
		return skb;

	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
	if (ip_defrag(skb, IP_DEFRAG_AF_PACKET))
		return NULL;
	skb->rxhash = 0;
#endif
	return skb;
}
493 | |||
494 | static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, | ||
495 | struct packet_type *pt, struct net_device *orig_dev) | ||
496 | { | ||
497 | struct packet_fanout *f = pt->af_packet_priv; | ||
498 | unsigned int num = f->num_members; | ||
499 | struct packet_sock *po; | ||
500 | struct sock *sk; | ||
501 | |||
502 | if (!net_eq(dev_net(dev), read_pnet(&f->net)) || | ||
503 | !num) { | ||
504 | kfree_skb(skb); | ||
505 | return 0; | ||
506 | } | ||
507 | |||
508 | switch (f->type) { | ||
509 | case PACKET_FANOUT_HASH: | ||
510 | default: | ||
511 | if (f->defrag) { | ||
512 | skb = fanout_check_defrag(skb); | ||
513 | if (!skb) | ||
514 | return 0; | ||
515 | } | ||
516 | skb_get_rxhash(skb); | ||
517 | sk = fanout_demux_hash(f, skb, num); | ||
518 | break; | ||
519 | case PACKET_FANOUT_LB: | ||
520 | sk = fanout_demux_lb(f, skb, num); | ||
521 | break; | ||
522 | case PACKET_FANOUT_CPU: | ||
523 | sk = fanout_demux_cpu(f, skb, num); | ||
524 | break; | ||
525 | } | ||
526 | |||
527 | po = pkt_sk(sk); | ||
528 | |||
529 | return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); | ||
530 | } | ||
531 | |||
532 | static DEFINE_MUTEX(fanout_mutex); | ||
533 | static LIST_HEAD(fanout_list); | ||
534 | |||
535 | static void __fanout_link(struct sock *sk, struct packet_sock *po) | ||
536 | { | ||
537 | struct packet_fanout *f = po->fanout; | ||
538 | |||
539 | spin_lock(&f->lock); | ||
540 | f->arr[f->num_members] = sk; | ||
541 | smp_wmb(); | ||
542 | f->num_members++; | ||
543 | spin_unlock(&f->lock); | ||
544 | } | ||
545 | |||
546 | static void __fanout_unlink(struct sock *sk, struct packet_sock *po) | ||
547 | { | ||
548 | struct packet_fanout *f = po->fanout; | ||
549 | int i; | ||
550 | |||
551 | spin_lock(&f->lock); | ||
552 | for (i = 0; i < f->num_members; i++) { | ||
553 | if (f->arr[i] == sk) | ||
554 | break; | ||
555 | } | ||
556 | BUG_ON(i >= f->num_members); | ||
557 | f->arr[i] = f->arr[f->num_members - 1]; | ||
558 | f->num_members--; | ||
559 | spin_unlock(&f->lock); | ||
560 | } | ||
561 | |||
562 | static int fanout_add(struct sock *sk, u16 id, u16 type_flags) | ||
563 | { | ||
564 | struct packet_sock *po = pkt_sk(sk); | ||
565 | struct packet_fanout *f, *match; | ||
566 | u8 type = type_flags & 0xff; | ||
567 | u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0; | ||
568 | int err; | ||
569 | |||
570 | switch (type) { | ||
571 | case PACKET_FANOUT_HASH: | ||
572 | case PACKET_FANOUT_LB: | ||
573 | case PACKET_FANOUT_CPU: | ||
574 | break; | ||
575 | default: | ||
576 | return -EINVAL; | ||
577 | } | ||
578 | |||
579 | if (!po->running) | ||
580 | return -EINVAL; | ||
581 | |||
582 | if (po->fanout) | ||
583 | return -EALREADY; | ||
584 | |||
585 | mutex_lock(&fanout_mutex); | ||
586 | match = NULL; | ||
587 | list_for_each_entry(f, &fanout_list, list) { | ||
588 | if (f->id == id && | ||
589 | read_pnet(&f->net) == sock_net(sk)) { | ||
590 | match = f; | ||
591 | break; | ||
592 | } | ||
593 | } | ||
594 | err = -EINVAL; | ||
595 | if (match && match->defrag != defrag) | ||
596 | goto out; | ||
597 | if (!match) { | ||
598 | err = -ENOMEM; | ||
599 | match = kzalloc(sizeof(*match), GFP_KERNEL); | ||
600 | if (!match) | ||
601 | goto out; | ||
602 | write_pnet(&match->net, sock_net(sk)); | ||
603 | match->id = id; | ||
604 | match->type = type; | ||
605 | match->defrag = defrag; | ||
606 | atomic_set(&match->rr_cur, 0); | ||
607 | INIT_LIST_HEAD(&match->list); | ||
608 | spin_lock_init(&match->lock); | ||
609 | atomic_set(&match->sk_ref, 0); | ||
610 | match->prot_hook.type = po->prot_hook.type; | ||
611 | match->prot_hook.dev = po->prot_hook.dev; | ||
612 | match->prot_hook.func = packet_rcv_fanout; | ||
613 | match->prot_hook.af_packet_priv = match; | ||
614 | dev_add_pack(&match->prot_hook); | ||
615 | list_add(&match->list, &fanout_list); | ||
616 | } | ||
617 | err = -EINVAL; | ||
618 | if (match->type == type && | ||
619 | match->prot_hook.type == po->prot_hook.type && | ||
620 | match->prot_hook.dev == po->prot_hook.dev) { | ||
621 | err = -ENOSPC; | ||
622 | if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) { | ||
623 | __dev_remove_pack(&po->prot_hook); | ||
624 | po->fanout = match; | ||
625 | atomic_inc(&match->sk_ref); | ||
626 | __fanout_link(sk, po); | ||
627 | err = 0; | ||
628 | } | ||
629 | } | ||
630 | out: | ||
631 | mutex_unlock(&fanout_mutex); | ||
632 | return err; | ||
633 | } | ||
634 | |||
635 | static void fanout_release(struct sock *sk) | ||
636 | { | ||
637 | struct packet_sock *po = pkt_sk(sk); | ||
638 | struct packet_fanout *f; | ||
639 | |||
640 | f = po->fanout; | ||
641 | if (!f) | ||
642 | return; | ||
643 | |||
644 | po->fanout = NULL; | ||
645 | |||
646 | mutex_lock(&fanout_mutex); | ||
647 | if (atomic_dec_and_test(&f->sk_ref)) { | ||
648 | list_del(&f->list); | ||
649 | dev_remove_pack(&f->prot_hook); | ||
650 | kfree(f); | ||
651 | } | ||
652 | mutex_unlock(&fanout_mutex); | ||
653 | } | ||
347 | 654 | ||
348 | static const struct proto_ops packet_ops; | 655 | static const struct proto_ops packet_ops; |
349 | 656 | ||
@@ -654,7 +961,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, | |||
654 | return 0; | 961 | return 0; |
655 | 962 | ||
656 | drop_n_acct: | 963 | drop_n_acct: |
657 | po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); | 964 | spin_lock(&sk->sk_receive_queue.lock); |
965 | po->stats.tp_drops++; | ||
966 | atomic_inc(&sk->sk_drops); | ||
967 | spin_unlock(&sk->sk_receive_queue.lock); | ||
658 | 968 | ||
659 | drop_n_restore: | 969 | drop_n_restore: |
660 | if (skb_head != skb->data && skb_shared(skb)) { | 970 | if (skb_head != skb->data && skb_shared(skb)) { |
@@ -822,7 +1132,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, | |||
822 | else | 1132 | else |
823 | sll->sll_ifindex = dev->ifindex; | 1133 | sll->sll_ifindex = dev->ifindex; |
824 | 1134 | ||
825 | __packet_set_status(po, h.raw, status); | ||
826 | smp_mb(); | 1135 | smp_mb(); |
827 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 | 1136 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 |
828 | { | 1137 | { |
@@ -831,8 +1140,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, | |||
831 | end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen); | 1140 | end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen); |
832 | for (start = h.raw; start < end; start += PAGE_SIZE) | 1141 | for (start = h.raw; start < end; start += PAGE_SIZE) |
833 | flush_dcache_page(pgv_to_page(start)); | 1142 | flush_dcache_page(pgv_to_page(start)); |
1143 | smp_wmb(); | ||
834 | } | 1144 | } |
835 | #endif | 1145 | #endif |
1146 | __packet_set_status(po, h.raw, status); | ||
836 | 1147 | ||
837 | sk->sk_data_ready(sk, 0); | 1148 | sk->sk_data_ready(sk, 0); |
838 | 1149 | ||
@@ -975,7 +1286,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) | |||
975 | struct sk_buff *skb; | 1286 | struct sk_buff *skb; |
976 | struct net_device *dev; | 1287 | struct net_device *dev; |
977 | __be16 proto; | 1288 | __be16 proto; |
978 | int ifindex, err, reserve = 0; | 1289 | bool need_rls_dev = false; |
1290 | int err, reserve = 0; | ||
979 | void *ph; | 1291 | void *ph; |
980 | struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; | 1292 | struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; |
981 | int tp_len, size_max; | 1293 | int tp_len, size_max; |
@@ -987,7 +1299,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) | |||
987 | 1299 | ||
988 | err = -EBUSY; | 1300 | err = -EBUSY; |
989 | if (saddr == NULL) { | 1301 | if (saddr == NULL) { |
990 | ifindex = po->ifindex; | 1302 | dev = po->prot_hook.dev; |
991 | proto = po->num; | 1303 | proto = po->num; |
992 | addr = NULL; | 1304 | addr = NULL; |
993 | } else { | 1305 | } else { |
@@ -998,12 +1310,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) | |||
998 | + offsetof(struct sockaddr_ll, | 1310 | + offsetof(struct sockaddr_ll, |
999 | sll_addr))) | 1311 | sll_addr))) |
1000 | goto out; | 1312 | goto out; |
1001 | ifindex = saddr->sll_ifindex; | ||
1002 | proto = saddr->sll_protocol; | 1313 | proto = saddr->sll_protocol; |
1003 | addr = saddr->sll_addr; | 1314 | addr = saddr->sll_addr; |
1315 | dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); | ||
1316 | need_rls_dev = true; | ||
1004 | } | 1317 | } |
1005 | 1318 | ||
1006 | dev = dev_get_by_index(sock_net(&po->sk), ifindex); | ||
1007 | err = -ENXIO; | 1319 | err = -ENXIO; |
1008 | if (unlikely(dev == NULL)) | 1320 | if (unlikely(dev == NULL)) |
1009 | goto out; | 1321 | goto out; |
@@ -1089,7 +1401,8 @@ out_status: | |||
1089 | __packet_set_status(po, ph, status); | 1401 | __packet_set_status(po, ph, status); |
1090 | kfree_skb(skb); | 1402 | kfree_skb(skb); |
1091 | out_put: | 1403 | out_put: |
1092 | dev_put(dev); | 1404 | if (need_rls_dev) |
1405 | dev_put(dev); | ||
1093 | out: | 1406 | out: |
1094 | mutex_unlock(&po->pg_vec_lock); | 1407 | mutex_unlock(&po->pg_vec_lock); |
1095 | return err; | 1408 | return err; |
@@ -1127,8 +1440,9 @@ static int packet_snd(struct socket *sock, | |||
1127 | struct sk_buff *skb; | 1440 | struct sk_buff *skb; |
1128 | struct net_device *dev; | 1441 | struct net_device *dev; |
1129 | __be16 proto; | 1442 | __be16 proto; |
1443 | bool need_rls_dev = false; | ||
1130 | unsigned char *addr; | 1444 | unsigned char *addr; |
1131 | int ifindex, err, reserve = 0; | 1445 | int err, reserve = 0; |
1132 | struct virtio_net_hdr vnet_hdr = { 0 }; | 1446 | struct virtio_net_hdr vnet_hdr = { 0 }; |
1133 | int offset = 0; | 1447 | int offset = 0; |
1134 | int vnet_hdr_len; | 1448 | int vnet_hdr_len; |
@@ -1140,7 +1454,7 @@ static int packet_snd(struct socket *sock, | |||
1140 | */ | 1454 | */ |
1141 | 1455 | ||
1142 | if (saddr == NULL) { | 1456 | if (saddr == NULL) { |
1143 | ifindex = po->ifindex; | 1457 | dev = po->prot_hook.dev; |
1144 | proto = po->num; | 1458 | proto = po->num; |
1145 | addr = NULL; | 1459 | addr = NULL; |
1146 | } else { | 1460 | } else { |
@@ -1149,13 +1463,12 @@ static int packet_snd(struct socket *sock, | |||
1149 | goto out; | 1463 | goto out; |
1150 | if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) | 1464 | if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) |
1151 | goto out; | 1465 | goto out; |
1152 | ifindex = saddr->sll_ifindex; | ||
1153 | proto = saddr->sll_protocol; | 1466 | proto = saddr->sll_protocol; |
1154 | addr = saddr->sll_addr; | 1467 | addr = saddr->sll_addr; |
1468 | dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); | ||
1469 | need_rls_dev = true; | ||
1155 | } | 1470 | } |
1156 | 1471 | ||
1157 | |||
1158 | dev = dev_get_by_index(sock_net(sk), ifindex); | ||
1159 | err = -ENXIO; | 1472 | err = -ENXIO; |
1160 | if (dev == NULL) | 1473 | if (dev == NULL) |
1161 | goto out_unlock; | 1474 | goto out_unlock; |
@@ -1286,14 +1599,15 @@ static int packet_snd(struct socket *sock, | |||
1286 | if (err > 0 && (err = net_xmit_errno(err)) != 0) | 1599 | if (err > 0 && (err = net_xmit_errno(err)) != 0) |
1287 | goto out_unlock; | 1600 | goto out_unlock; |
1288 | 1601 | ||
1289 | dev_put(dev); | 1602 | if (need_rls_dev) |
1603 | dev_put(dev); | ||
1290 | 1604 | ||
1291 | return len; | 1605 | return len; |
1292 | 1606 | ||
1293 | out_free: | 1607 | out_free: |
1294 | kfree_skb(skb); | 1608 | kfree_skb(skb); |
1295 | out_unlock: | 1609 | out_unlock: |
1296 | if (dev) | 1610 | if (dev && need_rls_dev) |
1297 | dev_put(dev); | 1611 | dev_put(dev); |
1298 | out: | 1612 | out: |
1299 | return err; | 1613 | return err; |
@@ -1334,14 +1648,10 @@ static int packet_release(struct socket *sock) | |||
1334 | spin_unlock_bh(&net->packet.sklist_lock); | 1648 | spin_unlock_bh(&net->packet.sklist_lock); |
1335 | 1649 | ||
1336 | spin_lock(&po->bind_lock); | 1650 | spin_lock(&po->bind_lock); |
1337 | if (po->running) { | 1651 | unregister_prot_hook(sk, false); |
1338 | /* | 1652 | if (po->prot_hook.dev) { |
1339 | * Remove from protocol table | 1653 | dev_put(po->prot_hook.dev); |
1340 | */ | 1654 | po->prot_hook.dev = NULL; |
1341 | po->running = 0; | ||
1342 | po->num = 0; | ||
1343 | __dev_remove_pack(&po->prot_hook); | ||
1344 | __sock_put(sk); | ||
1345 | } | 1655 | } |
1346 | spin_unlock(&po->bind_lock); | 1656 | spin_unlock(&po->bind_lock); |
1347 | 1657 | ||
@@ -1355,6 +1665,8 @@ static int packet_release(struct socket *sock) | |||
1355 | if (po->tx_ring.pg_vec) | 1665 | if (po->tx_ring.pg_vec) |
1356 | packet_set_ring(sk, &req, 1, 1); | 1666 | packet_set_ring(sk, &req, 1, 1); |
1357 | 1667 | ||
1668 | fanout_release(sk); | ||
1669 | |||
1358 | synchronize_net(); | 1670 | synchronize_net(); |
1359 | /* | 1671 | /* |
1360 | * Now the socket is dead. No more input will appear. | 1672 | * Now the socket is dead. No more input will appear. |
@@ -1378,24 +1690,18 @@ static int packet_release(struct socket *sock) | |||
1378 | static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol) | 1690 | static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol) |
1379 | { | 1691 | { |
1380 | struct packet_sock *po = pkt_sk(sk); | 1692 | struct packet_sock *po = pkt_sk(sk); |
1381 | /* | 1693 | |
1382 | * Detach an existing hook if present. | 1694 | if (po->fanout) |
1383 | */ | 1695 | return -EINVAL; |
1384 | 1696 | ||
1385 | lock_sock(sk); | 1697 | lock_sock(sk); |
1386 | 1698 | ||
1387 | spin_lock(&po->bind_lock); | 1699 | spin_lock(&po->bind_lock); |
1388 | if (po->running) { | 1700 | unregister_prot_hook(sk, true); |
1389 | __sock_put(sk); | ||
1390 | po->running = 0; | ||
1391 | po->num = 0; | ||
1392 | spin_unlock(&po->bind_lock); | ||
1393 | dev_remove_pack(&po->prot_hook); | ||
1394 | spin_lock(&po->bind_lock); | ||
1395 | } | ||
1396 | |||
1397 | po->num = protocol; | 1701 | po->num = protocol; |
1398 | po->prot_hook.type = protocol; | 1702 | po->prot_hook.type = protocol; |
1703 | if (po->prot_hook.dev) | ||
1704 | dev_put(po->prot_hook.dev); | ||
1399 | po->prot_hook.dev = dev; | 1705 | po->prot_hook.dev = dev; |
1400 | 1706 | ||
1401 | po->ifindex = dev ? dev->ifindex : 0; | 1707 | po->ifindex = dev ? dev->ifindex : 0; |
@@ -1404,9 +1710,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc | |||
1404 | goto out_unlock; | 1710 | goto out_unlock; |
1405 | 1711 | ||
1406 | if (!dev || (dev->flags & IFF_UP)) { | 1712 | if (!dev || (dev->flags & IFF_UP)) { |
1407 | dev_add_pack(&po->prot_hook); | 1713 | register_prot_hook(sk); |
1408 | sock_hold(sk); | ||
1409 | po->running = 1; | ||
1410 | } else { | 1714 | } else { |
1411 | sk->sk_err = ENETDOWN; | 1715 | sk->sk_err = ENETDOWN; |
1412 | if (!sock_flag(sk, SOCK_DEAD)) | 1716 | if (!sock_flag(sk, SOCK_DEAD)) |
@@ -1440,10 +1744,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, | |||
1440 | strlcpy(name, uaddr->sa_data, sizeof(name)); | 1744 | strlcpy(name, uaddr->sa_data, sizeof(name)); |
1441 | 1745 | ||
1442 | dev = dev_get_by_name(sock_net(sk), name); | 1746 | dev = dev_get_by_name(sock_net(sk), name); |
1443 | if (dev) { | 1747 | if (dev) |
1444 | err = packet_do_bind(sk, dev, pkt_sk(sk)->num); | 1748 | err = packet_do_bind(sk, dev, pkt_sk(sk)->num); |
1445 | dev_put(dev); | ||
1446 | } | ||
1447 | return err; | 1749 | return err; |
1448 | } | 1750 | } |
1449 | 1751 | ||
@@ -1471,8 +1773,6 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len | |||
1471 | goto out; | 1773 | goto out; |
1472 | } | 1774 | } |
1473 | err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); | 1775 | err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); |
1474 | if (dev) | ||
1475 | dev_put(dev); | ||
1476 | 1776 | ||
1477 | out: | 1777 | out: |
1478 | return err; | 1778 | return err; |
@@ -1537,9 +1837,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, | |||
1537 | 1837 | ||
1538 | if (proto) { | 1838 | if (proto) { |
1539 | po->prot_hook.type = proto; | 1839 | po->prot_hook.type = proto; |
1540 | dev_add_pack(&po->prot_hook); | 1840 | register_prot_hook(sk); |
1541 | sock_hold(sk); | ||
1542 | po->running = 1; | ||
1543 | } | 1841 | } |
1544 | 1842 | ||
1545 | spin_lock_bh(&net->packet.sklist_lock); | 1843 | spin_lock_bh(&net->packet.sklist_lock); |
@@ -1681,6 +1979,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1681 | vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; | 1979 | vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; |
1682 | vnet_hdr.csum_start = skb_checksum_start_offset(skb); | 1980 | vnet_hdr.csum_start = skb_checksum_start_offset(skb); |
1683 | vnet_hdr.csum_offset = skb->csum_offset; | 1981 | vnet_hdr.csum_offset = skb->csum_offset; |
1982 | } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { | ||
1983 | vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID; | ||
1684 | } /* else everything is zero */ | 1984 | } /* else everything is zero */ |
1685 | 1985 | ||
1686 | err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr, | 1986 | err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr, |
@@ -2102,6 +2402,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv | |||
2102 | po->tp_tstamp = val; | 2402 | po->tp_tstamp = val; |
2103 | return 0; | 2403 | return 0; |
2104 | } | 2404 | } |
2405 | case PACKET_FANOUT: | ||
2406 | { | ||
2407 | int val; | ||
2408 | |||
2409 | if (optlen != sizeof(val)) | ||
2410 | return -EINVAL; | ||
2411 | if (copy_from_user(&val, optval, sizeof(val))) | ||
2412 | return -EFAULT; | ||
2413 | |||
2414 | return fanout_add(sk, val & 0xffff, val >> 16); | ||
2415 | } | ||
2105 | default: | 2416 | default: |
2106 | return -ENOPROTOOPT; | 2417 | return -ENOPROTOOPT; |
2107 | } | 2418 | } |
@@ -2200,6 +2511,15 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, | |||
2200 | val = po->tp_tstamp; | 2511 | val = po->tp_tstamp; |
2201 | data = &val; | 2512 | data = &val; |
2202 | break; | 2513 | break; |
2514 | case PACKET_FANOUT: | ||
2515 | if (len > sizeof(int)) | ||
2516 | len = sizeof(int); | ||
2517 | val = (po->fanout ? | ||
2518 | ((u32)po->fanout->id | | ||
2519 | ((u32)po->fanout->type << 16)) : | ||
2520 | 0); | ||
2521 | data = &val; | ||
2522 | break; | ||
2203 | default: | 2523 | default: |
2204 | return -ENOPROTOOPT; | 2524 | return -ENOPROTOOPT; |
2205 | } | 2525 | } |
@@ -2233,15 +2553,15 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void | |||
2233 | if (dev->ifindex == po->ifindex) { | 2553 | if (dev->ifindex == po->ifindex) { |
2234 | spin_lock(&po->bind_lock); | 2554 | spin_lock(&po->bind_lock); |
2235 | if (po->running) { | 2555 | if (po->running) { |
2236 | __dev_remove_pack(&po->prot_hook); | 2556 | __unregister_prot_hook(sk, false); |
2237 | __sock_put(sk); | ||
2238 | po->running = 0; | ||
2239 | sk->sk_err = ENETDOWN; | 2557 | sk->sk_err = ENETDOWN; |
2240 | if (!sock_flag(sk, SOCK_DEAD)) | 2558 | if (!sock_flag(sk, SOCK_DEAD)) |
2241 | sk->sk_error_report(sk); | 2559 | sk->sk_error_report(sk); |
2242 | } | 2560 | } |
2243 | if (msg == NETDEV_UNREGISTER) { | 2561 | if (msg == NETDEV_UNREGISTER) { |
2244 | po->ifindex = -1; | 2562 | po->ifindex = -1; |
2563 | if (po->prot_hook.dev) | ||
2564 | dev_put(po->prot_hook.dev); | ||
2245 | po->prot_hook.dev = NULL; | 2565 | po->prot_hook.dev = NULL; |
2246 | } | 2566 | } |
2247 | spin_unlock(&po->bind_lock); | 2567 | spin_unlock(&po->bind_lock); |
@@ -2250,11 +2570,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void | |||
2250 | case NETDEV_UP: | 2570 | case NETDEV_UP: |
2251 | if (dev->ifindex == po->ifindex) { | 2571 | if (dev->ifindex == po->ifindex) { |
2252 | spin_lock(&po->bind_lock); | 2572 | spin_lock(&po->bind_lock); |
2253 | if (po->num && !po->running) { | 2573 | if (po->num) |
2254 | dev_add_pack(&po->prot_hook); | 2574 | register_prot_hook(sk); |
2255 | sock_hold(sk); | ||
2256 | po->running = 1; | ||
2257 | } | ||
2258 | spin_unlock(&po->bind_lock); | 2575 | spin_unlock(&po->bind_lock); |
2259 | } | 2576 | } |
2260 | break; | 2577 | break; |
@@ -2521,10 +2838,8 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, | |||
2521 | was_running = po->running; | 2838 | was_running = po->running; |
2522 | num = po->num; | 2839 | num = po->num; |
2523 | if (was_running) { | 2840 | if (was_running) { |
2524 | __dev_remove_pack(&po->prot_hook); | ||
2525 | po->num = 0; | 2841 | po->num = 0; |
2526 | po->running = 0; | 2842 | __unregister_prot_hook(sk, false); |
2527 | __sock_put(sk); | ||
2528 | } | 2843 | } |
2529 | spin_unlock(&po->bind_lock); | 2844 | spin_unlock(&po->bind_lock); |
2530 | 2845 | ||
@@ -2555,11 +2870,9 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, | |||
2555 | mutex_unlock(&po->pg_vec_lock); | 2870 | mutex_unlock(&po->pg_vec_lock); |
2556 | 2871 | ||
2557 | spin_lock(&po->bind_lock); | 2872 | spin_lock(&po->bind_lock); |
2558 | if (was_running && !po->running) { | 2873 | if (was_running) { |
2559 | sock_hold(sk); | ||
2560 | po->running = 1; | ||
2561 | po->num = num; | 2874 | po->num = num; |
2562 | dev_add_pack(&po->prot_hook); | 2875 | register_prot_hook(sk); |
2563 | } | 2876 | } |
2564 | spin_unlock(&po->bind_lock); | 2877 | spin_unlock(&po->bind_lock); |
2565 | 2878 | ||