about | summary | refs | log | tree | commit | diff | stats
path: root/net/packet
diff options
context:
space:
mode:
Diffstat (limited to 'net/packet')
-rw-r--r-- net/packet/af_packet.c 438
1 files changed, 374 insertions, 64 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c0c3cda1971..c698cec0a44 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -187,9 +187,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
187 187
188static void packet_flush_mclist(struct sock *sk); 188static void packet_flush_mclist(struct sock *sk);
189 189
190struct packet_fanout;
190struct packet_sock { 191struct packet_sock {
191 /* struct sock has to be the first member of packet_sock */ 192 /* struct sock has to be the first member of packet_sock */
192 struct sock sk; 193 struct sock sk;
194 struct packet_fanout *fanout;
193 struct tpacket_stats stats; 195 struct tpacket_stats stats;
194 struct packet_ring_buffer rx_ring; 196 struct packet_ring_buffer rx_ring;
195 struct packet_ring_buffer tx_ring; 197 struct packet_ring_buffer tx_ring;
@@ -212,6 +214,24 @@ struct packet_sock {
212 struct packet_type prot_hook ____cacheline_aligned_in_smp; 214 struct packet_type prot_hook ____cacheline_aligned_in_smp;
213}; 215};
214 216
/* Capacity of packet_fanout.arr; fanout_add() refuses to let sk_ref reach beyond it. */
217#define PACKET_FANOUT_MAX 256
218
/*
 * One fanout group: a set of packet sockets that share a single protocol
 * hook, with each received packet handed to exactly one member chosen by
 * packet_rcv_fanout().
 */
219struct packet_fanout {
220#ifdef CONFIG_NET_NS
 /* Namespace of the group; compared with the device's and the joining socket's namespace. */
221 struct net *net;
222#endif
 /* Number of leading entries of arr[] currently in use; changed under 'lock'. */
223 unsigned int num_members;
 /* Group id that fanout_add() matches against the id passed by the caller. */
224 u16 id;
 /* PACKET_FANOUT_* mode; selects the demux helper in packet_rcv_fanout(). */
225 u8 type;
 /* Non-zero: the hash mode runs fanout_check_defrag() before hashing. */
226 u8 defrag;
 /* Cursor used by fanout_demux_lb() / fanout_rr_next(). */
227 atomic_t rr_cur;
 /* Entry in fanout_list. */
228 struct list_head list;
 /* Member sockets; filled by __fanout_link(), compacted by __fanout_unlink(). */
229 struct sock *arr[PACKET_FANOUT_MAX];
 /* Taken around updates of arr[] and num_members. */
230 spinlock_t lock;
 /* Sockets that joined via fanout_add(); fanout_release() frees the group when it drops to zero. */
231 atomic_t sk_ref;
 /* Hook registered with dev_add_pack(); its func is packet_rcv_fanout(). */
232 struct packet_type prot_hook ____cacheline_aligned_in_smp;
233};
234
215struct packet_skb_cb { 235struct packet_skb_cb {
216 unsigned int origlen; 236 unsigned int origlen;
217 union { 237 union {
@@ -222,6 +242,64 @@ struct packet_skb_cb {
222 242
223#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) 243#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
224 244
/*
 * Reinterpret a generic socket pointer as the packet socket that contains
 * it.  This is a plain pointer cast, so it relies on struct sock being the
 * first member of struct packet_sock.
 */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	struct packet_sock *po = (struct packet_sock *)sk;

	return po;
}
249
/*
 * Forward declarations: the fanout link/unlink helpers are defined further
 * down, but register_prot_hook() and __unregister_prot_hook() call them.
 */
250static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
251static void __fanout_link(struct sock *sk, struct packet_sock *po);
252
253/* register_prot_hook must be invoked with the po->bind_lock held,
254 * or from a context in which asynchronous accesses to the packet
255 * socket is not possible (packet_create()).
256 */
257static void register_prot_hook(struct sock *sk)
258{
259 struct packet_sock *po = pkt_sk(sk);
260 if (!po->running) {
261 if (po->fanout)
262 __fanout_link(sk, po);
263 else
264 dev_add_pack(&po->prot_hook);
265 sock_hold(sk);
266 po->running = 1;
267 }
268}
269
270/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
271 * held. If the sync parameter is true, we will temporarily drop
272 * the po->bind_lock and do a synchronize_net to make sure no
273 * asynchronous packet processing paths still refer to the elements
274 * of po->prot_hook. If the sync parameter is false, it is the
275 * callers responsibility to take care of this.
276 */
277static void __unregister_prot_hook(struct sock *sk, bool sync)
278{
279 struct packet_sock *po = pkt_sk(sk);
280
281 po->running = 0;
282 if (po->fanout)
283 __fanout_unlink(sk, po);
284 else
285 __dev_remove_pack(&po->prot_hook);
286 __sock_put(sk);
287
288 if (sync) {
289 spin_unlock(&po->bind_lock);
290 synchronize_net();
291 spin_lock(&po->bind_lock);
292 }
293}
294
295static void unregister_prot_hook(struct sock *sk, bool sync)
296{
297 struct packet_sock *po = pkt_sk(sk);
298
299 if (po->running)
300 __unregister_prot_hook(sk, sync);
301}
302
225static inline __pure struct page *pgv_to_page(void *addr) 303static inline __pure struct page *pgv_to_page(void *addr)
226{ 304{
227 if (is_vmalloc_addr(addr)) 305 if (is_vmalloc_addr(addr))
@@ -324,11 +402,6 @@ static inline void packet_increment_head(struct packet_ring_buffer *buff)
324 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; 402 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
325} 403}
326 404
327static inline struct packet_sock *pkt_sk(struct sock *sk)
328{
329 return (struct packet_sock *)sk;
330}
331
332static void packet_sock_destruct(struct sock *sk) 405static void packet_sock_destruct(struct sock *sk)
333{ 406{
334 skb_queue_purge(&sk->sk_error_queue); 407 skb_queue_purge(&sk->sk_error_queue);
@@ -344,6 +417,240 @@ static void packet_sock_destruct(struct sock *sk)
344 sk_refcnt_debug_dec(sk); 417 sk_refcnt_debug_dec(sk);
345} 418}
346 419
420static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
421{
422 int x = atomic_read(&f->rr_cur) + 1;
423
424 if (x >= num)
425 x = 0;
426
427 return x;
428}
429
430static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
431{
432 u32 idx, hash = skb->rxhash;
433
434 idx = ((u64)hash * num) >> 32;
435
436 return f->arr[idx];
437}
438
439static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
440{
441 int cur, old;
442
443 cur = atomic_read(&f->rr_cur);
444 while ((old = atomic_cmpxchg(&f->rr_cur, cur,
445 fanout_rr_next(f, num))) != cur)
446 cur = old;
447 return f->arr[cur];
448}
449
450static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
451{
452 unsigned int cpu = smp_processor_id();
453
454 return f->arr[cpu % num];
455}
456
/*
 * Pass IPv4 fragments through ip_defrag() before the fanout hash is taken.
 *
 * Without CONFIG_INET this returns @skb untouched.  With it, @skb is
 * returned as-is unless it carries a well-formed IPv4 header (version 4,
 * ihl >= 5, header pullable, tot_len consistent with skb->len and the
 * header length) and ip_is_fragment() reports a fragment.
 *
 * For a fragment the skb is unshared, trimmed to tot_len and given to
 * ip_defrag().  Returns NULL when skb_share_check() fails or ip_defrag()
 * returns non-zero (presumably the skb was queued or freed - confirm
 * against ip_defrag()); the caller must not touch the skb in that case.
 * When ip_defrag() returns 0 the cached rxhash is cleared and the skb is
 * returned.  If trimming fails the unshared skb is returned unchanged.
 */
static struct sk_buff *fanout_check_defrag(struct sk_buff *skb)
{
#ifdef CONFIG_INET
	const struct iphdr *iph;
	u32 len;

	if (skb->protocol != htons(ETH_P_IP) ||
	    !pskb_may_pull(skb, sizeof(struct iphdr)))
		return skb;

	iph = ip_hdr(skb);
	if (iph->version != 4 || iph->ihl < 5)
		return skb;
	if (!pskb_may_pull(skb, iph->ihl*4))
		return skb;

	/* Header pointer is fetched again after the second pull. */
	iph = ip_hdr(skb);
	len = ntohs(iph->tot_len);
	if (skb->len < len || len < (iph->ihl * 4))
		return skb;

	if (!ip_is_fragment(ip_hdr(skb)))
		return skb;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb)
		return NULL;
	if (pskb_trim_rcsum(skb, len))
		return skb;

	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
	if (ip_defrag(skb, IP_DEFRAG_AF_PACKET))
		return NULL;

	/* Clear the cached hash so it is not reused for the defragmented skb. */
	skb->rxhash = 0;
#endif
	return skb;
}
493
494static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
495 struct packet_type *pt, struct net_device *orig_dev)
496{
497 struct packet_fanout *f = pt->af_packet_priv;
498 unsigned int num = f->num_members;
499 struct packet_sock *po;
500 struct sock *sk;
501
502 if (!net_eq(dev_net(dev), read_pnet(&f->net)) ||
503 !num) {
504 kfree_skb(skb);
505 return 0;
506 }
507
508 switch (f->type) {
509 case PACKET_FANOUT_HASH:
510 default:
511 if (f->defrag) {
512 skb = fanout_check_defrag(skb);
513 if (!skb)
514 return 0;
515 }
516 skb_get_rxhash(skb);
517 sk = fanout_demux_hash(f, skb, num);
518 break;
519 case PACKET_FANOUT_LB:
520 sk = fanout_demux_lb(f, skb, num);
521 break;
522 case PACKET_FANOUT_CPU:
523 sk = fanout_demux_cpu(f, skb, num);
524 break;
525 }
526
527 po = pkt_sk(sk);
528
529 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
530}
531
/* Held by fanout_add() and fanout_release() while they walk or modify fanout_list. */
532static DEFINE_MUTEX(fanout_mutex);
/* All fanout groups that currently exist, linked through packet_fanout.list. */
533static LIST_HEAD(fanout_list);
534
535static void __fanout_link(struct sock *sk, struct packet_sock *po)
536{
537 struct packet_fanout *f = po->fanout;
538
539 spin_lock(&f->lock);
540 f->arr[f->num_members] = sk;
541 smp_wmb();
542 f->num_members++;
543 spin_unlock(&f->lock);
544}
545
546static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
547{
548 struct packet_fanout *f = po->fanout;
549 int i;
550
551 spin_lock(&f->lock);
552 for (i = 0; i < f->num_members; i++) {
553 if (f->arr[i] == sk)
554 break;
555 }
556 BUG_ON(i >= f->num_members);
557 f->arr[i] = f->arr[f->num_members - 1];
558 f->num_members--;
559 spin_unlock(&f->lock);
560}
561
562static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
563{
564 struct packet_sock *po = pkt_sk(sk);
565 struct packet_fanout *f, *match;
566 u8 type = type_flags & 0xff;
567 u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0;
568 int err;
569
570 switch (type) {
571 case PACKET_FANOUT_HASH:
572 case PACKET_FANOUT_LB:
573 case PACKET_FANOUT_CPU:
574 break;
575 default:
576 return -EINVAL;
577 }
578
579 if (!po->running)
580 return -EINVAL;
581
582 if (po->fanout)
583 return -EALREADY;
584
585 mutex_lock(&fanout_mutex);
586 match = NULL;
587 list_for_each_entry(f, &fanout_list, list) {
588 if (f->id == id &&
589 read_pnet(&f->net) == sock_net(sk)) {
590 match = f;
591 break;
592 }
593 }
594 err = -EINVAL;
595 if (match && match->defrag != defrag)
596 goto out;
597 if (!match) {
598 err = -ENOMEM;
599 match = kzalloc(sizeof(*match), GFP_KERNEL);
600 if (!match)
601 goto out;
602 write_pnet(&match->net, sock_net(sk));
603 match->id = id;
604 match->type = type;
605 match->defrag = defrag;
606 atomic_set(&match->rr_cur, 0);
607 INIT_LIST_HEAD(&match->list);
608 spin_lock_init(&match->lock);
609 atomic_set(&match->sk_ref, 0);
610 match->prot_hook.type = po->prot_hook.type;
611 match->prot_hook.dev = po->prot_hook.dev;
612 match->prot_hook.func = packet_rcv_fanout;
613 match->prot_hook.af_packet_priv = match;
614 dev_add_pack(&match->prot_hook);
615 list_add(&match->list, &fanout_list);
616 }
617 err = -EINVAL;
618 if (match->type == type &&
619 match->prot_hook.type == po->prot_hook.type &&
620 match->prot_hook.dev == po->prot_hook.dev) {
621 err = -ENOSPC;
622 if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
623 __dev_remove_pack(&po->prot_hook);
624 po->fanout = match;
625 atomic_inc(&match->sk_ref);
626 __fanout_link(sk, po);
627 err = 0;
628 }
629 }
630out:
631 mutex_unlock(&fanout_mutex);
632 return err;
633}
634
635static void fanout_release(struct sock *sk)
636{
637 struct packet_sock *po = pkt_sk(sk);
638 struct packet_fanout *f;
639
640 f = po->fanout;
641 if (!f)
642 return;
643
644 po->fanout = NULL;
645
646 mutex_lock(&fanout_mutex);
647 if (atomic_dec_and_test(&f->sk_ref)) {
648 list_del(&f->list);
649 dev_remove_pack(&f->prot_hook);
650 kfree(f);
651 }
652 mutex_unlock(&fanout_mutex);
653}
347 654
348static const struct proto_ops packet_ops; 655static const struct proto_ops packet_ops;
349 656
@@ -822,7 +1129,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
822 else 1129 else
823 sll->sll_ifindex = dev->ifindex; 1130 sll->sll_ifindex = dev->ifindex;
824 1131
825 __packet_set_status(po, h.raw, status);
826 smp_mb(); 1132 smp_mb();
827#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 1133#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
828 { 1134 {
@@ -831,8 +1137,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
831 end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen); 1137 end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
832 for (start = h.raw; start < end; start += PAGE_SIZE) 1138 for (start = h.raw; start < end; start += PAGE_SIZE)
833 flush_dcache_page(pgv_to_page(start)); 1139 flush_dcache_page(pgv_to_page(start));
1140 smp_wmb();
834 } 1141 }
835#endif 1142#endif
1143 __packet_set_status(po, h.raw, status);
836 1144
837 sk->sk_data_ready(sk, 0); 1145 sk->sk_data_ready(sk, 0);
838 1146
@@ -975,7 +1283,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
975 struct sk_buff *skb; 1283 struct sk_buff *skb;
976 struct net_device *dev; 1284 struct net_device *dev;
977 __be16 proto; 1285 __be16 proto;
978 int ifindex, err, reserve = 0; 1286 bool need_rls_dev = false;
1287 int err, reserve = 0;
979 void *ph; 1288 void *ph;
980 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; 1289 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
981 int tp_len, size_max; 1290 int tp_len, size_max;
@@ -987,7 +1296,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
987 1296
988 err = -EBUSY; 1297 err = -EBUSY;
989 if (saddr == NULL) { 1298 if (saddr == NULL) {
990 ifindex = po->ifindex; 1299 dev = po->prot_hook.dev;
991 proto = po->num; 1300 proto = po->num;
992 addr = NULL; 1301 addr = NULL;
993 } else { 1302 } else {
@@ -998,12 +1307,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
998 + offsetof(struct sockaddr_ll, 1307 + offsetof(struct sockaddr_ll,
999 sll_addr))) 1308 sll_addr)))
1000 goto out; 1309 goto out;
1001 ifindex = saddr->sll_ifindex;
1002 proto = saddr->sll_protocol; 1310 proto = saddr->sll_protocol;
1003 addr = saddr->sll_addr; 1311 addr = saddr->sll_addr;
1312 dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
1313 need_rls_dev = true;
1004 } 1314 }
1005 1315
1006 dev = dev_get_by_index(sock_net(&po->sk), ifindex);
1007 err = -ENXIO; 1316 err = -ENXIO;
1008 if (unlikely(dev == NULL)) 1317 if (unlikely(dev == NULL))
1009 goto out; 1318 goto out;
@@ -1089,7 +1398,8 @@ out_status:
1089 __packet_set_status(po, ph, status); 1398 __packet_set_status(po, ph, status);
1090 kfree_skb(skb); 1399 kfree_skb(skb);
1091out_put: 1400out_put:
1092 dev_put(dev); 1401 if (need_rls_dev)
1402 dev_put(dev);
1093out: 1403out:
1094 mutex_unlock(&po->pg_vec_lock); 1404 mutex_unlock(&po->pg_vec_lock);
1095 return err; 1405 return err;
@@ -1127,8 +1437,9 @@ static int packet_snd(struct socket *sock,
1127 struct sk_buff *skb; 1437 struct sk_buff *skb;
1128 struct net_device *dev; 1438 struct net_device *dev;
1129 __be16 proto; 1439 __be16 proto;
1440 bool need_rls_dev = false;
1130 unsigned char *addr; 1441 unsigned char *addr;
1131 int ifindex, err, reserve = 0; 1442 int err, reserve = 0;
1132 struct virtio_net_hdr vnet_hdr = { 0 }; 1443 struct virtio_net_hdr vnet_hdr = { 0 };
1133 int offset = 0; 1444 int offset = 0;
1134 int vnet_hdr_len; 1445 int vnet_hdr_len;
@@ -1140,7 +1451,7 @@ static int packet_snd(struct socket *sock,
1140 */ 1451 */
1141 1452
1142 if (saddr == NULL) { 1453 if (saddr == NULL) {
1143 ifindex = po->ifindex; 1454 dev = po->prot_hook.dev;
1144 proto = po->num; 1455 proto = po->num;
1145 addr = NULL; 1456 addr = NULL;
1146 } else { 1457 } else {
@@ -1149,13 +1460,12 @@ static int packet_snd(struct socket *sock,
1149 goto out; 1460 goto out;
1150 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) 1461 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
1151 goto out; 1462 goto out;
1152 ifindex = saddr->sll_ifindex;
1153 proto = saddr->sll_protocol; 1463 proto = saddr->sll_protocol;
1154 addr = saddr->sll_addr; 1464 addr = saddr->sll_addr;
1465 dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
1466 need_rls_dev = true;
1155 } 1467 }
1156 1468
1157
1158 dev = dev_get_by_index(sock_net(sk), ifindex);
1159 err = -ENXIO; 1469 err = -ENXIO;
1160 if (dev == NULL) 1470 if (dev == NULL)
1161 goto out_unlock; 1471 goto out_unlock;
@@ -1286,14 +1596,15 @@ static int packet_snd(struct socket *sock,
1286 if (err > 0 && (err = net_xmit_errno(err)) != 0) 1596 if (err > 0 && (err = net_xmit_errno(err)) != 0)
1287 goto out_unlock; 1597 goto out_unlock;
1288 1598
1289 dev_put(dev); 1599 if (need_rls_dev)
1600 dev_put(dev);
1290 1601
1291 return len; 1602 return len;
1292 1603
1293out_free: 1604out_free:
1294 kfree_skb(skb); 1605 kfree_skb(skb);
1295out_unlock: 1606out_unlock:
1296 if (dev) 1607 if (dev && need_rls_dev)
1297 dev_put(dev); 1608 dev_put(dev);
1298out: 1609out:
1299 return err; 1610 return err;
@@ -1334,14 +1645,10 @@ static int packet_release(struct socket *sock)
1334 spin_unlock_bh(&net->packet.sklist_lock); 1645 spin_unlock_bh(&net->packet.sklist_lock);
1335 1646
1336 spin_lock(&po->bind_lock); 1647 spin_lock(&po->bind_lock);
1337 if (po->running) { 1648 unregister_prot_hook(sk, false);
1338 /* 1649 if (po->prot_hook.dev) {
1339 * Remove from protocol table 1650 dev_put(po->prot_hook.dev);
1340 */ 1651 po->prot_hook.dev = NULL;
1341 po->running = 0;
1342 po->num = 0;
1343 __dev_remove_pack(&po->prot_hook);
1344 __sock_put(sk);
1345 } 1652 }
1346 spin_unlock(&po->bind_lock); 1653 spin_unlock(&po->bind_lock);
1347 1654
@@ -1355,6 +1662,8 @@ static int packet_release(struct socket *sock)
1355 if (po->tx_ring.pg_vec) 1662 if (po->tx_ring.pg_vec)
1356 packet_set_ring(sk, &req, 1, 1); 1663 packet_set_ring(sk, &req, 1, 1);
1357 1664
1665 fanout_release(sk);
1666
1358 synchronize_net(); 1667 synchronize_net();
1359 /* 1668 /*
1360 * Now the socket is dead. No more input will appear. 1669 * Now the socket is dead. No more input will appear.
@@ -1378,24 +1687,18 @@ static int packet_release(struct socket *sock)
1378static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol) 1687static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
1379{ 1688{
1380 struct packet_sock *po = pkt_sk(sk); 1689 struct packet_sock *po = pkt_sk(sk);
1381 /* 1690
1382 * Detach an existing hook if present. 1691 if (po->fanout)
1383 */ 1692 return -EINVAL;
1384 1693
1385 lock_sock(sk); 1694 lock_sock(sk);
1386 1695
1387 spin_lock(&po->bind_lock); 1696 spin_lock(&po->bind_lock);
1388 if (po->running) { 1697 unregister_prot_hook(sk, true);
1389 __sock_put(sk);
1390 po->running = 0;
1391 po->num = 0;
1392 spin_unlock(&po->bind_lock);
1393 dev_remove_pack(&po->prot_hook);
1394 spin_lock(&po->bind_lock);
1395 }
1396
1397 po->num = protocol; 1698 po->num = protocol;
1398 po->prot_hook.type = protocol; 1699 po->prot_hook.type = protocol;
1700 if (po->prot_hook.dev)
1701 dev_put(po->prot_hook.dev);
1399 po->prot_hook.dev = dev; 1702 po->prot_hook.dev = dev;
1400 1703
1401 po->ifindex = dev ? dev->ifindex : 0; 1704 po->ifindex = dev ? dev->ifindex : 0;
@@ -1404,9 +1707,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc
1404 goto out_unlock; 1707 goto out_unlock;
1405 1708
1406 if (!dev || (dev->flags & IFF_UP)) { 1709 if (!dev || (dev->flags & IFF_UP)) {
1407 dev_add_pack(&po->prot_hook); 1710 register_prot_hook(sk);
1408 sock_hold(sk);
1409 po->running = 1;
1410 } else { 1711 } else {
1411 sk->sk_err = ENETDOWN; 1712 sk->sk_err = ENETDOWN;
1412 if (!sock_flag(sk, SOCK_DEAD)) 1713 if (!sock_flag(sk, SOCK_DEAD))
@@ -1440,10 +1741,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
1440 strlcpy(name, uaddr->sa_data, sizeof(name)); 1741 strlcpy(name, uaddr->sa_data, sizeof(name));
1441 1742
1442 dev = dev_get_by_name(sock_net(sk), name); 1743 dev = dev_get_by_name(sock_net(sk), name);
1443 if (dev) { 1744 if (dev)
1444 err = packet_do_bind(sk, dev, pkt_sk(sk)->num); 1745 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
1445 dev_put(dev);
1446 }
1447 return err; 1746 return err;
1448} 1747}
1449 1748
@@ -1471,8 +1770,6 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
1471 goto out; 1770 goto out;
1472 } 1771 }
1473 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); 1772 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1474 if (dev)
1475 dev_put(dev);
1476 1773
1477out: 1774out:
1478 return err; 1775 return err;
@@ -1537,9 +1834,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
1537 1834
1538 if (proto) { 1835 if (proto) {
1539 po->prot_hook.type = proto; 1836 po->prot_hook.type = proto;
1540 dev_add_pack(&po->prot_hook); 1837 register_prot_hook(sk);
1541 sock_hold(sk);
1542 po->running = 1;
1543 } 1838 }
1544 1839
1545 spin_lock_bh(&net->packet.sklist_lock); 1840 spin_lock_bh(&net->packet.sklist_lock);
@@ -1681,6 +1976,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1681 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 1976 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1682 vnet_hdr.csum_start = skb_checksum_start_offset(skb); 1977 vnet_hdr.csum_start = skb_checksum_start_offset(skb);
1683 vnet_hdr.csum_offset = skb->csum_offset; 1978 vnet_hdr.csum_offset = skb->csum_offset;
1979 } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
1980 vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
1684 } /* else everything is zero */ 1981 } /* else everything is zero */
1685 1982
1686 err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr, 1983 err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
@@ -2102,6 +2399,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
2102 po->tp_tstamp = val; 2399 po->tp_tstamp = val;
2103 return 0; 2400 return 0;
2104 } 2401 }
2402 case PACKET_FANOUT:
2403 {
2404 int val;
2405
2406 if (optlen != sizeof(val))
2407 return -EINVAL;
2408 if (copy_from_user(&val, optval, sizeof(val)))
2409 return -EFAULT;
2410
2411 return fanout_add(sk, val & 0xffff, val >> 16);
2412 }
2105 default: 2413 default:
2106 return -ENOPROTOOPT; 2414 return -ENOPROTOOPT;
2107 } 2415 }
@@ -2200,6 +2508,15 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
2200 val = po->tp_tstamp; 2508 val = po->tp_tstamp;
2201 data = &val; 2509 data = &val;
2202 break; 2510 break;
2511 case PACKET_FANOUT:
2512 if (len > sizeof(int))
2513 len = sizeof(int);
2514 val = (po->fanout ?
2515 ((u32)po->fanout->id |
2516 ((u32)po->fanout->type << 16)) :
2517 0);
2518 data = &val;
2519 break;
2203 default: 2520 default:
2204 return -ENOPROTOOPT; 2521 return -ENOPROTOOPT;
2205 } 2522 }
@@ -2233,15 +2550,15 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
2233 if (dev->ifindex == po->ifindex) { 2550 if (dev->ifindex == po->ifindex) {
2234 spin_lock(&po->bind_lock); 2551 spin_lock(&po->bind_lock);
2235 if (po->running) { 2552 if (po->running) {
2236 __dev_remove_pack(&po->prot_hook); 2553 __unregister_prot_hook(sk, false);
2237 __sock_put(sk);
2238 po->running = 0;
2239 sk->sk_err = ENETDOWN; 2554 sk->sk_err = ENETDOWN;
2240 if (!sock_flag(sk, SOCK_DEAD)) 2555 if (!sock_flag(sk, SOCK_DEAD))
2241 sk->sk_error_report(sk); 2556 sk->sk_error_report(sk);
2242 } 2557 }
2243 if (msg == NETDEV_UNREGISTER) { 2558 if (msg == NETDEV_UNREGISTER) {
2244 po->ifindex = -1; 2559 po->ifindex = -1;
2560 if (po->prot_hook.dev)
2561 dev_put(po->prot_hook.dev);
2245 po->prot_hook.dev = NULL; 2562 po->prot_hook.dev = NULL;
2246 } 2563 }
2247 spin_unlock(&po->bind_lock); 2564 spin_unlock(&po->bind_lock);
@@ -2250,11 +2567,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
2250 case NETDEV_UP: 2567 case NETDEV_UP:
2251 if (dev->ifindex == po->ifindex) { 2568 if (dev->ifindex == po->ifindex) {
2252 spin_lock(&po->bind_lock); 2569 spin_lock(&po->bind_lock);
2253 if (po->num && !po->running) { 2570 if (po->num)
2254 dev_add_pack(&po->prot_hook); 2571 register_prot_hook(sk);
2255 sock_hold(sk);
2256 po->running = 1;
2257 }
2258 spin_unlock(&po->bind_lock); 2572 spin_unlock(&po->bind_lock);
2259 } 2573 }
2260 break; 2574 break;
@@ -2521,10 +2835,8 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2521 was_running = po->running; 2835 was_running = po->running;
2522 num = po->num; 2836 num = po->num;
2523 if (was_running) { 2837 if (was_running) {
2524 __dev_remove_pack(&po->prot_hook);
2525 po->num = 0; 2838 po->num = 0;
2526 po->running = 0; 2839 __unregister_prot_hook(sk, false);
2527 __sock_put(sk);
2528 } 2840 }
2529 spin_unlock(&po->bind_lock); 2841 spin_unlock(&po->bind_lock);
2530 2842
@@ -2555,11 +2867,9 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2555 mutex_unlock(&po->pg_vec_lock); 2867 mutex_unlock(&po->pg_vec_lock);
2556 2868
2557 spin_lock(&po->bind_lock); 2869 spin_lock(&po->bind_lock);
2558 if (was_running && !po->running) { 2870 if (was_running) {
2559 sock_hold(sk);
2560 po->running = 1;
2561 po->num = num; 2871 po->num = num;
2562 dev_add_pack(&po->prot_hook); 2872 register_prot_hook(sk);
2563 } 2873 }
2564 spin_unlock(&po->bind_lock); 2874 spin_unlock(&po->bind_lock);
2565 2875