Diffstat (limited to 'net/packet')
| -rw-r--r-- | net/packet/af_packet.c | 1012 |
1 file changed, 915 insertions, 97 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c698cec0a445..82a6f34d39d0 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
| @@ -40,6 +40,10 @@ | |||
| 40 | * byte arrays at the end of sockaddr_ll | 40 | * byte arrays at the end of sockaddr_ll |
| 41 | * and packet_mreq. | 41 | * and packet_mreq. |
| 42 | * Johann Baudy : Added TX RING. | 42 | * Johann Baudy : Added TX RING. |
| 43 | * Chetan Loke : Implemented TPACKET_V3 block abstraction | ||
| 44 | * layer. | ||
| 45 | * Copyright (C) 2011, <lokec@ccs.neu.edu> | ||
| 46 | * | ||
| 43 | * | 47 | * |
| 44 | * This program is free software; you can redistribute it and/or | 48 | * This program is free software; you can redistribute it and/or |
| 45 | * modify it under the terms of the GNU General Public License | 49 | * modify it under the terms of the GNU General Public License |
| @@ -161,9 +165,56 @@ struct packet_mreq_max { | |||
| 161 | unsigned char mr_address[MAX_ADDR_LEN]; | 165 | unsigned char mr_address[MAX_ADDR_LEN]; |
| 162 | }; | 166 | }; |
| 163 | 167 | ||
| 164 | static int packet_set_ring(struct sock *sk, struct tpacket_req *req, | 168 | static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, |
| 165 | int closing, int tx_ring); | 169 | int closing, int tx_ring); |
| 166 | 170 | ||
| 171 | |||
| 172 | #define V3_ALIGNMENT (8) | ||
| 173 | |||
| 174 | #define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) | ||
| 175 | |||
| 176 | #define BLK_PLUS_PRIV(sz_of_priv) \ | ||
| 177 | (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) | ||
| 178 | |||
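To make the block layout concrete, here is the arithmetic above worked through for a small private area. This is a sketch that assumes the v1 block descriptor occupies 48 bytes; the exact size is ABI-dependent.

```c
/* Worked example, assuming sizeof(struct tpacket_block_desc) == 48:
 *
 *   BLK_HDR_LEN       = ALIGN(48, 8)      = 48
 *   BLK_PLUS_PRIV(13) = 48 + ALIGN(13, 8) = 48 + 16 = 64
 *
 * i.e. with a 13-byte per-block private area, the first packet of
 * each block starts 64 bytes into the block.
 */
```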
| 179 | /* kbdq - kernel block descriptor queue */ | ||
| 180 | struct tpacket_kbdq_core { | ||
| 181 | struct pgv *pkbdq; | ||
| 182 | unsigned int feature_req_word; | ||
| 183 | unsigned int hdrlen; | ||
| 184 | unsigned char reset_pending_on_curr_blk; | ||
| 185 | unsigned char delete_blk_timer; | ||
| 186 | unsigned short kactive_blk_num; | ||
| 187 | unsigned short blk_sizeof_priv; | ||
| 188 | |||
| 189 | /* last_kactive_blk_num: | ||
| 190 | * a trick to see if user-space has caught up, | ||
| 191 | * so we avoid refreshing the timer every time a single pkt arrives. | ||
| 192 | */ | ||
| 193 | unsigned short last_kactive_blk_num; | ||
| 194 | |||
| 195 | char *pkblk_start; | ||
| 196 | char *pkblk_end; | ||
| 197 | int kblk_size; | ||
| 198 | unsigned int knum_blocks; | ||
| 199 | uint64_t knxt_seq_num; | ||
| 200 | char *prev; | ||
| 201 | char *nxt_offset; | ||
| 202 | struct sk_buff *skb; | ||
| 203 | |||
| 204 | atomic_t blk_fill_in_prog; | ||
| 205 | |||
| 206 | /* Default is set to 8ms */ | ||
| 207 | #define DEFAULT_PRB_RETIRE_TOV (8) | ||
| 208 | |||
| 209 | unsigned short retire_blk_tov; | ||
| 210 | unsigned short version; | ||
| 211 | unsigned long tov_in_jiffies; | ||
| 212 | |||
| 213 | /* timer to retire an outstanding block */ | ||
| 214 | struct timer_list retire_blk_timer; | ||
| 215 | }; | ||
| 216 | |||
| 217 | #define PGV_FROM_VMALLOC 1 | ||
| 167 | struct pgv { | 218 | struct pgv { |
| 168 | char *buffer; | 219 | char *buffer; |
| 169 | }; | 220 | }; |
| @@ -179,12 +230,44 @@ struct packet_ring_buffer { | |||
| 179 | unsigned int pg_vec_pages; | 230 | unsigned int pg_vec_pages; |
| 180 | unsigned int pg_vec_len; | 231 | unsigned int pg_vec_len; |
| 181 | 232 | ||
| 233 | struct tpacket_kbdq_core prb_bdqc; | ||
| 182 | atomic_t pending; | 234 | atomic_t pending; |
| 183 | }; | 235 | }; |
| 184 | 236 | ||
| 237 | #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) | ||
| 238 | #define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) | ||
| 239 | #define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt) | ||
| 240 | #define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len) | ||
| 241 | #define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num) | ||
| 242 | #define BLOCK_O2PRIV(x) ((x)->offset_to_priv) | ||
| 243 | #define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x))) | ||
| 244 | |||
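These macros mirror the offsets user space follows when walking a filled block. A minimal consumer sketch, assuming the TPACKET_V3 uapi structures this patch introduces (`walk_block` is a hypothetical helper, not part of the patch):

```c
#include <linux/if_packet.h>
#include <stdint.h>
#include <stdio.h>

/* Walk every packet in one block: the user-space counterpart of the
 * BLOCK_O2FP()/tp_next_offset bookkeeping above. */
static void walk_block(struct tpacket_block_desc *pbd)
{
	struct tpacket3_hdr *ppd;
	uint32_t i;

	/* The first packet sits offset_to_first_pkt bytes into the block */
	ppd = (struct tpacket3_hdr *)((uint8_t *)pbd +
				      pbd->hdr.bh1.offset_to_first_pkt);

	for (i = 0; i < pbd->hdr.bh1.num_pkts; i++) {
		printf("pkt %u: len=%u snaplen=%u\n",
		       i, ppd->tp_len, ppd->tp_snaplen);
		/* tp_next_offset chains packets within the block */
		ppd = (struct tpacket3_hdr *)((uint8_t *)ppd +
					      ppd->tp_next_offset);
	}
}
```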
| 185 | struct packet_sock; | 245 | struct packet_sock; |
| 186 | static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); | 246 | static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); |
| 187 | 247 | ||
| 248 | static void *packet_previous_frame(struct packet_sock *po, | ||
| 249 | struct packet_ring_buffer *rb, | ||
| 250 | int status); | ||
| 251 | static void packet_increment_head(struct packet_ring_buffer *buff); | ||
| 252 | static int prb_curr_blk_in_use(struct tpacket_kbdq_core *, | ||
| 253 | struct tpacket_block_desc *); | ||
| 254 | static void *prb_dispatch_next_block(struct tpacket_kbdq_core *, | ||
| 255 | struct packet_sock *); | ||
| 256 | static void prb_retire_current_block(struct tpacket_kbdq_core *, | ||
| 257 | struct packet_sock *, unsigned int status); | ||
| 258 | static int prb_queue_frozen(struct tpacket_kbdq_core *); | ||
| 259 | static void prb_open_block(struct tpacket_kbdq_core *, | ||
| 260 | struct tpacket_block_desc *); | ||
| 261 | static void prb_retire_rx_blk_timer_expired(unsigned long); | ||
| 262 | static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *); | ||
| 263 | static void prb_init_blk_timer(struct packet_sock *, | ||
| 264 | struct tpacket_kbdq_core *, | ||
| 265 | void (*func) (unsigned long)); | ||
| 266 | static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); | ||
| 267 | static void prb_clear_rxhash(struct tpacket_kbdq_core *, | ||
| 268 | struct tpacket3_hdr *); | ||
| 269 | static void prb_fill_vlan_info(struct tpacket_kbdq_core *, | ||
| 270 | struct tpacket3_hdr *); | ||
| 188 | static void packet_flush_mclist(struct sock *sk); | 271 | static void packet_flush_mclist(struct sock *sk); |
| 189 | 272 | ||
| 190 | struct packet_fanout; | 273 | struct packet_fanout; |
| @@ -193,6 +276,7 @@ struct packet_sock { | |||
| 193 | struct sock sk; | 276 | struct sock sk; |
| 194 | struct packet_fanout *fanout; | 277 | struct packet_fanout *fanout; |
| 195 | struct tpacket_stats stats; | 278 | struct tpacket_stats stats; |
| 279 | union tpacket_stats_u stats_u; | ||
| 196 | struct packet_ring_buffer rx_ring; | 280 | struct packet_ring_buffer rx_ring; |
| 197 | struct packet_ring_buffer tx_ring; | 281 | struct packet_ring_buffer tx_ring; |
| 198 | int copy_thresh; | 282 | int copy_thresh; |
| @@ -242,7 +326,16 @@ struct packet_skb_cb { | |||
| 242 | 326 | ||
| 243 | #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) | 327 | #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) |
| 244 | 328 | ||
| 245 | static inline struct packet_sock *pkt_sk(struct sock *sk) | 329 | #define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc)) |
| 330 | #define GET_PBLOCK_DESC(x, bid) \ | ||
| 331 | ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer)) | ||
| 332 | #define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \ | ||
| 333 | ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer)) | ||
| 334 | #define GET_NEXT_PRB_BLK_NUM(x) \ | ||
| 335 | (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \ | ||
| 336 | ((x)->kactive_blk_num+1) : 0) | ||
| 337 | |||
| 338 | static struct packet_sock *pkt_sk(struct sock *sk) | ||
| 246 | { | 339 | { |
| 247 | return (struct packet_sock *)sk; | 340 | return (struct packet_sock *)sk; |
| 248 | } | 341 | } |
| @@ -325,8 +418,9 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) | |||
| 325 | h.h2->tp_status = status; | 418 | h.h2->tp_status = status; |
| 326 | flush_dcache_page(pgv_to_page(&h.h2->tp_status)); | 419 | flush_dcache_page(pgv_to_page(&h.h2->tp_status)); |
| 327 | break; | 420 | break; |
| 421 | case TPACKET_V3: | ||
| 328 | default: | 422 | default: |
| 329 | pr_err("TPACKET version not supported\n"); | 423 | WARN(1, "TPACKET version not supported.\n"); |
| 330 | BUG(); | 424 | BUG(); |
| 331 | } | 425 | } |
| 332 | 426 | ||
| @@ -351,8 +445,9 @@ static int __packet_get_status(struct packet_sock *po, void *frame) | |||
| 351 | case TPACKET_V2: | 445 | case TPACKET_V2: |
| 352 | flush_dcache_page(pgv_to_page(&h.h2->tp_status)); | 446 | flush_dcache_page(pgv_to_page(&h.h2->tp_status)); |
| 353 | return h.h2->tp_status; | 447 | return h.h2->tp_status; |
| 448 | case TPACKET_V3: | ||
| 354 | default: | 449 | default: |
| 355 | pr_err("TPACKET version not supported\n"); | 450 | WARN(1, "TPACKET version not supported.\n"); |
| 356 | BUG(); | 451 | BUG(); |
| 357 | return 0; | 452 | return 0; |
| 358 | } | 453 | } |
| @@ -382,14 +477,678 @@ static void *packet_lookup_frame(struct packet_sock *po, | |||
| 382 | return h.raw; | 477 | return h.raw; |
| 383 | } | 478 | } |
| 384 | 479 | ||
| 385 | static inline void *packet_current_frame(struct packet_sock *po, | 480 | static void *packet_current_frame(struct packet_sock *po, |
| 386 | struct packet_ring_buffer *rb, | 481 | struct packet_ring_buffer *rb, |
| 387 | int status) | 482 | int status) |
| 388 | { | 483 | { |
| 389 | return packet_lookup_frame(po, rb, rb->head, status); | 484 | return packet_lookup_frame(po, rb, rb->head, status); |
| 390 | } | 485 | } |
| 391 | 486 | ||
| 392 | static inline void *packet_previous_frame(struct packet_sock *po, | 487 | static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc) |
| 488 | { | ||
| 489 | del_timer_sync(&pkc->retire_blk_timer); | ||
| 490 | } | ||
| 491 | |||
| 492 | static void prb_shutdown_retire_blk_timer(struct packet_sock *po, | ||
| 493 | int tx_ring, | ||
| 494 | struct sk_buff_head *rb_queue) | ||
| 495 | { | ||
| 496 | struct tpacket_kbdq_core *pkc; | ||
| 497 | |||
| 498 | pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; | ||
| 499 | |||
| 500 | spin_lock(&rb_queue->lock); | ||
| 501 | pkc->delete_blk_timer = 1; | ||
| 502 | spin_unlock(&rb_queue->lock); | ||
| 503 | |||
| 504 | prb_del_retire_blk_timer(pkc); | ||
| 505 | } | ||
| 506 | |||
| 507 | static void prb_init_blk_timer(struct packet_sock *po, | ||
| 508 | struct tpacket_kbdq_core *pkc, | ||
| 509 | void (*func) (unsigned long)) | ||
| 510 | { | ||
| 511 | init_timer(&pkc->retire_blk_timer); | ||
| 512 | pkc->retire_blk_timer.data = (long)po; | ||
| 513 | pkc->retire_blk_timer.function = func; | ||
| 514 | pkc->retire_blk_timer.expires = jiffies; | ||
| 515 | } | ||
| 516 | |||
| 517 | static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring) | ||
| 518 | { | ||
| 519 | struct tpacket_kbdq_core *pkc; | ||
| 520 | |||
| 521 | if (tx_ring) | ||
| 522 | BUG(); | ||
| 523 | |||
| 524 | pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; | ||
| 525 | prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired); | ||
| 526 | } | ||
| 527 | |||
| 528 | static int prb_calc_retire_blk_tmo(struct packet_sock *po, | ||
| 529 | int blk_size_in_bytes) | ||
| 530 | { | ||
| 531 | struct net_device *dev; | ||
| 532 | unsigned int mbits = 0, msec = 0, div = 0, tmo = 0; | ||
| 533 | struct ethtool_cmd ecmd; | ||
| 534 | int err; | ||
| 535 | |||
| 536 | rtnl_lock(); | ||
| 537 | dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); | ||
| 538 | if (unlikely(!dev)) { | ||
| 539 | rtnl_unlock(); | ||
| 540 | return DEFAULT_PRB_RETIRE_TOV; | ||
| 541 | } | ||
| 542 | err = __ethtool_get_settings(dev, &ecmd); | ||
| 543 | rtnl_unlock(); | ||
| 544 | if (!err) { | ||
| 545 | switch (ecmd.speed) { | ||
| 546 | case SPEED_10000: | ||
| 547 | msec = 1; | ||
| 548 | div = 10000/1000; | ||
| 549 | break; | ||
| 550 | case SPEED_1000: | ||
| 551 | msec = 1; | ||
| 552 | div = 1000/1000; | ||
| 553 | break; | ||
| 554 | /* | ||
| 555 | * If the link speed is this slow, you don't really | ||
| 556 | * need to worry about perf anyway. | ||
| 557 | */ | ||
| 558 | case SPEED_100: | ||
| 559 | case SPEED_10: | ||
| 560 | default: | ||
| 561 | return DEFAULT_PRB_RETIRE_TOV; | ||
| 562 | } | ||
| 563 | } | ||
| 564 | |||
| 565 | mbits = (blk_size_in_bytes * 8) / (1024 * 1024); | ||
| 566 | |||
| 567 | if (div) | ||
| 568 | mbits /= div; | ||
| 569 | |||
| 570 | tmo = mbits * msec; | ||
| 571 | |||
| 572 | if (div) | ||
| 573 | return tmo+1; | ||
| 574 | return tmo; | ||
| 575 | } | ||
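Replaying the arithmetic above for a common configuration shows where the default comes from; a worked sketch for a 1 MiB block on a 1 Gbit/s link:

```c
/* SPEED_1000: msec = 1, div = 1000/1000 = 1; block = 1 MiB.
 *
 *   mbits = (1048576 * 8) / (1024 * 1024) = 8
 *   mbits /= div                          = 8
 *   tmo   = mbits * msec                  = 8
 *   return tmo + 1                        = 9 ms
 *
 * i.e. roughly the ~8 ms fill time the timer-logic comment below
 * assumes, plus a millisecond of slack.
 */
```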
| 576 | |||
| 577 | static void prb_init_ft_ops(struct tpacket_kbdq_core *p1, | ||
| 578 | union tpacket_req_u *req_u) | ||
| 579 | { | ||
| 580 | p1->feature_req_word = req_u->req3.tp_feature_req_word; | ||
| 581 | } | ||
| 582 | |||
| 583 | static void init_prb_bdqc(struct packet_sock *po, | ||
| 584 | struct packet_ring_buffer *rb, | ||
| 585 | struct pgv *pg_vec, | ||
| 586 | union tpacket_req_u *req_u, int tx_ring) | ||
| 587 | { | ||
| 588 | struct tpacket_kbdq_core *p1 = &rb->prb_bdqc; | ||
| 589 | struct tpacket_block_desc *pbd; | ||
| 590 | |||
| 591 | memset(p1, 0x0, sizeof(*p1)); | ||
| 592 | |||
| 593 | p1->knxt_seq_num = 1; | ||
| 594 | p1->pkbdq = pg_vec; | ||
| 595 | pbd = (struct tpacket_block_desc *)pg_vec[0].buffer; | ||
| 596 | p1->pkblk_start = (char *)pg_vec[0].buffer; | ||
| 597 | p1->kblk_size = req_u->req3.tp_block_size; | ||
| 598 | p1->knum_blocks = req_u->req3.tp_block_nr; | ||
| 599 | p1->hdrlen = po->tp_hdrlen; | ||
| 600 | p1->version = po->tp_version; | ||
| 601 | p1->last_kactive_blk_num = 0; | ||
| 602 | po->stats_u.stats3.tp_freeze_q_cnt = 0; | ||
| 603 | if (req_u->req3.tp_retire_blk_tov) | ||
| 604 | p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov; | ||
| 605 | else | ||
| 606 | p1->retire_blk_tov = prb_calc_retire_blk_tmo(po, | ||
| 607 | req_u->req3.tp_block_size); | ||
| 608 | p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); | ||
| 609 | p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; | ||
| 610 | |||
| 611 | prb_init_ft_ops(p1, req_u); | ||
| 612 | prb_setup_retire_blk_timer(po, tx_ring); | ||
| 613 | prb_open_block(p1, pbd); | ||
| 614 | } | ||
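init_prb_bdqc() consumes the V3-only fields of struct tpacket_req3. A hedged user-space sketch of filling them (the values are illustrative, not recommendations):

```c
#include <linux/if_packet.h>

struct tpacket_req3 req3 = {
	.tp_block_size       = 1 << 20,		/* 1 MiB per block */
	.tp_block_nr         = 8,		/* 8 blocks in the ring */
	.tp_frame_size       = 2048,		/* still validated for V3 */
	.tp_frame_nr         = (1 << 20) / 2048 * 8,
	.tp_retire_blk_tov   = 0,		/* 0: derive the timeout from
						 * the link speed, as above */
	.tp_sizeof_priv      = 0,		/* no per-block private area */
	.tp_feature_req_word = 0,		/* no extra fill-ops */
};
```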
| 615 | |||
| 616 | /* Do NOT update the last_blk_num first. | ||
| 617 | * Assumes sk_buff_head lock is held. | ||
| 618 | */ | ||
| 619 | static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) | ||
| 620 | { | ||
| 621 | mod_timer(&pkc->retire_blk_timer, | ||
| 622 | jiffies + pkc->tov_in_jiffies); | ||
| 623 | pkc->last_kactive_blk_num = pkc->kactive_blk_num; | ||
| 624 | } | ||
| 625 | |||
| 626 | /* | ||
| 627 | * Timer logic: | ||
| 628 | * 1) We refresh the timer only when we open a block. | ||
| 629 | * By doing this we don't waste cycles refreshing the timer | ||
| 630 | * on packet-by-packet basis. | ||
| 631 | * | ||
| 632 | * With a 1MB block-size, on a 1Gbps line, it will take | ||
| 633 | * i) ~8 ms to fill a block + ii) memcpy etc. | ||
| 634 | * In this cut we are not accounting for the memcpy time. | ||
| 635 | * | ||
| 636 | * So, if the user sets the 'tmo' to 10ms then the timer | ||
| 637 | * will never fire while the block is still getting filled | ||
| 638 | * (which is what we want). However, the user could choose | ||
| 639 | * to close a block early and that's fine. | ||
| 640 | * | ||
| 641 | * But when the timer does fire, we check whether or not to refresh it. | ||
| 642 | * Since the tmo granularity is in msecs, it is not too expensive | ||
| 643 | * to refresh the timer, let's say every '8' msecs. | ||
| 644 | * Either the user can set the 'tmo' or we can derive it based on | ||
| 645 | * a) line-speed and b) block-size. | ||
| 646 | * prb_calc_retire_blk_tmo() calculates the tmo. | ||
| 647 | * | ||
| 648 | */ | ||
| 649 | static void prb_retire_rx_blk_timer_expired(unsigned long data) | ||
| 650 | { | ||
| 651 | struct packet_sock *po = (struct packet_sock *)data; | ||
| 652 | struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc; | ||
| 653 | unsigned int frozen; | ||
| 654 | struct tpacket_block_desc *pbd; | ||
| 655 | |||
| 656 | spin_lock(&po->sk.sk_receive_queue.lock); | ||
| 657 | |||
| 658 | frozen = prb_queue_frozen(pkc); | ||
| 659 | pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); | ||
| 660 | |||
| 661 | if (unlikely(pkc->delete_blk_timer)) | ||
| 662 | goto out; | ||
| 663 | |||
| 664 | /* We only need to plug the race when the block is partially filled. | ||
| 665 | * tpacket_rcv: | ||
| 666 | * lock(); increment BLOCK_NUM_PKTS; unlock() | ||
| 667 | * copy_bits() is in progress ... | ||
| 668 | * timer fires on other cpu: | ||
| 669 | * we can't retire the current block because copy_bits | ||
| 670 | * is in progress. | ||
| 671 | * | ||
| 672 | */ | ||
| 673 | if (BLOCK_NUM_PKTS(pbd)) { | ||
| 674 | while (atomic_read(&pkc->blk_fill_in_prog)) { | ||
| 675 | /* Waiting for skb_copy_bits to finish... */ | ||
| 676 | cpu_relax(); | ||
| 677 | } | ||
| 678 | } | ||
| 679 | |||
| 680 | if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { | ||
| 681 | if (!frozen) { | ||
| 682 | prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO); | ||
| 683 | if (!prb_dispatch_next_block(pkc, po)) | ||
| 684 | goto refresh_timer; | ||
| 685 | else | ||
| 686 | goto out; | ||
| 687 | } else { | ||
| 688 | /* Case 1. Queue was frozen because user-space was | ||
| 689 | * lagging behind. | ||
| 690 | */ | ||
| 691 | if (prb_curr_blk_in_use(pkc, pbd)) { | ||
| 692 | /* | ||
| 693 | * Ok, user-space is still behind. | ||
| 694 | * So just refresh the timer. | ||
| 695 | */ | ||
| 696 | goto refresh_timer; | ||
| 697 | } else { | ||
| 698 | /* Case 2. Queue was frozen, user-space caught up, | ||
| 699 | * now the link went idle && the timer fired. | ||
| 700 | * We don't have a block to close. So we open this | ||
| 701 | * block and restart the timer. | ||
| 702 | * Opening a block thaws the queue and restarts the timer; | ||
| 703 | * thawing/timer-refresh is a side effect. | ||
| 704 | */ | ||
| 705 | prb_open_block(pkc, pbd); | ||
| 706 | goto out; | ||
| 707 | } | ||
| 708 | } | ||
| 709 | } | ||
| 710 | |||
| 711 | refresh_timer: | ||
| 712 | _prb_refresh_rx_retire_blk_timer(pkc); | ||
| 713 | |||
| 714 | out: | ||
| 715 | spin_unlock(&po->sk.sk_receive_queue.lock); | ||
| 716 | } | ||
| 717 | |||
| 718 | static void prb_flush_block(struct tpacket_kbdq_core *pkc1, | ||
| 719 | struct tpacket_block_desc *pbd1, __u32 status) | ||
| 720 | { | ||
| 721 | /* Flush everything minus the block header */ | ||
| 722 | |||
| 723 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 | ||
| 724 | u8 *start, *end; | ||
| 725 | |||
| 726 | start = (u8 *)pbd1; | ||
| 727 | |||
| 728 | /* Skip the block header (we know the header WILL fit in 4K) */ | ||
| 729 | start += PAGE_SIZE; | ||
| 730 | |||
| 731 | end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end); | ||
| 732 | for (; start < end; start += PAGE_SIZE) | ||
| 733 | flush_dcache_page(pgv_to_page(start)); | ||
| 734 | |||
| 735 | smp_wmb(); | ||
| 736 | #endif | ||
| 737 | |||
| 738 | /* Now update the block status. */ | ||
| 739 | |||
| 740 | BLOCK_STATUS(pbd1) = status; | ||
| 741 | |||
| 742 | /* Flush the block header */ | ||
| 743 | |||
| 744 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 | ||
| 745 | start = (u8 *)pbd1; | ||
| 746 | flush_dcache_page(pgv_to_page(start)); | ||
| 747 | |||
| 748 | smp_wmb(); | ||
| 749 | #endif | ||
| 750 | } | ||
| 751 | |||
| 752 | /* | ||
| 753 | * Side effect: | ||
| 754 | * | ||
| 755 | * 1) flush the block | ||
| 756 | * 2) Increment active_blk_num | ||
| 757 | * | ||
| 758 | * Note: We deliberately do NOT refresh the timer here, | ||
| 759 | * because the next block will almost always be opened. | ||
| 760 | */ | ||
| 761 | static void prb_close_block(struct tpacket_kbdq_core *pkc1, | ||
| 762 | struct tpacket_block_desc *pbd1, | ||
| 763 | struct packet_sock *po, unsigned int stat) | ||
| 764 | { | ||
| 765 | __u32 status = TP_STATUS_USER | stat; | ||
| 766 | |||
| 767 | struct tpacket3_hdr *last_pkt; | ||
| 768 | struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; | ||
| 769 | |||
| 770 | if (po->stats.tp_drops) | ||
| 771 | status |= TP_STATUS_LOSING; | ||
| 772 | |||
| 773 | last_pkt = (struct tpacket3_hdr *)pkc1->prev; | ||
| 774 | last_pkt->tp_next_offset = 0; | ||
| 775 | |||
| 776 | /* Get the ts of the last pkt */ | ||
| 777 | if (BLOCK_NUM_PKTS(pbd1)) { | ||
| 778 | h1->ts_last_pkt.ts_sec = last_pkt->tp_sec; | ||
| 779 | h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec; | ||
| 780 | } else { | ||
| 781 | /* Ok, we tmo'd - so get the current time */ | ||
| 782 | struct timespec ts; | ||
| 783 | getnstimeofday(&ts); | ||
| 784 | h1->ts_last_pkt.ts_sec = ts.tv_sec; | ||
| 785 | h1->ts_last_pkt.ts_nsec = ts.tv_nsec; | ||
| 786 | } | ||
| 787 | |||
| 788 | smp_wmb(); | ||
| 789 | |||
| 790 | /* Flush the block */ | ||
| 791 | prb_flush_block(pkc1, pbd1, status); | ||
| 792 | |||
| 793 | pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1); | ||
| 794 | } | ||
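Because prb_open_block() and prb_close_block() stamp ts_first_pkt/ts_last_pkt, user space can recover the time window each block covers. A minimal sketch (`print_block_window` is a hypothetical helper):

```c
#include <linux/if_packet.h>
#include <stdio.h>

static void print_block_window(const struct tpacket_block_desc *pbd)
{
	const struct tpacket_hdr_v1 *h1 = &pbd->hdr.bh1;

	printf("block #%llu: %u pkts, first %u.%09u, last %u.%09u\n",
	       (unsigned long long)h1->seq_num, h1->num_pkts,
	       h1->ts_first_pkt.ts_sec, h1->ts_first_pkt.ts_nsec,
	       h1->ts_last_pkt.ts_sec, h1->ts_last_pkt.ts_nsec);
}
```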
| 795 | |||
| 796 | static void prb_thaw_queue(struct tpacket_kbdq_core *pkc) | ||
| 797 | { | ||
| 798 | pkc->reset_pending_on_curr_blk = 0; | ||
| 799 | } | ||
| 800 | |||
| 801 | /* | ||
| 802 | * Side effect of opening a block: | ||
| 803 | * | ||
| 804 | * 1) prb_queue is thawed. | ||
| 805 | * 2) retire_blk_timer is refreshed. | ||
| 806 | * | ||
| 807 | */ | ||
| 808 | static void prb_open_block(struct tpacket_kbdq_core *pkc1, | ||
| 809 | struct tpacket_block_desc *pbd1) | ||
| 810 | { | ||
| 811 | struct timespec ts; | ||
| 812 | struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; | ||
| 813 | |||
| 814 | smp_rmb(); | ||
| 815 | |||
| 816 | if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) { | ||
| 817 | |||
| 818 | /* We could have just memset this, but we would lose the | ||
| 819 | * flexibility of making the priv area sticky. | ||
| 820 | */ | ||
| 821 | BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; | ||
| 822 | BLOCK_NUM_PKTS(pbd1) = 0; | ||
| 823 | BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); | ||
| 824 | getnstimeofday(&ts); | ||
| 825 | h1->ts_first_pkt.ts_sec = ts.tv_sec; | ||
| 826 | h1->ts_first_pkt.ts_nsec = ts.tv_nsec; | ||
| 827 | pkc1->pkblk_start = (char *)pbd1; | ||
| 828 | pkc1->nxt_offset = (char *)(pkc1->pkblk_start + | ||
| 829 | BLK_PLUS_PRIV(pkc1->blk_sizeof_priv)); | ||
| 830 | BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); | ||
| 831 | BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; | ||
| 832 | pbd1->version = pkc1->version; | ||
| 833 | pkc1->prev = pkc1->nxt_offset; | ||
| 834 | pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; | ||
| 835 | prb_thaw_queue(pkc1); | ||
| 836 | _prb_refresh_rx_retire_blk_timer(pkc1); | ||
| 837 | |||
| 838 | smp_wmb(); | ||
| 839 | |||
| 840 | return; | ||
| 841 | } | ||
| 842 | |||
| 843 | WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n", | ||
| 844 | pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num); | ||
| 845 | dump_stack(); | ||
| 846 | BUG(); | ||
| 847 | } | ||
| 848 | |||
| 849 | /* | ||
| 850 | * Queue freeze logic: | ||
| 851 | * 1) Assume tp_block_nr = 8 blocks. | ||
| 852 | * 2) At time 't0', user opens Rx ring. | ||
| 853 | * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7 | ||
| 854 | * 4) user-space is either sleeping or processing block '0'. | ||
| 855 | * 5) tpacket_rcv is currently filling block '7', since there is no space left, | ||
| 856 | * it will close block-7, loop around and try to fill block '0'. | ||
| 857 | * call-flow: | ||
| 858 | * __packet_lookup_frame_in_block | ||
| 859 | * prb_retire_current_block() | ||
| 860 | * prb_dispatch_next_block() | ||
| 861 | * |->(BLOCK_STATUS == USER) evaluates to true | ||
| 862 | * 5.1) Since block-0 is currently in-use, we just freeze the queue. | ||
| 863 | * 6) Now there are two cases: | ||
| 864 | * 6.1) Link goes idle right after the queue is frozen. | ||
| 865 | * But remember, the last open_block() refreshed the timer. | ||
| 866 | * When this timer expires, it will refresh itself so that we can | ||
| 867 | * re-open block-0 in near future. | ||
| 868 | * 6.2) Link is busy and keeps on receiving packets. This is a simple | ||
| 869 | * case and __packet_lookup_frame_in_block will check if block-0 | ||
| 870 | * is free and can now be re-used. | ||
| 871 | */ | ||
| 872 | static void prb_freeze_queue(struct tpacket_kbdq_core *pkc, | ||
| 873 | struct packet_sock *po) | ||
| 874 | { | ||
| 875 | pkc->reset_pending_on_curr_blk = 1; | ||
| 876 | po->stats_u.stats3.tp_freeze_q_cnt++; | ||
| 877 | } | ||
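A frozen queue thaws only when user space hands the offending block back. A minimal sketch of that handshake (`consume_and_release` is a hypothetical helper; the full-barrier choice is an assumption, not something this patch mandates):

```c
#include <linux/if_packet.h>

static void consume_and_release(struct tpacket_block_desc *pbd)
{
	if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0)
		return;			/* still owned by the kernel */

	/* ... walk the packets in the block (see the earlier sketch) ... */

	__sync_synchronize();		/* finish all reads first */
	pbd->hdr.bh1.block_status = TP_STATUS_KERNEL;	/* hand it back */
}
```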
| 878 | |||
| 879 | #define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT)) | ||
| 880 | |||
| 881 | /* | ||
| 882 | * If the next block is free then we will dispatch it | ||
| 883 | * and return a good offset. | ||
| 884 | * Else, we will freeze the queue. | ||
| 885 | * So, caller must check the return value. | ||
| 886 | */ | ||
| 887 | static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc, | ||
| 888 | struct packet_sock *po) | ||
| 889 | { | ||
| 890 | struct tpacket_block_desc *pbd; | ||
| 891 | |||
| 892 | smp_rmb(); | ||
| 893 | |||
| 894 | /* 1. Get current block num */ | ||
| 895 | pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); | ||
| 896 | |||
| 897 | /* 2. If this block is currently in_use then freeze the queue */ | ||
| 898 | if (TP_STATUS_USER & BLOCK_STATUS(pbd)) { | ||
| 899 | prb_freeze_queue(pkc, po); | ||
| 900 | return NULL; | ||
| 901 | } | ||
| 902 | |||
| 903 | /* | ||
| 904 | * 3. | ||
| 905 | * open this block and return the offset where the first packet | ||
| 906 | * needs to get stored. | ||
| 907 | */ | ||
| 908 | prb_open_block(pkc, pbd); | ||
| 909 | return (void *)pkc->nxt_offset; | ||
| 910 | } | ||
| 911 | |||
| 912 | static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, | ||
| 913 | struct packet_sock *po, unsigned int status) | ||
| 914 | { | ||
| 915 | struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); | ||
| 916 | |||
| 917 | /* retire/close the current block */ | ||
| 918 | if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) { | ||
| 919 | /* | ||
| 920 | * Plug the case where copy_bits() is in progress on | ||
| 921 | * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't | ||
| 922 | * have space to copy the pkt in the current block and | ||
| 923 | * called prb_retire_current_block() | ||
| 924 | * | ||
| 925 | * We don't need to worry about the TMO case because | ||
| 926 | * the timer-handler already handled this case. | ||
| 927 | */ | ||
| 928 | if (!(status & TP_STATUS_BLK_TMO)) { | ||
| 929 | while (atomic_read(&pkc->blk_fill_in_prog)) { | ||
| 930 | /* Waiting for skb_copy_bits to finish... */ | ||
| 931 | cpu_relax(); | ||
| 932 | } | ||
| 933 | } | ||
| 934 | prb_close_block(pkc, pbd, po, status); | ||
| 935 | return; | ||
| 936 | } | ||
| 937 | |||
| 938 | WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd); | ||
| 939 | dump_stack(); | ||
| 940 | BUG(); | ||
| 941 | } | ||
| 942 | |||
| 943 | static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, | ||
| 944 | struct tpacket_block_desc *pbd) | ||
| 945 | { | ||
| 946 | return TP_STATUS_USER & BLOCK_STATUS(pbd); | ||
| 947 | } | ||
| 948 | |||
| 949 | static int prb_queue_frozen(struct tpacket_kbdq_core *pkc) | ||
| 950 | { | ||
| 951 | return pkc->reset_pending_on_curr_blk; | ||
| 952 | } | ||
| 953 | |||
| 954 | static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) | ||
| 955 | { | ||
| 956 | struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); | ||
| 957 | atomic_dec(&pkc->blk_fill_in_prog); | ||
| 958 | } | ||
| 959 | |||
| 960 | static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, | ||
| 961 | struct tpacket3_hdr *ppd) | ||
| 962 | { | ||
| 963 | ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb); | ||
| 964 | } | ||
| 965 | |||
| 966 | static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, | ||
| 967 | struct tpacket3_hdr *ppd) | ||
| 968 | { | ||
| 969 | ppd->hv1.tp_rxhash = 0; | ||
| 970 | } | ||
| 971 | |||
| 972 | static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, | ||
| 973 | struct tpacket3_hdr *ppd) | ||
| 974 | { | ||
| 975 | if (vlan_tx_tag_present(pkc->skb)) { | ||
| 976 | ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); | ||
| 977 | ppd->tp_status = TP_STATUS_VLAN_VALID; | ||
| 978 | } else { | ||
| 979 | ppd->hv1.tp_vlan_tci = ppd->tp_status = 0; | ||
| 980 | } | ||
| 981 | } | ||
| 982 | |||
| 983 | static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc, | ||
| 984 | struct tpacket3_hdr *ppd) | ||
| 985 | { | ||
| 986 | prb_fill_vlan_info(pkc, ppd); | ||
| 987 | |||
| 988 | if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH) | ||
| 989 | prb_fill_rxhash(pkc, ppd); | ||
| 990 | else | ||
| 991 | prb_clear_rxhash(pkc, ppd); | ||
| 992 | } | ||
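The fill-ops above are gated by the feature request word carried in struct tpacket_req3. A sketch of requesting and reading the rxhash (hypothetical helper names):

```c
#include <linux/if_packet.h>
#include <stdint.h>

/* Ask the kernel to run prb_fill_rxhash() for every stored packet */
static void request_rxhash(struct tpacket_req3 *req3)
{
	req3->tp_feature_req_word |= TP_FT_REQ_FILL_RXHASH;
}

/* Per packet: 0 when the feature bit was off (prb_clear_rxhash) */
static uint32_t pkt_rxhash(const struct tpacket3_hdr *ppd)
{
	return ppd->hv1.tp_rxhash;
}
```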
| 993 | |||
| 994 | static void prb_fill_curr_block(char *curr, | ||
| 995 | struct tpacket_kbdq_core *pkc, | ||
| 996 | struct tpacket_block_desc *pbd, | ||
| 997 | unsigned int len) | ||
| 998 | { | ||
| 999 | struct tpacket3_hdr *ppd; | ||
| 1000 | |||
| 1001 | ppd = (struct tpacket3_hdr *)curr; | ||
| 1002 | ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len); | ||
| 1003 | pkc->prev = curr; | ||
| 1004 | pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len); | ||
| 1005 | BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len); | ||
| 1006 | BLOCK_NUM_PKTS(pbd) += 1; | ||
| 1007 | atomic_inc(&pkc->blk_fill_in_prog); | ||
| 1008 | prb_run_all_ft_ops(pkc, ppd); | ||
| 1009 | } | ||
| 1010 | |||
| 1011 | /* Assumes caller has the sk->rx_queue.lock */ | ||
| 1012 | static void *__packet_lookup_frame_in_block(struct packet_sock *po, | ||
| 1013 | struct sk_buff *skb, | ||
| 1014 | int status, | ||
| 1015 | unsigned int len | ||
| 1016 | ) | ||
| 1017 | { | ||
| 1018 | struct tpacket_kbdq_core *pkc; | ||
| 1019 | struct tpacket_block_desc *pbd; | ||
| 1020 | char *curr, *end; | ||
| 1021 | |||
| 1022 | pkc = GET_PBDQC_FROM_RB(((struct packet_ring_buffer *)&po->rx_ring)); | ||
| 1023 | pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); | ||
| 1024 | |||
| 1025 | /* Queue is frozen when user space is lagging behind */ | ||
| 1026 | if (prb_queue_frozen(pkc)) { | ||
| 1027 | /* | ||
| 1028 | * Check if the last block, which caused the queue to freeze, | ||
| 1029 | * is still in use by user-space. | ||
| 1030 | */ | ||
| 1031 | if (prb_curr_blk_in_use(pkc, pbd)) { | ||
| 1032 | /* Can't record this packet */ | ||
| 1033 | return NULL; | ||
| 1034 | } else { | ||
| 1035 | /* | ||
| 1036 | * Ok, the block was released by user-space. | ||
| 1037 | * Now let's open that block. | ||
| 1038 | * Opening a block also thaws the queue. | ||
| 1039 | * Thawing is a side effect. | ||
| 1040 | */ | ||
| 1041 | prb_open_block(pkc, pbd); | ||
| 1042 | } | ||
| 1043 | } | ||
| 1044 | |||
| 1045 | smp_mb(); | ||
| 1046 | curr = pkc->nxt_offset; | ||
| 1047 | pkc->skb = skb; | ||
| 1048 | end = (char *) ((char *)pbd + pkc->kblk_size); | ||
| 1049 | |||
| 1050 | /* first try the current block */ | ||
| 1051 | if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) { | ||
| 1052 | prb_fill_curr_block(curr, pkc, pbd, len); | ||
| 1053 | return (void *)curr; | ||
| 1054 | } | ||
| 1055 | |||
| 1056 | /* Ok, close the current block */ | ||
| 1057 | prb_retire_current_block(pkc, po, 0); | ||
| 1058 | |||
| 1059 | /* Now, try to dispatch the next block */ | ||
| 1060 | curr = (char *)prb_dispatch_next_block(pkc, po); | ||
| 1061 | if (curr) { | ||
| 1062 | pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); | ||
| 1063 | prb_fill_curr_block(curr, pkc, pbd, len); | ||
| 1064 | return (void *)curr; | ||
| 1065 | } | ||
| 1066 | |||
| 1067 | /* | ||
| 1068 | * No free blocks are available. User-space hasn't caught up yet. | ||
| 1069 | * Queue was just frozen and now this packet will get dropped. | ||
| 1070 | */ | ||
| 1071 | return NULL; | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | static void *packet_current_rx_frame(struct packet_sock *po, | ||
| 1075 | struct sk_buff *skb, | ||
| 1076 | int status, unsigned int len) | ||
| 1077 | { | ||
| 1078 | char *curr = NULL; | ||
| 1079 | switch (po->tp_version) { | ||
| 1080 | case TPACKET_V1: | ||
| 1081 | case TPACKET_V2: | ||
| 1082 | curr = packet_lookup_frame(po, &po->rx_ring, | ||
| 1083 | po->rx_ring.head, status); | ||
| 1084 | return curr; | ||
| 1085 | case TPACKET_V3: | ||
| 1086 | return __packet_lookup_frame_in_block(po, skb, status, len); | ||
| 1087 | default: | ||
| 1088 | WARN(1, "TPACKET version not supported\n"); | ||
| 1089 | BUG(); | ||
| 1090 | return 0; | ||
| 1091 | } | ||
| 1092 | } | ||
| 1093 | |||
| 1094 | static void *prb_lookup_block(struct packet_sock *po, | ||
| 1095 | struct packet_ring_buffer *rb, | ||
| 1096 | unsigned int previous, | ||
| 1097 | int status) | ||
| 1098 | { | ||
| 1099 | struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); | ||
| 1100 | struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous); | ||
| 1101 | |||
| 1102 | if (status != BLOCK_STATUS(pbd)) | ||
| 1103 | return NULL; | ||
| 1104 | return pbd; | ||
| 1105 | } | ||
| 1106 | |||
| 1107 | static int prb_previous_blk_num(struct packet_ring_buffer *rb) | ||
| 1108 | { | ||
| 1109 | unsigned int prev; | ||
| 1110 | if (rb->prb_bdqc.kactive_blk_num) | ||
| 1111 | prev = rb->prb_bdqc.kactive_blk_num-1; | ||
| 1112 | else | ||
| 1113 | prev = rb->prb_bdqc.knum_blocks-1; | ||
| 1114 | return prev; | ||
| 1115 | } | ||
| 1116 | |||
| 1117 | /* Assumes caller has held the rx_queue.lock */ | ||
| 1118 | static void *__prb_previous_block(struct packet_sock *po, | ||
| 1119 | struct packet_ring_buffer *rb, | ||
| 1120 | int status) | ||
| 1121 | { | ||
| 1122 | unsigned int previous = prb_previous_blk_num(rb); | ||
| 1123 | return prb_lookup_block(po, rb, previous, status); | ||
| 1124 | } | ||
| 1125 | |||
| 1126 | static void *packet_previous_rx_frame(struct packet_sock *po, | ||
| 1127 | struct packet_ring_buffer *rb, | ||
| 1128 | int status) | ||
| 1129 | { | ||
| 1130 | if (po->tp_version <= TPACKET_V2) | ||
| 1131 | return packet_previous_frame(po, rb, status); | ||
| 1132 | |||
| 1133 | return __prb_previous_block(po, rb, status); | ||
| 1134 | } | ||
| 1135 | |||
| 1136 | static void packet_increment_rx_head(struct packet_sock *po, | ||
| 1137 | struct packet_ring_buffer *rb) | ||
| 1138 | { | ||
| 1139 | switch (po->tp_version) { | ||
| 1140 | case TPACKET_V1: | ||
| 1141 | case TPACKET_V2: | ||
| 1142 | return packet_increment_head(rb); | ||
| 1143 | case TPACKET_V3: | ||
| 1144 | default: | ||
| 1145 | WARN(1, "TPACKET version not supported.\n"); | ||
| 1146 | BUG(); | ||
| 1147 | return; | ||
| 1148 | } | ||
| 1149 | } | ||
| 1150 | |||
| 1151 | static void *packet_previous_frame(struct packet_sock *po, | ||
| 393 | struct packet_ring_buffer *rb, | 1152 | struct packet_ring_buffer *rb, |
| 394 | int status) | 1153 | int status) |
| 395 | { | 1154 | { |
| @@ -397,7 +1156,7 @@ static inline void *packet_previous_frame(struct packet_sock *po, | |||
| 397 | return packet_lookup_frame(po, rb, previous, status); | 1156 | return packet_lookup_frame(po, rb, previous, status); |
| 398 | } | 1157 | } |
| 399 | 1158 | ||
| 400 | static inline void packet_increment_head(struct packet_ring_buffer *buff) | 1159 | static void packet_increment_head(struct packet_ring_buffer *buff) |
| 401 | { | 1160 | { |
| 402 | buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; | 1161 | buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; |
| 403 | } | 1162 | } |
| @@ -454,43 +1213,6 @@ static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *sk | |||
| 454 | return f->arr[cpu % num]; | 1213 | return f->arr[cpu % num]; |
| 455 | } | 1214 | } |
| 456 | 1215 | ||
| 457 | static struct sk_buff *fanout_check_defrag(struct sk_buff *skb) | ||
| 458 | { | ||
| 459 | #ifdef CONFIG_INET | ||
| 460 | const struct iphdr *iph; | ||
| 461 | u32 len; | ||
| 462 | |||
| 463 | if (skb->protocol != htons(ETH_P_IP)) | ||
| 464 | return skb; | ||
| 465 | |||
| 466 | if (!pskb_may_pull(skb, sizeof(struct iphdr))) | ||
| 467 | return skb; | ||
| 468 | |||
| 469 | iph = ip_hdr(skb); | ||
| 470 | if (iph->ihl < 5 || iph->version != 4) | ||
| 471 | return skb; | ||
| 472 | if (!pskb_may_pull(skb, iph->ihl*4)) | ||
| 473 | return skb; | ||
| 474 | iph = ip_hdr(skb); | ||
| 475 | len = ntohs(iph->tot_len); | ||
| 476 | if (skb->len < len || len < (iph->ihl * 4)) | ||
| 477 | return skb; | ||
| 478 | |||
| 479 | if (ip_is_fragment(ip_hdr(skb))) { | ||
| 480 | skb = skb_share_check(skb, GFP_ATOMIC); | ||
| 481 | if (skb) { | ||
| 482 | if (pskb_trim_rcsum(skb, len)) | ||
| 483 | return skb; | ||
| 484 | memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); | ||
| 485 | if (ip_defrag(skb, IP_DEFRAG_AF_PACKET)) | ||
| 486 | return NULL; | ||
| 487 | skb->rxhash = 0; | ||
| 488 | } | ||
| 489 | } | ||
| 490 | #endif | ||
| 491 | return skb; | ||
| 492 | } | ||
| 493 | |||
| 494 | static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, | 1216 | static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, |
| 495 | struct packet_type *pt, struct net_device *orig_dev) | 1217 | struct packet_type *pt, struct net_device *orig_dev) |
| 496 | { | 1218 | { |
| @@ -509,7 +1231,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, | |||
| 509 | case PACKET_FANOUT_HASH: | 1231 | case PACKET_FANOUT_HASH: |
| 510 | default: | 1232 | default: |
| 511 | if (f->defrag) { | 1233 | if (f->defrag) { |
| 512 | skb = fanout_check_defrag(skb); | 1234 | skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); |
| 513 | if (!skb) | 1235 | if (!skb) |
| 514 | return 0; | 1236 | return 0; |
| 515 | } | 1237 | } |
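With the shared ip_check_defrag() in place, defragmentation is still opted into per fanout group via PACKET_FANOUT_FLAG_DEFRAG, so all fragments of a datagram hash to the same socket. A minimal setup sketch (`enable_fanout` is a hypothetical helper):

```c
#include <linux/if_packet.h>
#include <sys/socket.h>

static int enable_fanout(int fd, unsigned int group_id)
{
	/* fanout id in the low 16 bits, type and flags in the high 16 */
	unsigned int arg = group_id |
			   ((PACKET_FANOUT_HASH |
			     PACKET_FANOUT_FLAG_DEFRAG) << 16);

	return setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
			  &arg, sizeof(arg));
}
```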
| @@ -836,7 +1558,7 @@ out_free: | |||
| 836 | return err; | 1558 | return err; |
| 837 | } | 1559 | } |
| 838 | 1560 | ||
| 839 | static inline unsigned int run_filter(const struct sk_buff *skb, | 1561 | static unsigned int run_filter(const struct sk_buff *skb, |
| 840 | const struct sock *sk, | 1562 | const struct sock *sk, |
| 841 | unsigned int res) | 1563 | unsigned int res) |
| 842 | { | 1564 | { |
| @@ -961,7 +1683,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, | |||
| 961 | return 0; | 1683 | return 0; |
| 962 | 1684 | ||
| 963 | drop_n_acct: | 1685 | drop_n_acct: |
| 964 | po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); | 1686 | spin_lock(&sk->sk_receive_queue.lock); |
| 1687 | po->stats.tp_drops++; | ||
| 1688 | atomic_inc(&sk->sk_drops); | ||
| 1689 | spin_unlock(&sk->sk_receive_queue.lock); | ||
| 965 | 1690 | ||
| 966 | drop_n_restore: | 1691 | drop_n_restore: |
| 967 | if (skb_head != skb->data && skb_shared(skb)) { | 1692 | if (skb_head != skb->data && skb_shared(skb)) { |
| @@ -982,12 +1707,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, | |||
| 982 | union { | 1707 | union { |
| 983 | struct tpacket_hdr *h1; | 1708 | struct tpacket_hdr *h1; |
| 984 | struct tpacket2_hdr *h2; | 1709 | struct tpacket2_hdr *h2; |
| 1710 | struct tpacket3_hdr *h3; | ||
| 985 | void *raw; | 1711 | void *raw; |
| 986 | } h; | 1712 | } h; |
| 987 | u8 *skb_head = skb->data; | 1713 | u8 *skb_head = skb->data; |
| 988 | int skb_len = skb->len; | 1714 | int skb_len = skb->len; |
| 989 | unsigned int snaplen, res; | 1715 | unsigned int snaplen, res; |
| 990 | unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; | 1716 | unsigned long status = TP_STATUS_USER; |
| 991 | unsigned short macoff, netoff, hdrlen; | 1717 | unsigned short macoff, netoff, hdrlen; |
| 992 | struct sk_buff *copy_skb = NULL; | 1718 | struct sk_buff *copy_skb = NULL; |
| 993 | struct timeval tv; | 1719 | struct timeval tv; |
| @@ -1033,37 +1759,46 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, | |||
| 1033 | po->tp_reserve; | 1759 | po->tp_reserve; |
| 1034 | macoff = netoff - maclen; | 1760 | macoff = netoff - maclen; |
| 1035 | } | 1761 | } |
| 1036 | 1762 | if (po->tp_version <= TPACKET_V2) { | |
| 1037 | if (macoff + snaplen > po->rx_ring.frame_size) { | 1763 | if (macoff + snaplen > po->rx_ring.frame_size) { |
| 1038 | if (po->copy_thresh && | 1764 | if (po->copy_thresh && |
| 1039 | atomic_read(&sk->sk_rmem_alloc) + skb->truesize < | 1765 | atomic_read(&sk->sk_rmem_alloc) + skb->truesize |
| 1040 | (unsigned)sk->sk_rcvbuf) { | 1766 | < (unsigned)sk->sk_rcvbuf) { |
| 1041 | if (skb_shared(skb)) { | 1767 | if (skb_shared(skb)) { |
| 1042 | copy_skb = skb_clone(skb, GFP_ATOMIC); | 1768 | copy_skb = skb_clone(skb, GFP_ATOMIC); |
| 1043 | } else { | 1769 | } else { |
| 1044 | copy_skb = skb_get(skb); | 1770 | copy_skb = skb_get(skb); |
| 1045 | skb_head = skb->data; | 1771 | skb_head = skb->data; |
| 1772 | } | ||
| 1773 | if (copy_skb) | ||
| 1774 | skb_set_owner_r(copy_skb, sk); | ||
| 1046 | } | 1775 | } |
| 1047 | if (copy_skb) | 1776 | snaplen = po->rx_ring.frame_size - macoff; |
| 1048 | skb_set_owner_r(copy_skb, sk); | 1777 | if ((int)snaplen < 0) |
| 1778 | snaplen = 0; | ||
| 1049 | } | 1779 | } |
| 1050 | snaplen = po->rx_ring.frame_size - macoff; | ||
| 1051 | if ((int)snaplen < 0) | ||
| 1052 | snaplen = 0; | ||
| 1053 | } | 1780 | } |
| 1054 | |||
| 1055 | spin_lock(&sk->sk_receive_queue.lock); | 1781 | spin_lock(&sk->sk_receive_queue.lock); |
| 1056 | h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL); | 1782 | h.raw = packet_current_rx_frame(po, skb, |
| 1783 | TP_STATUS_KERNEL, (macoff+snaplen)); | ||
| 1057 | if (!h.raw) | 1784 | if (!h.raw) |
| 1058 | goto ring_is_full; | 1785 | goto ring_is_full; |
| 1059 | packet_increment_head(&po->rx_ring); | 1786 | if (po->tp_version <= TPACKET_V2) { |
| 1787 | packet_increment_rx_head(po, &po->rx_ring); | ||
| 1788 | /* | ||
| 1789 | * LOSING will be reported till you read the stats, | ||
| 1790 | * because it's COR - Clear On Read. | ||
| 1791 | * Anyways, moving it for V1/V2 only as V3 doesn't need this | ||
| 1792 | * at packet level. | ||
| 1793 | */ | ||
| 1794 | if (po->stats.tp_drops) | ||
| 1795 | status |= TP_STATUS_LOSING; | ||
| 1796 | } | ||
| 1060 | po->stats.tp_packets++; | 1797 | po->stats.tp_packets++; |
| 1061 | if (copy_skb) { | 1798 | if (copy_skb) { |
| 1062 | status |= TP_STATUS_COPY; | 1799 | status |= TP_STATUS_COPY; |
| 1063 | __skb_queue_tail(&sk->sk_receive_queue, copy_skb); | 1800 | __skb_queue_tail(&sk->sk_receive_queue, copy_skb); |
| 1064 | } | 1801 | } |
| 1065 | if (!po->stats.tp_drops) | ||
| 1066 | status &= ~TP_STATUS_LOSING; | ||
| 1067 | spin_unlock(&sk->sk_receive_queue.lock); | 1802 | spin_unlock(&sk->sk_receive_queue.lock); |
| 1068 | 1803 | ||
| 1069 | skb_copy_bits(skb, 0, h.raw + macoff, snaplen); | 1804 | skb_copy_bits(skb, 0, h.raw + macoff, snaplen); |
| @@ -1114,6 +1849,29 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, | |||
| 1114 | h.h2->tp_padding = 0; | 1849 | h.h2->tp_padding = 0; |
| 1115 | hdrlen = sizeof(*h.h2); | 1850 | hdrlen = sizeof(*h.h2); |
| 1116 | break; | 1851 | break; |
| 1852 | case TPACKET_V3: | ||
| 1853 | /* tp_next_offset and vlan are already populated above, | ||
| 1854 | * so DON'T clear those fields here. | ||
| 1855 | */ | ||
| 1856 | h.h3->tp_status |= status; | ||
| 1857 | h.h3->tp_len = skb->len; | ||
| 1858 | h.h3->tp_snaplen = snaplen; | ||
| 1859 | h.h3->tp_mac = macoff; | ||
| 1860 | h.h3->tp_net = netoff; | ||
| 1861 | if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) | ||
| 1862 | && shhwtstamps->syststamp.tv64) | ||
| 1863 | ts = ktime_to_timespec(shhwtstamps->syststamp); | ||
| 1864 | else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) | ||
| 1865 | && shhwtstamps->hwtstamp.tv64) | ||
| 1866 | ts = ktime_to_timespec(shhwtstamps->hwtstamp); | ||
| 1867 | else if (skb->tstamp.tv64) | ||
| 1868 | ts = ktime_to_timespec(skb->tstamp); | ||
| 1869 | else | ||
| 1870 | getnstimeofday(&ts); | ||
| 1871 | h.h3->tp_sec = ts.tv_sec; | ||
| 1872 | h.h3->tp_nsec = ts.tv_nsec; | ||
| 1873 | hdrlen = sizeof(*h.h3); | ||
| 1874 | break; | ||
| 1117 | default: | 1875 | default: |
| 1118 | BUG(); | 1876 | BUG(); |
| 1119 | } | 1877 | } |
| @@ -1134,13 +1892,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, | |||
| 1134 | { | 1892 | { |
| 1135 | u8 *start, *end; | 1893 | u8 *start, *end; |
| 1136 | 1894 | ||
| 1137 | end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen); | 1895 | if (po->tp_version <= TPACKET_V2) { |
| 1138 | for (start = h.raw; start < end; start += PAGE_SIZE) | 1896 | end = (u8 *)PAGE_ALIGN((unsigned long)h.raw |
| 1139 | flush_dcache_page(pgv_to_page(start)); | 1897 | + macoff + snaplen); |
| 1898 | for (start = h.raw; start < end; start += PAGE_SIZE) | ||
| 1899 | flush_dcache_page(pgv_to_page(start)); | ||
| 1900 | } | ||
| 1140 | smp_wmb(); | 1901 | smp_wmb(); |
| 1141 | } | 1902 | } |
| 1142 | #endif | 1903 | #endif |
| 1143 | __packet_set_status(po, h.raw, status); | 1904 | if (po->tp_version <= TPACKET_V2) |
| 1905 | __packet_set_status(po, h.raw, status); | ||
| 1906 | else | ||
| 1907 | prb_clear_blk_fill_status(&po->rx_ring); | ||
| 1144 | 1908 | ||
| 1145 | sk->sk_data_ready(sk, 0); | 1909 | sk->sk_data_ready(sk, 0); |
| 1146 | 1910 | ||
| @@ -1167,8 +1931,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) | |||
| 1167 | struct packet_sock *po = pkt_sk(skb->sk); | 1931 | struct packet_sock *po = pkt_sk(skb->sk); |
| 1168 | void *ph; | 1932 | void *ph; |
| 1169 | 1933 | ||
| 1170 | BUG_ON(skb == NULL); | ||
| 1171 | |||
| 1172 | if (likely(po->tx_ring.pg_vec)) { | 1934 | if (likely(po->tx_ring.pg_vec)) { |
| 1173 | ph = skb_shinfo(skb)->destructor_arg; | 1935 | ph = skb_shinfo(skb)->destructor_arg; |
| 1174 | BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); | 1936 | BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); |
| @@ -1405,10 +2167,10 @@ out: | |||
| 1405 | return err; | 2167 | return err; |
| 1406 | } | 2168 | } |
| 1407 | 2169 | ||
| 1408 | static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, | 2170 | static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, |
| 1409 | size_t reserve, size_t len, | 2171 | size_t reserve, size_t len, |
| 1410 | size_t linear, int noblock, | 2172 | size_t linear, int noblock, |
| 1411 | int *err) | 2173 | int *err) |
| 1412 | { | 2174 | { |
| 1413 | struct sk_buff *skb; | 2175 | struct sk_buff *skb; |
| 1414 | 2176 | ||
| @@ -1631,7 +2393,7 @@ static int packet_release(struct socket *sock) | |||
| 1631 | struct sock *sk = sock->sk; | 2393 | struct sock *sk = sock->sk; |
| 1632 | struct packet_sock *po; | 2394 | struct packet_sock *po; |
| 1633 | struct net *net; | 2395 | struct net *net; |
| 1634 | struct tpacket_req req; | 2396 | union tpacket_req_u req_u; |
| 1635 | 2397 | ||
| 1636 | if (!sk) | 2398 | if (!sk) |
| 1637 | return 0; | 2399 | return 0; |
| @@ -1654,13 +2416,13 @@ static int packet_release(struct socket *sock) | |||
| 1654 | 2416 | ||
| 1655 | packet_flush_mclist(sk); | 2417 | packet_flush_mclist(sk); |
| 1656 | 2418 | ||
| 1657 | memset(&req, 0, sizeof(req)); | 2419 | memset(&req_u, 0, sizeof(req_u)); |
| 1658 | 2420 | ||
| 1659 | if (po->rx_ring.pg_vec) | 2421 | if (po->rx_ring.pg_vec) |
| 1660 | packet_set_ring(sk, &req, 1, 0); | 2422 | packet_set_ring(sk, &req_u, 1, 0); |
| 1661 | 2423 | ||
| 1662 | if (po->tx_ring.pg_vec) | 2424 | if (po->tx_ring.pg_vec) |
| 1663 | packet_set_ring(sk, &req, 1, 1); | 2425 | packet_set_ring(sk, &req_u, 1, 1); |
| 1664 | 2426 | ||
| 1665 | fanout_release(sk); | 2427 | fanout_release(sk); |
| 1666 | 2428 | ||
| @@ -2280,15 +3042,27 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv | |||
| 2280 | case PACKET_RX_RING: | 3042 | case PACKET_RX_RING: |
| 2281 | case PACKET_TX_RING: | 3043 | case PACKET_TX_RING: |
| 2282 | { | 3044 | { |
| 2283 | struct tpacket_req req; | 3045 | union tpacket_req_u req_u; |
| 3046 | int len; | ||
| 2284 | 3047 | ||
| 2285 | if (optlen < sizeof(req)) | 3048 | switch (po->tp_version) { |
| 3049 | case TPACKET_V1: | ||
| 3050 | case TPACKET_V2: | ||
| 3051 | len = sizeof(req_u.req); | ||
| 3052 | break; | ||
| 3053 | case TPACKET_V3: | ||
| 3054 | default: | ||
| 3055 | len = sizeof(req_u.req3); | ||
| 3056 | break; | ||
| 3057 | } | ||
| 3058 | if (optlen < len) | ||
| 2286 | return -EINVAL; | 3059 | return -EINVAL; |
| 2287 | if (pkt_sk(sk)->has_vnet_hdr) | 3060 | if (pkt_sk(sk)->has_vnet_hdr) |
| 2288 | return -EINVAL; | 3061 | return -EINVAL; |
| 2289 | if (copy_from_user(&req, optval, sizeof(req))) | 3062 | if (copy_from_user(&req_u.req, optval, len)) |
| 2290 | return -EFAULT; | 3063 | return -EFAULT; |
| 2291 | return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); | 3064 | return packet_set_ring(sk, &req_u, 0, |
| 3065 | optname == PACKET_TX_RING); | ||
| 2292 | } | 3066 | } |
| 2293 | case PACKET_COPY_THRESH: | 3067 | case PACKET_COPY_THRESH: |
| 2294 | { | 3068 | { |
| @@ -2315,6 +3089,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv | |||
| 2315 | switch (val) { | 3089 | switch (val) { |
| 2316 | case TPACKET_V1: | 3090 | case TPACKET_V1: |
| 2317 | case TPACKET_V2: | 3091 | case TPACKET_V2: |
| 3092 | case TPACKET_V3: | ||
| 2318 | po->tp_version = val; | 3093 | po->tp_version = val; |
| 2319 | return 0; | 3094 | return 0; |
| 2320 | default: | 3095 | default: |
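Putting the two options together, a minimal V3 ring setup sketch; it assumes `fd` came from `socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL))`, and `setup_v3_rx_ring` is a hypothetical helper:

```c
#include <linux/if_packet.h>
#include <sys/mman.h>
#include <sys/socket.h>

static void *setup_v3_rx_ring(int fd, struct tpacket_req3 *req3)
{
	int ver = TPACKET_V3;
	void *ring;

	/* The version must be selected before the ring is sized */
	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION,
		       &ver, sizeof(ver)) < 0)
		return NULL;

	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING,
		       req3, sizeof(*req3)) < 0)
		return NULL;

	/* One contiguous mapping covering all blocks */
	ring = mmap(NULL, (size_t)req3->tp_block_size * req3->tp_block_nr,
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	return ring == MAP_FAILED ? NULL : ring;
}
```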
| @@ -2424,6 +3199,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, | |||
| 2424 | struct packet_sock *po = pkt_sk(sk); | 3199 | struct packet_sock *po = pkt_sk(sk); |
| 2425 | void *data; | 3200 | void *data; |
| 2426 | struct tpacket_stats st; | 3201 | struct tpacket_stats st; |
| 3202 | union tpacket_stats_u st_u; | ||
| 2427 | 3203 | ||
| 2428 | if (level != SOL_PACKET) | 3204 | if (level != SOL_PACKET) |
| 2429 | return -ENOPROTOOPT; | 3205 | return -ENOPROTOOPT; |
| @@ -2436,15 +3212,27 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, | |||
| 2436 | 3212 | ||
| 2437 | switch (optname) { | 3213 | switch (optname) { |
| 2438 | case PACKET_STATISTICS: | 3214 | case PACKET_STATISTICS: |
| 2439 | if (len > sizeof(struct tpacket_stats)) | 3215 | if (po->tp_version == TPACKET_V3) { |
| 2440 | len = sizeof(struct tpacket_stats); | 3216 | len = sizeof(struct tpacket_stats_v3); |
| 3217 | } else { | ||
| 3218 | if (len > sizeof(struct tpacket_stats)) | ||
| 3219 | len = sizeof(struct tpacket_stats); | ||
| 3220 | } | ||
| 2441 | spin_lock_bh(&sk->sk_receive_queue.lock); | 3221 | spin_lock_bh(&sk->sk_receive_queue.lock); |
| 2442 | st = po->stats; | 3222 | if (po->tp_version == TPACKET_V3) { |
| 3223 | memcpy(&st_u.stats3, &po->stats, | ||
| 3224 | sizeof(struct tpacket_stats)); | ||
| 3225 | st_u.stats3.tp_freeze_q_cnt = | ||
| 3226 | po->stats_u.stats3.tp_freeze_q_cnt; | ||
| 3227 | st_u.stats3.tp_packets += po->stats.tp_drops; | ||
| 3228 | data = &st_u.stats3; | ||
| 3229 | } else { | ||
| 3230 | st = po->stats; | ||
| 3231 | st.tp_packets += st.tp_drops; | ||
| 3232 | data = &st; | ||
| 3233 | } | ||
| 2443 | memset(&po->stats, 0, sizeof(st)); | 3234 | memset(&po->stats, 0, sizeof(st)); |
| 2444 | spin_unlock_bh(&sk->sk_receive_queue.lock); | 3235 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
| 2445 | st.tp_packets += st.tp_drops; | ||
| 2446 | |||
| 2447 | data = &st; | ||
| 2448 | break; | 3236 | break; |
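A matching sketch for draining the V3 counters (`dump_v3_stats` is a hypothetical helper); they are clear-on-read, as the memset under the queue lock above implies:

```c
#include <linux/if_packet.h>
#include <sys/socket.h>
#include <stdio.h>

static void dump_v3_stats(int fd)
{
	struct tpacket_stats_v3 st;
	socklen_t len = sizeof(st);

	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
		printf("pkts=%u drops=%u freeze_q=%u\n",
		       st.tp_packets, st.tp_drops, st.tp_freeze_q_cnt);
}
```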
| 2449 | case PACKET_AUXDATA: | 3237 | case PACKET_AUXDATA: |
| 2450 | if (len > sizeof(int)) | 3238 | if (len > sizeof(int)) |
| @@ -2485,6 +3273,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, | |||
| 2485 | case TPACKET_V2: | 3273 | case TPACKET_V2: |
| 2486 | val = sizeof(struct tpacket2_hdr); | 3274 | val = sizeof(struct tpacket2_hdr); |
| 2487 | break; | 3275 | break; |
| 3276 | case TPACKET_V3: | ||
| 3277 | val = sizeof(struct tpacket3_hdr); | ||
| 3278 | break; | ||
| 2488 | default: | 3279 | default: |
| 2489 | return -EINVAL; | 3280 | return -EINVAL; |
| 2490 | } | 3281 | } |
| @@ -2641,7 +3432,8 @@ static unsigned int packet_poll(struct file *file, struct socket *sock, | |||
| 2641 | 3432 | ||
| 2642 | spin_lock_bh(&sk->sk_receive_queue.lock); | 3433 | spin_lock_bh(&sk->sk_receive_queue.lock); |
| 2643 | if (po->rx_ring.pg_vec) { | 3434 | if (po->rx_ring.pg_vec) { |
| 2644 | if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL)) | 3435 | if (!packet_previous_rx_frame(po, &po->rx_ring, |
| 3436 | TP_STATUS_KERNEL)) | ||
| 2645 | mask |= POLLIN | POLLRDNORM; | 3437 | mask |= POLLIN | POLLRDNORM; |
| 2646 | } | 3438 | } |
| 2647 | spin_unlock_bh(&sk->sk_receive_queue.lock); | 3439 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
| @@ -2702,7 +3494,7 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order, | |||
| 2702 | kfree(pg_vec); | 3494 | kfree(pg_vec); |
| 2703 | } | 3495 | } |
| 2704 | 3496 | ||
| 2705 | static inline char *alloc_one_pg_vec_page(unsigned long order) | 3497 | static char *alloc_one_pg_vec_page(unsigned long order) |
| 2706 | { | 3498 | { |
| 2707 | char *buffer = NULL; | 3499 | char *buffer = NULL; |
| 2708 | gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | | 3500 | gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | |
| @@ -2760,7 +3552,7 @@ out_free_pgvec: | |||
| 2760 | goto out; | 3552 | goto out; |
| 2761 | } | 3553 | } |
| 2762 | 3554 | ||
| 2763 | static int packet_set_ring(struct sock *sk, struct tpacket_req *req, | 3555 | static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, |
| 2764 | int closing, int tx_ring) | 3556 | int closing, int tx_ring) |
| 2765 | { | 3557 | { |
| 2766 | struct pgv *pg_vec = NULL; | 3558 | struct pgv *pg_vec = NULL; |
| @@ -2769,7 +3561,15 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, | |||
| 2769 | struct packet_ring_buffer *rb; | 3561 | struct packet_ring_buffer *rb; |
| 2770 | struct sk_buff_head *rb_queue; | 3562 | struct sk_buff_head *rb_queue; |
| 2771 | __be16 num; | 3563 | __be16 num; |
| 2772 | int err; | 3564 | int err = -EINVAL; |
| 3565 | /* Aliased to keep code churn minimal */ | ||
| 3566 | struct tpacket_req *req = &req_u->req; | ||
| 3567 | |||
| 3568 | /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ | ||
| 3569 | if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { | ||
| 3570 | WARN(1, "Tx-ring is not supported.\n"); | ||
| 3571 | goto out; | ||
| 3572 | } | ||
| 2773 | 3573 | ||
| 2774 | rb = tx_ring ? &po->tx_ring : &po->rx_ring; | 3574 | rb = tx_ring ? &po->tx_ring : &po->rx_ring; |
| 2775 | rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; | 3575 | rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; |
| @@ -2795,6 +3595,9 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, | |||
| 2795 | case TPACKET_V2: | 3595 | case TPACKET_V2: |
| 2796 | po->tp_hdrlen = TPACKET2_HDRLEN; | 3596 | po->tp_hdrlen = TPACKET2_HDRLEN; |
| 2797 | break; | 3597 | break; |
| 3598 | case TPACKET_V3: | ||
| 3599 | po->tp_hdrlen = TPACKET3_HDRLEN; | ||
| 3600 | break; | ||
| 2798 | } | 3601 | } |
| 2799 | 3602 | ||
| 2800 | err = -EINVAL; | 3603 | err = -EINVAL; |
| @@ -2820,6 +3623,17 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, | |||
| 2820 | pg_vec = alloc_pg_vec(req, order); | 3623 | pg_vec = alloc_pg_vec(req, order); |
| 2821 | if (unlikely(!pg_vec)) | 3624 | if (unlikely(!pg_vec)) |
| 2822 | goto out; | 3625 | goto out; |
| 3626 | switch (po->tp_version) { | ||
| 3627 | case TPACKET_V3: | ||
| 3628 | /* Transmit path is not supported. We checked | ||
| 3629 | * it above, but just being paranoid. | ||
| 3630 | */ | ||
| 3631 | if (!tx_ring) | ||
| 3632 | init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring); | ||
| 3633 | break; | ||
| 3634 | default: | ||
| 3635 | break; | ||
| 3636 | } | ||
| 2823 | } | 3637 | } |
| 2824 | /* Done */ | 3638 | /* Done */ |
| 2825 | else { | 3639 | else { |
| @@ -2872,7 +3686,11 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, | |||
| 2872 | register_prot_hook(sk); | 3686 | register_prot_hook(sk); |
| 2873 | } | 3687 | } |
| 2874 | spin_unlock(&po->bind_lock); | 3688 | spin_unlock(&po->bind_lock); |
| 2875 | 3689 | if (closing && (po->tp_version > TPACKET_V2)) { | |
| 3690 | /* Because we don't support block-based V3 on tx-ring */ | ||
| 3691 | if (!tx_ring) | ||
| 3692 | prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue); | ||
| 3693 | } | ||
| 2876 | release_sock(sk); | 3694 | release_sock(sk); |
| 2877 | 3695 | ||
| 2878 | if (pg_vec) | 3696 | if (pg_vec) |
