summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorNeil Horman <nhorman@tuxdriver.com>2016-05-02 12:20:15 -0400
committerDavid S. Miller <davem@davemloft.net>2016-05-03 00:33:14 -0400
commit6071bd1aa13ed9e41824bafad845b7b7f4df5cfd (patch)
tree0878bfd0cd038527e442d597ef4993111e631943 /net
parent9b40d5aaef542f5b2bcba41fb1e24cfcba793774 (diff)
netem: Segment GSO packets on enqueue
This was recently reported to me, and reproduced on the latest net kernel, when attempting to run netperf from a host that had a netem qdisc attached to the egress interface: [ 788.073771] ---------------------[ cut here ]--------------------------- [ 788.096716] WARNING: at net/core/dev.c:2253 skb_warn_bad_offload+0xcd/0xda() [ 788.129521] bnx2: caps=(0x00000001801949b3, 0x0000000000000000) len=2962 data_len=0 gso_size=1448 gso_type=1 ip_summed=3 [ 788.182150] Modules linked in: sch_netem kvm_amd kvm crc32_pclmul ipmi_ssif ghash_clmulni_intel sp5100_tco amd64_edac_mod aesni_intel lrw gf128mul glue_helper ablk_helper edac_mce_amd cryptd pcspkr sg edac_core hpilo ipmi_si i2c_piix4 k10temp fam15h_power hpwdt ipmi_msghandler shpchp acpi_power_meter pcc_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic mgag200 syscopyarea sysfillrect sysimgblt i2c_algo_bit drm_kms_helper ahci ata_generic pata_acpi ttm libahci crct10dif_pclmul pata_atiixp tg3 libata crct10dif_common drm crc32c_intel ptp serio_raw bnx2 r8169 hpsa pps_core i2c_core mii dm_mirror dm_region_hash dm_log dm_mod [ 788.465294] CPU: 16 PID: 0 Comm: swapper/16 Tainted: G W ------------ 3.10.0-327.el7.x86_64 #1 [ 788.511521] Hardware name: HP ProLiant DL385p Gen8, BIOS A28 12/17/2012 [ 788.542260] ffff880437c036b8 f7afc56532a53db9 ffff880437c03670 ffffffff816351f1 [ 788.576332] ffff880437c036a8 ffffffff8107b200 ffff880633e74200 ffff880231674000 [ 788.611943] 0000000000000001 0000000000000003 0000000000000000 ffff880437c03710 [ 788.647241] Call Trace: [ 788.658817] <IRQ> [<ffffffff816351f1>] dump_stack+0x19/0x1b [ 788.686193] [<ffffffff8107b200>] warn_slowpath_common+0x70/0xb0 [ 788.713803] [<ffffffff8107b29c>] warn_slowpath_fmt+0x5c/0x80 [ 788.741314] [<ffffffff812f92f3>] ? 
___ratelimit+0x93/0x100 [ 788.767018] [<ffffffff81637f49>] skb_warn_bad_offload+0xcd/0xda [ 788.796117] [<ffffffff8152950c>] skb_checksum_help+0x17c/0x190 [ 788.823392] [<ffffffffa01463a1>] netem_enqueue+0x741/0x7c0 [sch_netem] [ 788.854487] [<ffffffff8152cb58>] dev_queue_xmit+0x2a8/0x570 [ 788.880870] [<ffffffff8156ae1d>] ip_finish_output+0x53d/0x7d0 ... The problem occurs because netem is not prepared to handle GSO packets (as it uses skb_checksum_help in its enqueue path, which cannot manipulate these frames). The solution, I think, is to simply segment the skb in a similar fashion to the way we do in __dev_queue_xmit (via validate_xmit_skb), with some minor changes. When we decide to corrupt an skb, if the frame is GSO, we segment it, corrupt the first segment, and enqueue the remaining ones. Tested successfully by myself on the latest net kernel, to which this applies. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> CC: Jamal Hadi Salim <jhs@mojatatu.com> CC: "David S. Miller" <davem@davemloft.net> CC: netem@lists.linux-foundation.org CC: eric.dumazet@gmail.com CC: stephen@networkplumber.org Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/sched/sch_netem.c61
1 files changed, 59 insertions, 2 deletions
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 9640bb39a5d2..4befe97a9034 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
395 sch->q.qlen++; 395 sch->q.qlen++;
396} 396}
397 397
398/* netem can't properly corrupt a megapacket (like we get from GSO), so when we
399 * statistically choose to corrupt one we segment it instead. Returns the head of the
400 * segment list (the caller corrupts the first segment and re-enqueues the rest), or NULL if segmentation failed.
401 */
402static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
403{
404 struct sk_buff *segs;
405 netdev_features_t features = netif_skb_features(skb);
406
407 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); /* GSO feature bits are masked out of the feature set handed to the segmenter */
408
409 if (IS_ERR_OR_NULL(segs)) { /* error pointer or NULL: no usable segment list was produced */
410 qdisc_reshape_fail(skb, sch); /* NOTE(review): presumably drops/accounts the original skb -- confirm against qdisc_reshape_fail() */
411 return NULL; /* caller treats NULL as a dropped packet */
412 }
413 consume_skb(skb); /* done with the original skb; the caller continues with the segment list only */
414 return segs;
415}
416
398/* 417/*
399 * Insert one skb into qdisc. 418 * Insert one skb into qdisc.
400 * Note: parent depends on return value to account for queue length. 419 * Note: parent depends on return value to account for queue length.
@@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
407 /* We don't fill cb now as skb_unshare() may invalidate it */ 426 /* We don't fill cb now as skb_unshare() may invalidate it */
408 struct netem_skb_cb *cb; 427 struct netem_skb_cb *cb;
409 struct sk_buff *skb2; 428 struct sk_buff *skb2;
429 struct sk_buff *segs = NULL;
430 unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
431 int nb = 0;
410 int count = 1; 432 int count = 1;
433 int rc = NET_XMIT_SUCCESS;
411 434
412 /* Random duplication */ 435 /* Random duplication */
413 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) 436 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
@@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
453 * do it now in software before we mangle it. 476 * do it now in software before we mangle it.
454 */ 477 */
455 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { 478 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
479 if (skb_is_gso(skb)) {
480 segs = netem_segment(skb, sch);
481 if (!segs)
482 return NET_XMIT_DROP;
483 } else {
484 segs = skb;
485 }
486
487 skb = segs;
488 segs = segs->next;
489
456 if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || 490 if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
457 (skb->ip_summed == CHECKSUM_PARTIAL && 491 (skb->ip_summed == CHECKSUM_PARTIAL &&
458 skb_checksum_help(skb))) 492 skb_checksum_help(skb))) {
459 return qdisc_drop(skb, sch); 493 rc = qdisc_drop(skb, sch);
494 goto finish_segs;
495 }
460 496
461 skb->data[prandom_u32() % skb_headlen(skb)] ^= 497 skb->data[prandom_u32() % skb_headlen(skb)] ^=
462 1<<(prandom_u32() % 8); 498 1<<(prandom_u32() % 8);
@@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
516 sch->qstats.requeues++; 552 sch->qstats.requeues++;
517 } 553 }
518 554
555finish_segs:
556 if (segs) {
557 while (segs) {
558 skb2 = segs->next;
559 segs->next = NULL;
560 qdisc_skb_cb(segs)->pkt_len = segs->len;
561 last_len = segs->len;
562 rc = qdisc_enqueue(segs, sch);
563 if (rc != NET_XMIT_SUCCESS) {
564 if (net_xmit_drop_count(rc))
565 qdisc_qstats_drop(sch);
566 } else {
567 nb++;
568 len += last_len;
569 }
570 segs = skb2;
571 }
572 sch->q.qlen += nb;
573 if (nb > 1)
574 qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
575 }
519 return NET_XMIT_SUCCESS; 576 return NET_XMIT_SUCCESS;
520} 577}
521 578