aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Julian Wiedmann <jwi@linux.vnet.ibm.com> 2017-12-01 04:14:50 -0500
committer David S. Miller <davem@davemloft.net> 2017-12-02 21:35:21 -0500
commit 6d69b1f1eb7a2edf8a3547f361c61f2538e054bb (patch)
tree 11944d807ae8a95faa6a3f77491a52cbe0bd1378
parent bc3ab70584696cb798b9e1e0ac8e6ced5fd4c3b8 (diff)
s390/qeth: fix GSO throughput regression
Using GSO with small MTUs currently results in a substantial throughput regression, which is caused by how qeth needs to map non-linear skbs into its IO buffer elements: compared to a linear skb, each GSO-segmented skb effectively consumes twice as many buffer elements (i.e. two instead of one) due to the additional header-only part. This causes the Output Queue to be congested with low-utilized IO buffers.

Fix this as follows: If the MSS is low enough so that a non-SG GSO segmentation produces order-0 skbs (currently ~3500 bytes), opt out from NETIF_F_SG. This is where we anticipate the biggest savings, since an SG-enabled GSO segmentation produces skbs that always consume at least two buffer elements.

Larger MSS values continue to get an SG-enabled GSO segmentation, since 1) the relative overhead of the additional header-only buffer element becomes less noticeable, and 2) the linearization overhead increases.

With the throughput regression fixed, re-enable NETIF_F_SG by default to reap the significant CPU savings of GSO.

Fixes: 5722963a8e83 ("qeth: do not turn on SG per default")
Reported-by: Nils Hoppmann <niho@de.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/s390/net/qeth_core.h3
-rw-r--r--drivers/s390/net/qeth_core_main.c31
-rw-r--r--drivers/s390/net/qeth_l2_main.c2
-rw-r--r--drivers/s390/net/qeth_l3_main.c2
4 files changed, 38 insertions, 0 deletions
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 9cd569ef43ec..15015a24f8ad 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -987,6 +987,9 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *,
987int qeth_set_features(struct net_device *, netdev_features_t); 987int qeth_set_features(struct net_device *, netdev_features_t);
988void qeth_recover_features(struct net_device *dev); 988void qeth_recover_features(struct net_device *dev);
989netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); 989netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
990netdev_features_t qeth_features_check(struct sk_buff *skb,
991 struct net_device *dev,
992 netdev_features_t features);
990int qeth_vm_request_mac(struct qeth_card *card); 993int qeth_vm_request_mac(struct qeth_card *card);
991int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len); 994int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len);
992 995
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 49b9efeba1bd..d9b0e07d4fa7 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -19,6 +19,11 @@
19#include <linux/mii.h> 19#include <linux/mii.h>
20#include <linux/kthread.h> 20#include <linux/kthread.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/if_vlan.h>
23#include <linux/netdevice.h>
24#include <linux/netdev_features.h>
25#include <linux/skbuff.h>
26
22#include <net/iucv/af_iucv.h> 27#include <net/iucv/af_iucv.h>
23#include <net/dsfield.h> 28#include <net/dsfield.h>
24 29
@@ -6438,6 +6443,32 @@ netdev_features_t qeth_fix_features(struct net_device *dev,
6438} 6443}
6439EXPORT_SYMBOL_GPL(qeth_fix_features); 6444EXPORT_SYMBOL_GPL(qeth_fix_features);
6440 6445
6446netdev_features_t qeth_features_check(struct sk_buff *skb,
6447 struct net_device *dev,
6448 netdev_features_t features)
6449{
6450 /* With NETIF_F_SG, GSO segmentation builds skbs with
6451 * a (small) linear part for the headers, and page frags for the data.
6452 * Compared to a linear skb, the header-only part consumes an
6453 * additional buffer element. This reduces buffer utilization, and
6454 * hurts throughput. So for small segments, clear NETIF_F_SG from the
6455 * returned feature mask so that each segment ends up in one element.
6456 */
6457 if (netif_needs_gso(skb, features)) {
6458 /* match the size computation in skb_segment(): */
6459 unsigned int doffset = skb->data - skb_mac_header(skb);
6460 unsigned int hsize = skb_shinfo(skb)->gso_size;
6461 unsigned int hroom = skb_headroom(skb);
6462
6463 /* drop SG (linearize) only if resulting skb allocations are order-0: */
6464 if (SKB_DATA_ALIGN(hroom + doffset + hsize) <= SKB_MAX_HEAD(0))
6465 features &= ~NETIF_F_SG;
6466 }
6467
6468 return vlan_features_check(skb, features);
6469}
6470EXPORT_SYMBOL_GPL(qeth_features_check);
6471
6441static int __init qeth_core_init(void) 6472static int __init qeth_core_init(void)
6442{ 6473{
6443 int rc; 6474 int rc;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index d2537c09126d..85162712d207 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -960,6 +960,7 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
960 .ndo_stop = qeth_l2_stop, 960 .ndo_stop = qeth_l2_stop,
961 .ndo_get_stats = qeth_get_stats, 961 .ndo_get_stats = qeth_get_stats,
962 .ndo_start_xmit = qeth_l2_hard_start_xmit, 962 .ndo_start_xmit = qeth_l2_hard_start_xmit,
963 .ndo_features_check = qeth_features_check,
963 .ndo_validate_addr = eth_validate_addr, 964 .ndo_validate_addr = eth_validate_addr,
964 .ndo_set_rx_mode = qeth_l2_set_rx_mode, 965 .ndo_set_rx_mode = qeth_l2_set_rx_mode,
965 .ndo_do_ioctl = qeth_do_ioctl, 966 .ndo_do_ioctl = qeth_do_ioctl,
@@ -1010,6 +1011,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
1010 if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) { 1011 if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
1011 card->dev->hw_features = NETIF_F_SG; 1012 card->dev->hw_features = NETIF_F_SG;
1012 card->dev->vlan_features = NETIF_F_SG; 1013 card->dev->vlan_features = NETIF_F_SG;
1014 card->dev->features |= NETIF_F_SG;
1013 /* OSA 3S and earlier has no RX/TX support */ 1015 /* OSA 3S and earlier has no RX/TX support */
1014 if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) { 1016 if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) {
1015 card->dev->hw_features |= NETIF_F_IP_CSUM; 1017 card->dev->hw_features |= NETIF_F_IP_CSUM;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index e79936b50698..46a841258fc8 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2918,6 +2918,7 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = {
2918 .ndo_stop = qeth_l3_stop, 2918 .ndo_stop = qeth_l3_stop,
2919 .ndo_get_stats = qeth_get_stats, 2919 .ndo_get_stats = qeth_get_stats,
2920 .ndo_start_xmit = qeth_l3_hard_start_xmit, 2920 .ndo_start_xmit = qeth_l3_hard_start_xmit,
2921 .ndo_features_check = qeth_features_check,
2921 .ndo_validate_addr = eth_validate_addr, 2922 .ndo_validate_addr = eth_validate_addr,
2922 .ndo_set_rx_mode = qeth_l3_set_multicast_list, 2923 .ndo_set_rx_mode = qeth_l3_set_multicast_list,
2923 .ndo_do_ioctl = qeth_do_ioctl, 2924 .ndo_do_ioctl = qeth_do_ioctl,
@@ -2958,6 +2959,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
2958 card->dev->vlan_features = NETIF_F_SG | 2959 card->dev->vlan_features = NETIF_F_SG |
2959 NETIF_F_RXCSUM | NETIF_F_IP_CSUM | 2960 NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
2960 NETIF_F_TSO; 2961 NETIF_F_TSO;
2962 card->dev->features |= NETIF_F_SG;
2961 } 2963 }
2962 } 2964 }
2963 } else if (card->info.type == QETH_CARD_TYPE_IQD) { 2965 } else if (card->info.type == QETH_CARD_TYPE_IQD) {