aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tom Herbert <therbert@google.com> 2011-04-05 01:30:30 -0400
committer David S. Miller <davem@davemloft.net> 2011-04-05 01:30:30 -0400
commit c6e1a0d12ca7b4f22c58e55a16beacfb7d3d8462 (patch)
tree 6955c20538050329d0bdffdf24a787507ae6fdf1
parent 14f98f258f1936e0dba77474bd7eda63f61a9826 (diff)
net: Allow no-cache copy from user on transmit
This patch uses __copy_from_user_nocache on transmit to bypass data cache for a performance improvement. skb_add_data_nocache and skb_copy_to_page_nocache can be called by sendmsg functions to use this feature; initial support is in tcp_sendmsg. This functionality is configurable per device using ethtool. Presumably, this feature would only be useful when the driver does not touch the data. The feature is turned on by default if a device indicates that it does some form of checksum offload; it is off by default for devices that do no checksum offload or indicate no checksum is necessary. For the former case copy-checksum is probably done anyway, in the latter case the device is likely loopback in which case the no cache copy is probably not beneficial. This patch was tested using 200 instances of netperf TCP_RR with 1400 byte request and one byte reply. Platform is 16 core AMD x86. No-cache copy disabled: 672703 tps, 97.13% utilization, 50/90/99% latency: 244.31 484.205 1028.41 No-cache copy enabled: 702113 tps, 96.16% utilization, 50/90/99% latency: 238.56 467.56 956.955 Using 14000 byte request and response sizes demonstrates the effects more dramatically: No-cache copy disabled: 79571 tps, 34.34% utilization, 50/90/95% latency: 1584.46 2319.59 5001.76 No-cache copy enabled: 83856 tps, 34.81% utilization, 50/90/95% latency: 2508.42 2622.62 2735.88 Note especially the effect on the latency tail (95th percentile). This seems to provide a nice performance improvement and is consistent in the tests I ran. Presumably, this would provide the greatest benefits in the presence of an application workload stressing the cache and a lot of transmit data happening. Signed-off-by: Tom Herbert <therbert@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- drivers/net/bonding/bond_main.c 2
-rw-r--r-- include/linux/netdevice.h 3
-rw-r--r-- include/net/sock.h 53
-rw-r--r-- net/core/dev.c 12
-rw-r--r-- net/core/ethtool.c 2
-rw-r--r-- net/ipv4/tcp.c 7
6 files changed, 73 insertions, 6 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 16d6fe95469..b51e021354b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1407,7 +1407,7 @@ static int bond_compute_features(struct bonding *bond)
1407 int i; 1407 int i;
1408 1408
1409 features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); 1409 features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES);
1410 features |= NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; 1410 features |= NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_NOCACHE_COPY;
1411 1411
1412 if (!bond->first_slave) 1412 if (!bond->first_slave)
1413 goto done; 1413 goto done;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a4664cc68e2..09d26241576 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1066,6 +1066,7 @@ struct net_device {
1066#define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */ 1066#define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */
1067#define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */ 1067#define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */
1068#define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */ 1068#define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */
1069#define NETIF_F_NOCACHE_COPY (1 << 30) /* Use no-cache copyfromuser */
1069 1070
1070 /* Segmentation offload features */ 1071 /* Segmentation offload features */
1071#define NETIF_F_GSO_SHIFT 16 1072#define NETIF_F_GSO_SHIFT 16
@@ -1081,7 +1082,7 @@ struct net_device {
1081 /* = all defined minus driver/device-class-related */ 1082 /* = all defined minus driver/device-class-related */
1082#define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \ 1083#define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \
1083 NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) 1084 NETIF_F_LLTX | NETIF_F_NETNS_LOCAL)
1084#define NETIF_F_ETHTOOL_BITS (0x3f3fffff & ~NETIF_F_NEVER_CHANGE) 1085#define NETIF_F_ETHTOOL_BITS (0x7f3fffff & ~NETIF_F_NEVER_CHANGE)
1085 1086
1086 /* List of features with software fallbacks. */ 1087 /* List of features with software fallbacks. */
1087#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ 1088#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \
diff --git a/include/net/sock.h b/include/net/sock.h
index da0534d3401..43bd515e92f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -52,6 +52,7 @@
52#include <linux/mm.h> 52#include <linux/mm.h>
53#include <linux/security.h> 53#include <linux/security.h>
54#include <linux/slab.h> 54#include <linux/slab.h>
55#include <linux/uaccess.h>
55 56
56#include <linux/filter.h> 57#include <linux/filter.h>
57#include <linux/rculist_nulls.h> 58#include <linux/rculist_nulls.h>
@@ -1389,6 +1390,58 @@ static inline void sk_nocaps_add(struct sock *sk, int flags)
1389 sk->sk_route_caps &= ~flags; 1390 sk->sk_route_caps &= ~flags;
1390} 1391}
1391 1392
1393static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
1394 char __user *from, char *to,
1395 int copy)
1396{
1397 if (skb->ip_summed == CHECKSUM_NONE) {
1398 int err = 0;
1399 __wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err);
1400 if (err)
1401 return err;
1402 skb->csum = csum_block_add(skb->csum, csum, skb->len);
1403 } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
1404 if (!access_ok(VERIFY_READ, from, copy) ||
1405 __copy_from_user_nocache(to, from, copy))
1406 return -EFAULT;
1407 } else if (copy_from_user(to, from, copy))
1408 return -EFAULT;
1409
1410 return 0;
1411}
1412
1413static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
1414 char __user *from, int copy)
1415{
1416 int err;
1417
1418 err = skb_do_copy_data_nocache(sk, skb, from, skb_put(skb, copy), copy);
1419 if (err)
1420 __skb_trim(skb, skb->len);
1421
1422 return err;
1423}
1424
1425static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from,
1426 struct sk_buff *skb,
1427 struct page *page,
1428 int off, int copy)
1429{
1430 int err;
1431
1432 err = skb_do_copy_data_nocache(sk, skb, from,
1433 page_address(page) + off, copy);
1434 if (err)
1435 return err;
1436
1437 skb->len += copy;
1438 skb->data_len += copy;
1439 skb->truesize += copy;
1440 sk->sk_wmem_queued += copy;
1441 sk_mem_charge(sk, copy);
1442 return 0;
1443}
1444
1392static inline int skb_copy_to_page(struct sock *sk, char __user *from, 1445static inline int skb_copy_to_page(struct sock *sk, char __user *from,
1393 struct sk_buff *skb, struct page *page, 1446 struct sk_buff *skb, struct page *page,
1394 int off, int copy) 1447 int off, int copy)
diff --git a/net/core/dev.c b/net/core/dev.c
index 02f56376fe9..5d0b4f6f1a7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5425,6 +5425,14 @@ int register_netdevice(struct net_device *dev)
5425 dev->features &= ~NETIF_F_GSO; 5425 dev->features &= ~NETIF_F_GSO;
5426 } 5426 }
5427 5427
5428 /* Turn on no cache copy if HW is doing checksum */
5429 dev->hw_features |= NETIF_F_NOCACHE_COPY;
5430 if ((dev->features & NETIF_F_ALL_CSUM) &&
5431 !(dev->features & NETIF_F_NO_CSUM)) {
5432 dev->wanted_features |= NETIF_F_NOCACHE_COPY;
5433 dev->features |= NETIF_F_NOCACHE_COPY;
5434 }
5435
5428 /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, 5436 /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
5429 * vlan_dev_init() will do the dev->features check, so these features 5437 * vlan_dev_init() will do the dev->features check, so these features
5430 * are enabled only if supported by underlying device. 5438 * are enabled only if supported by underlying device.
@@ -6182,6 +6190,10 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask)
6182 } 6190 }
6183 } 6191 }
6184 6192
6193 /* If device can't no cache copy, don't do for all */
6194 if (!(one & NETIF_F_NOCACHE_COPY))
6195 all &= ~NETIF_F_NOCACHE_COPY;
6196
6185 one |= NETIF_F_ALL_CSUM; 6197 one |= NETIF_F_ALL_CSUM;
6186 6198
6187 one |= all & NETIF_F_ONE_FOR_ALL; 6199 one |= all & NETIF_F_ONE_FOR_ALL;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 439e4b0e131..719670ae199 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -359,7 +359,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS
359 /* NETIF_F_NTUPLE */ "rx-ntuple-filter", 359 /* NETIF_F_NTUPLE */ "rx-ntuple-filter",
360 /* NETIF_F_RXHASH */ "rx-hashing", 360 /* NETIF_F_RXHASH */ "rx-hashing",
361 /* NETIF_F_RXCSUM */ "rx-checksum", 361 /* NETIF_F_RXCSUM */ "rx-checksum",
362 "", 362 /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy"
363 "", 363 "",
364}; 364};
365 365
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b22d4501054..054a59d21eb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -999,7 +999,8 @@ new_segment:
999 /* We have some space in skb head. Superb! */ 999 /* We have some space in skb head. Superb! */
1000 if (copy > skb_tailroom(skb)) 1000 if (copy > skb_tailroom(skb))
1001 copy = skb_tailroom(skb); 1001 copy = skb_tailroom(skb);
1002 if ((err = skb_add_data(skb, from, copy)) != 0) 1002 err = skb_add_data_nocache(sk, skb, from, copy);
1003 if (err)
1003 goto do_fault; 1004 goto do_fault;
1004 } else { 1005 } else {
1005 int merge = 0; 1006 int merge = 0;
@@ -1042,8 +1043,8 @@ new_segment:
1042 1043
1043 /* Time to copy data. We are close to 1044 /* Time to copy data. We are close to
1044 * the end! */ 1045 * the end! */
1045 err = skb_copy_to_page(sk, from, skb, page, 1046 err = skb_copy_to_page_nocache(sk, from, skb,
1046 off, copy); 1047 page, off, copy);
1047 if (err) { 1048 if (err) {
1048 /* If this page was new, give it to the 1049 /* If this page was new, give it to the
1049 * socket so it does not get leaked. 1050 * socket so it does not get leaked.