aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
author Tom Herbert <therbert@google.com> 2011-04-05 01:30:30 -0400
committer David S. Miller <davem@davemloft.net> 2011-04-05 01:30:30 -0400
commit c6e1a0d12ca7b4f22c58e55a16beacfb7d3d8462 (patch)
tree 6955c20538050329d0bdffdf24a787507ae6fdf1 /include/linux
parent 14f98f258f1936e0dba77474bd7eda63f61a9826 (diff)
net: Allow no-cache copy from user on transmit
This patch uses __copy_from_user_nocache on transmit to bypass data cache for a performance improvement. skb_add_data_nocache and skb_copy_to_page_nocache can be called by sendmsg functions to use this feature; initial support is in tcp_sendmsg. This functionality is configurable per device using ethtool. Presumably, this feature would only be useful when the driver does not touch the data. The feature is turned on by default if a device indicates that it does some form of checksum offload; it is off by default for devices that do no checksum offload or indicate no checksum is necessary. For the former case copy-checksum is probably done anyway; in the latter case the device is likely loopback, in which case the no-cache copy is probably not beneficial. This patch was tested using 200 instances of netperf TCP_RR with 1400 byte request and one byte reply. Platform is 16 core AMD x86. No-cache copy disabled: 672703 tps, 97.13% utilization, 50/90/99% latency: 244.31 484.205 1028.41 No-cache copy enabled: 702113 tps, 96.16% utilization, 50/90/99% latency: 238.56 467.56 956.955 Using 14000 byte request and response sizes demonstrates the effects more dramatically: No-cache copy disabled: 79571 tps, 34.34% utilization, 50/90/95% latency: 1584.46 2319.59 5001.76 No-cache copy enabled: 83856 tps, 34.81% utilization, 50/90/95% latency: 2508.42 2622.62 2735.88 Note especially the effect on latency tail (95th percentile). This seems to provide a nice performance improvement and is consistent in the tests I ran. Presumably, this would provide the greatest benefits in the presence of an application workload stressing the cache and a lot of transmit data happening. Signed-off-by: Tom Herbert <therbert@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/netdevice.h3
1 files changed, 2 insertions, 1 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a4664cc68e2b..09d262415769 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1066,6 +1066,7 @@ struct net_device {
1066#define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */ 1066#define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */
1067#define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */ 1067#define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */
1068#define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */ 1068#define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */
1069#define NETIF_F_NOCACHE_COPY (1 << 30) /* Use no-cache copyfromuser */
1069 1070
1070 /* Segmentation offload features */ 1071 /* Segmentation offload features */
1071#define NETIF_F_GSO_SHIFT 16 1072#define NETIF_F_GSO_SHIFT 16
@@ -1081,7 +1082,7 @@ struct net_device {
1081 /* = all defined minus driver/device-class-related */ 1082 /* = all defined minus driver/device-class-related */
1082#define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \ 1083#define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \
1083 NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) 1084 NETIF_F_LLTX | NETIF_F_NETNS_LOCAL)
1084#define NETIF_F_ETHTOOL_BITS (0x3f3fffff & ~NETIF_F_NEVER_CHANGE) 1085#define NETIF_F_ETHTOOL_BITS (0x7f3fffff & ~NETIF_F_NEVER_CHANGE)
1085 1086
1086 /* List of features with software fallbacks. */ 1087 /* List of features with software fallbacks. */
1087#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ 1088#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \