diff options
author | Tom Herbert <therbert@google.com> | 2011-04-05 01:30:30 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-04-05 01:30:30 -0400 |
commit | c6e1a0d12ca7b4f22c58e55a16beacfb7d3d8462 (patch) | |
tree | 6955c20538050329d0bdffdf24a787507ae6fdf1 | |
parent | 14f98f258f1936e0dba77474bd7eda63f61a9826 (diff) |
net: Allow no-cache copy from user on transmit
This patch uses __copy_from_user_nocache on transmit to bypass data
cache for a performance improvement. skb_add_data_nocache and
skb_copy_to_page_nocache can be called by sendmsg functions to use
this feature, initial support is in tcp_sendmsg. This functionality is
configurable per device using ethtool.
Presumably, this feature would only be useful when the driver does
not touch the data. The feature is turned on by default if a device
indicates that it does some form of checksum offload; it is off by
default for devices that do no checksum offload or indicate no checksum
is necessary. For the former case copy-checksum is probably done
anyway, in the latter case the device is likely loopback in which case
the no cache copy is probably not beneficial.
This patch was tested using 200 instances of netperf TCP_RR with
1400 byte request and one byte reply. Platform is 16 core AMD x86.
No-cache copy disabled:
672703 tps, 97.13% utilization
50/90/99% latency:244.31 484.205 1028.41
No-cache copy enabled:
702113 tps, 96.16% utilization,
50/90/99% latency 238.56 467.56 956.955
Using 14000 byte request and response sizes demonstrates the
effects more dramatically:
No-cache copy disabled:
79571 tps, 34.34% utilization
50/90/95% latency 1584.46 2319.59 5001.76
No-cache copy enabled:
83856 tps, 34.81% utilization
50/90/95% latency 2508.42 2622.62 2735.88
Note especially the effect on the latency tail (95th percentile).
This seems to provide a nice performance improvement and is
consistent in the tests I ran. Presumably, this would provide
the greatest benefits in the presence of an application workload
stressing the cache and a lot of transmit data happening.
Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/bonding/bond_main.c | 2 | ||||
-rw-r--r-- | include/linux/netdevice.h | 3 | ||||
-rw-r--r-- | include/net/sock.h | 53 | ||||
-rw-r--r-- | net/core/dev.c | 12 | ||||
-rw-r--r-- | net/core/ethtool.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 7 |
6 files changed, 73 insertions, 6 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 16d6fe95469..b51e021354b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c | |||
@@ -1407,7 +1407,7 @@ static int bond_compute_features(struct bonding *bond) | |||
1407 | int i; | 1407 | int i; |
1408 | 1408 | ||
1409 | features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); | 1409 | features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); |
1410 | features |= NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; | 1410 | features |= NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_NOCACHE_COPY; |
1411 | 1411 | ||
1412 | if (!bond->first_slave) | 1412 | if (!bond->first_slave) |
1413 | goto done; | 1413 | goto done; |
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a4664cc68e2..09d26241576 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -1066,6 +1066,7 @@ struct net_device { | |||
1066 | #define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */ | 1066 | #define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */ |
1067 | #define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */ | 1067 | #define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */ |
1068 | #define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */ | 1068 | #define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */ |
1069 | #define NETIF_F_NOCACHE_COPY (1 << 30) /* Use no-cache copyfromuser */ | ||
1069 | 1070 | ||
1070 | /* Segmentation offload features */ | 1071 | /* Segmentation offload features */ |
1071 | #define NETIF_F_GSO_SHIFT 16 | 1072 | #define NETIF_F_GSO_SHIFT 16 |
@@ -1081,7 +1082,7 @@ struct net_device { | |||
1081 | /* = all defined minus driver/device-class-related */ | 1082 | /* = all defined minus driver/device-class-related */ |
1082 | #define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \ | 1083 | #define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \ |
1083 | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) | 1084 | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) |
1084 | #define NETIF_F_ETHTOOL_BITS (0x3f3fffff & ~NETIF_F_NEVER_CHANGE) | 1085 | #define NETIF_F_ETHTOOL_BITS (0x7f3fffff & ~NETIF_F_NEVER_CHANGE) |
1085 | 1086 | ||
1086 | /* List of features with software fallbacks. */ | 1087 | /* List of features with software fallbacks. */ |
1087 | #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ | 1088 | #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ |
diff --git a/include/net/sock.h b/include/net/sock.h index da0534d3401..43bd515e92f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <linux/mm.h> | 52 | #include <linux/mm.h> |
53 | #include <linux/security.h> | 53 | #include <linux/security.h> |
54 | #include <linux/slab.h> | 54 | #include <linux/slab.h> |
55 | #include <linux/uaccess.h> | ||
55 | 56 | ||
56 | #include <linux/filter.h> | 57 | #include <linux/filter.h> |
57 | #include <linux/rculist_nulls.h> | 58 | #include <linux/rculist_nulls.h> |
@@ -1389,6 +1390,58 @@ static inline void sk_nocaps_add(struct sock *sk, int flags) | |||
1389 | sk->sk_route_caps &= ~flags; | 1390 | sk->sk_route_caps &= ~flags; |
1390 | } | 1391 | } |
1391 | 1392 | ||
1393 | static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, | ||
1394 | char __user *from, char *to, | ||
1395 | int copy) | ||
1396 | { | ||
1397 | if (skb->ip_summed == CHECKSUM_NONE) { | ||
1398 | int err = 0; | ||
1399 | __wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err); | ||
1400 | if (err) | ||
1401 | return err; | ||
1402 | skb->csum = csum_block_add(skb->csum, csum, skb->len); | ||
1403 | } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { | ||
1404 | if (!access_ok(VERIFY_READ, from, copy) || | ||
1405 | __copy_from_user_nocache(to, from, copy)) | ||
1406 | return -EFAULT; | ||
1407 | } else if (copy_from_user(to, from, copy)) | ||
1408 | return -EFAULT; | ||
1409 | |||
1410 | return 0; | ||
1411 | } | ||
1412 | |||
1413 | static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, | ||
1414 | char __user *from, int copy) | ||
1415 | { | ||
1416 | int err; | ||
1417 | |||
1418 | err = skb_do_copy_data_nocache(sk, skb, from, skb_put(skb, copy), copy); | ||
1419 | if (err) | ||
1420 | __skb_trim(skb, skb->len); | ||
1421 | |||
1422 | return err; | ||
1423 | } | ||
1424 | |||
1425 | static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from, | ||
1426 | struct sk_buff *skb, | ||
1427 | struct page *page, | ||
1428 | int off, int copy) | ||
1429 | { | ||
1430 | int err; | ||
1431 | |||
1432 | err = skb_do_copy_data_nocache(sk, skb, from, | ||
1433 | page_address(page) + off, copy); | ||
1434 | if (err) | ||
1435 | return err; | ||
1436 | |||
1437 | skb->len += copy; | ||
1438 | skb->data_len += copy; | ||
1439 | skb->truesize += copy; | ||
1440 | sk->sk_wmem_queued += copy; | ||
1441 | sk_mem_charge(sk, copy); | ||
1442 | return 0; | ||
1443 | } | ||
1444 | |||
1392 | static inline int skb_copy_to_page(struct sock *sk, char __user *from, | 1445 | static inline int skb_copy_to_page(struct sock *sk, char __user *from, |
1393 | struct sk_buff *skb, struct page *page, | 1446 | struct sk_buff *skb, struct page *page, |
1394 | int off, int copy) | 1447 | int off, int copy) |
diff --git a/net/core/dev.c b/net/core/dev.c index 02f56376fe9..5d0b4f6f1a7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -5425,6 +5425,14 @@ int register_netdevice(struct net_device *dev) | |||
5425 | dev->features &= ~NETIF_F_GSO; | 5425 | dev->features &= ~NETIF_F_GSO; |
5426 | } | 5426 | } |
5427 | 5427 | ||
5428 | /* Turn on no cache copy if HW is doing checksum */ | ||
5429 | dev->hw_features |= NETIF_F_NOCACHE_COPY; | ||
5430 | if ((dev->features & NETIF_F_ALL_CSUM) && | ||
5431 | !(dev->features & NETIF_F_NO_CSUM)) { | ||
5432 | dev->wanted_features |= NETIF_F_NOCACHE_COPY; | ||
5433 | dev->features |= NETIF_F_NOCACHE_COPY; | ||
5434 | } | ||
5435 | |||
5428 | /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, | 5436 | /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, |
5429 | * vlan_dev_init() will do the dev->features check, so these features | 5437 | * vlan_dev_init() will do the dev->features check, so these features |
5430 | * are enabled only if supported by underlying device. | 5438 | * are enabled only if supported by underlying device. |
@@ -6182,6 +6190,10 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask) | |||
6182 | } | 6190 | } |
6183 | } | 6191 | } |
6184 | 6192 | ||
6193 | /* If device can't no cache copy, don't do for all */ | ||
6194 | if (!(one & NETIF_F_NOCACHE_COPY)) | ||
6195 | all &= ~NETIF_F_NOCACHE_COPY; | ||
6196 | |||
6185 | one |= NETIF_F_ALL_CSUM; | 6197 | one |= NETIF_F_ALL_CSUM; |
6186 | 6198 | ||
6187 | one |= all & NETIF_F_ONE_FOR_ALL; | 6199 | one |= all & NETIF_F_ONE_FOR_ALL; |
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 439e4b0e131..719670ae199 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c | |||
@@ -359,7 +359,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS | |||
359 | /* NETIF_F_NTUPLE */ "rx-ntuple-filter", | 359 | /* NETIF_F_NTUPLE */ "rx-ntuple-filter", |
360 | /* NETIF_F_RXHASH */ "rx-hashing", | 360 | /* NETIF_F_RXHASH */ "rx-hashing", |
361 | /* NETIF_F_RXCSUM */ "rx-checksum", | 361 | /* NETIF_F_RXCSUM */ "rx-checksum", |
362 | "", | 362 | /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy" |
363 | "", | 363 | "", |
364 | }; | 364 | }; |
365 | 365 | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b22d4501054..054a59d21eb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -999,7 +999,8 @@ new_segment: | |||
999 | /* We have some space in skb head. Superb! */ | 999 | /* We have some space in skb head. Superb! */ |
1000 | if (copy > skb_tailroom(skb)) | 1000 | if (copy > skb_tailroom(skb)) |
1001 | copy = skb_tailroom(skb); | 1001 | copy = skb_tailroom(skb); |
1002 | if ((err = skb_add_data(skb, from, copy)) != 0) | 1002 | err = skb_add_data_nocache(sk, skb, from, copy); |
1003 | if (err) | ||
1003 | goto do_fault; | 1004 | goto do_fault; |
1004 | } else { | 1005 | } else { |
1005 | int merge = 0; | 1006 | int merge = 0; |
@@ -1042,8 +1043,8 @@ new_segment: | |||
1042 | 1043 | ||
1043 | /* Time to copy data. We are close to | 1044 | /* Time to copy data. We are close to |
1044 | * the end! */ | 1045 | * the end! */ |
1045 | err = skb_copy_to_page(sk, from, skb, page, | 1046 | err = skb_copy_to_page_nocache(sk, from, skb, |
1046 | off, copy); | 1047 | page, off, copy); |
1047 | if (err) { | 1048 | if (err) { |
1048 | /* If this page was new, give it to the | 1049 | /* If this page was new, give it to the |
1049 | * socket so it does not get leaked. | 1050 | * socket so it does not get leaked. |