aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
author Eric Dumazet <eric.dumazet@gmail.com> 2010-05-16 02:57:10 -0400
committer David S. Miller <davem@davemloft.net> 2010-05-16 02:57:10 -0400
commit 3b098e2d7c693796cc4dffb07caa249fc0f70771 (patch)
tree 586c4f5dc57988ade175ffc7e4b6d0261b12e166 /net/core
parent a1aa3483041bd3691c7f029272ccef4ce70bd957 (diff)
net: Consistent skb timestamping
With RPS inclusion, skb timestamping is not consistent in the RX path. If netif_receive_skb() is used, it's deferred until after RPS dispatch. If netif_rx() is used, it's done before RPS dispatch. This can give strange tcpdump timestamp results. I think timestamping should be done as soon as possible in the receive path, to get meaningful values (i.e. timestamps taken at the time the packet was delivered by the NIC driver to our stack), even if NAPI can already defer timestamping a bit (RPS can help to reduce the gap). Tom Herbert prefers to sample timestamps after RPS dispatch. In case sampling is expensive (HPET/acpi_pm on x86), this makes sense. Let admins switch from one mode to the other, using a new sysctl, /proc/sys/net/core/netdev_tstamp_prequeue. Its default value (1) means timestamps are taken as soon as possible, before backlog queueing, giving accurate timestamps. Setting it to 0 allows timestamps to be sampled when processing the backlog, after RPS dispatch, to lower the load on the pre-RPS CPU. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/dev.c 50
-rw-r--r-- net/core/sysctl_net_core.c 7
2 files changed, 38 insertions, 19 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 5cbba0927a8e..988e42912e72 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1454,7 +1454,7 @@ void net_disable_timestamp(void)
1454} 1454}
1455EXPORT_SYMBOL(net_disable_timestamp); 1455EXPORT_SYMBOL(net_disable_timestamp);
1456 1456
1457static inline void net_timestamp(struct sk_buff *skb) 1457static inline void net_timestamp_set(struct sk_buff *skb)
1458{ 1458{
1459 if (atomic_read(&netstamp_needed)) 1459 if (atomic_read(&netstamp_needed))
1460 __net_timestamp(skb); 1460 __net_timestamp(skb);
@@ -1462,6 +1462,12 @@ static inline void net_timestamp(struct sk_buff *skb)
1462 skb->tstamp.tv64 = 0; 1462 skb->tstamp.tv64 = 0;
1463} 1463}
1464 1464
1465static inline void net_timestamp_check(struct sk_buff *skb)
1466{
1467 if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
1468 __net_timestamp(skb);
1469}
1470
1465/** 1471/**
1466 * dev_forward_skb - loopback an skb to another netif 1472 * dev_forward_skb - loopback an skb to another netif
1467 * 1473 *
@@ -1508,9 +1514,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1508 1514
1509#ifdef CONFIG_NET_CLS_ACT 1515#ifdef CONFIG_NET_CLS_ACT
1510 if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) 1516 if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1511 net_timestamp(skb); 1517 net_timestamp_set(skb);
1512#else 1518#else
1513 net_timestamp(skb); 1519 net_timestamp_set(skb);
1514#endif 1520#endif
1515 1521
1516 rcu_read_lock(); 1522 rcu_read_lock();
@@ -2201,6 +2207,7 @@ EXPORT_SYMBOL(dev_queue_xmit);
2201 =======================================================================*/ 2207 =======================================================================*/
2202 2208
2203int netdev_max_backlog __read_mostly = 1000; 2209int netdev_max_backlog __read_mostly = 1000;
2210int netdev_tstamp_prequeue __read_mostly = 1;
2204int netdev_budget __read_mostly = 300; 2211int netdev_budget __read_mostly = 300;
2205int weight_p __read_mostly = 64; /* old backlog weight */ 2212int weight_p __read_mostly = 64; /* old backlog weight */
2206 2213
@@ -2465,8 +2472,8 @@ int netif_rx(struct sk_buff *skb)
2465 if (netpoll_rx(skb)) 2472 if (netpoll_rx(skb))
2466 return NET_RX_DROP; 2473 return NET_RX_DROP;
2467 2474
2468 if (!skb->tstamp.tv64) 2475 if (netdev_tstamp_prequeue)
2469 net_timestamp(skb); 2476 net_timestamp_check(skb);
2470 2477
2471#ifdef CONFIG_RPS 2478#ifdef CONFIG_RPS
2472 { 2479 {
@@ -2791,8 +2798,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
2791 int ret = NET_RX_DROP; 2798 int ret = NET_RX_DROP;
2792 __be16 type; 2799 __be16 type;
2793 2800
2794 if (!skb->tstamp.tv64) 2801 if (!netdev_tstamp_prequeue)
2795 net_timestamp(skb); 2802 net_timestamp_check(skb);
2796 2803
2797 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) 2804 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2798 return NET_RX_SUCCESS; 2805 return NET_RX_SUCCESS;
@@ -2910,23 +2917,28 @@ out:
2910 */ 2917 */
2911int netif_receive_skb(struct sk_buff *skb) 2918int netif_receive_skb(struct sk_buff *skb)
2912{ 2919{
2920 if (netdev_tstamp_prequeue)
2921 net_timestamp_check(skb);
2922
2913#ifdef CONFIG_RPS 2923#ifdef CONFIG_RPS
2914 struct rps_dev_flow voidflow, *rflow = &voidflow; 2924 {
2915 int cpu, ret; 2925 struct rps_dev_flow voidflow, *rflow = &voidflow;
2926 int cpu, ret;
2916 2927
2917 rcu_read_lock(); 2928 rcu_read_lock();
2929
2930 cpu = get_rps_cpu(skb->dev, skb, &rflow);
2918 2931
2919 cpu = get_rps_cpu(skb->dev, skb, &rflow); 2932 if (cpu >= 0) {
2933 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2934 rcu_read_unlock();
2935 } else {
2936 rcu_read_unlock();
2937 ret = __netif_receive_skb(skb);
2938 }
2920 2939
2921 if (cpu >= 0) { 2940 return ret;
2922 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2923 rcu_read_unlock();
2924 } else {
2925 rcu_read_unlock();
2926 ret = __netif_receive_skb(skb);
2927 } 2941 }
2928
2929 return ret;
2930#else 2942#else
2931 return __netif_receive_skb(skb); 2943 return __netif_receive_skb(skb);
2932#endif 2944#endif
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index dcc7d25996ab..01eee5d984be 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -122,6 +122,13 @@ static struct ctl_table net_core_table[] = {
122 .proc_handler = proc_dointvec 122 .proc_handler = proc_dointvec
123 }, 123 },
124 { 124 {
125 .procname = "netdev_tstamp_prequeue",
126 .data = &netdev_tstamp_prequeue,
127 .maxlen = sizeof(int),
128 .mode = 0644,
129 .proc_handler = proc_dointvec
130 },
131 {
125 .procname = "message_cost", 132 .procname = "message_cost",
126 .data = &net_ratelimit_state.interval, 133 .data = &net_ratelimit_state.interval,
127 .maxlen = sizeof(int), 134 .maxlen = sizeof(int),