diff options
author | Patrick Ohly <patrick.ohly@intel.com> | 2009-02-12 00:03:37 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-02-16 01:43:34 -0500 |
commit | ac45f602ee3d1b6f326f68bc0c2591ceebf05ba4 (patch) | |
tree | c92c86bd0d89b844a3794c0e441aa2fccb36725f | |
parent | cb9eff097831007afb30d64373f29d99825d0068 (diff) |
net: infrastructure for hardware time stamping
The additional per-packet information (16 bytes for time stamps, 1
byte for flags) is stored for all packets in the skb_shared_info
struct. This implementation detail is hidden from users of that
information via skb_* accessor functions. A separate struct (for the
time stamps) and a separate union (for the flags) are used for the
additional information so that it can be stored/copied easily outside
of skb_shared_info.
Compared to previous implementations (reusing the tstamp field
depending on the context, optional additional structures) this
is the simplest solution. It does not extend sk_buff itself.
TX time stamping is implemented in software if the device driver
doesn't support hardware time stamping.
The new semantics for hardware/software time stamping around
ndo_start_xmit() are based on two assumptions about existing
network device drivers which don't support hardware time
stamping and know nothing about it:
- they leave the new skb_shared_tx unmodified
- they keep the connection to the originating socket in skb->sk
alive, i.e., don't call skb_orphan()
Given that skb_shared_tx is new, the first assumption is safe.
The second is only true for some drivers. As a result, software
TX time stamping currently works with the bnx2 driver, but not
with the unmodified igb driver (the two drivers this patch series
was tested with).
Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/skbuff.h | 91 | ||||
-rw-r--r-- | net/core/dev.c | 32 | ||||
-rw-r--r-- | net/core/skbuff.c | 41 |
3 files changed, 161 insertions, 3 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 924700844580..f96bc91bf0a3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h | |||
@@ -132,6 +132,57 @@ struct skb_frag_struct { | |||
132 | __u32 size; | 132 | __u32 size; |
133 | }; | 133 | }; |
134 | 134 | ||
135 | #define HAVE_HW_TIME_STAMP | ||
136 | |||
137 | /** | ||
138 | * struct skb_shared_hwtstamps - hardware time stamps | ||
139 | * | ||
140 | * @hwtstamp: hardware time stamp transformed into duration | ||
141 | * since arbitrary point in time | ||
142 | * @syststamp: hwtstamp transformed to system time base | ||
143 | * | ||
144 | * Software time stamps generated by ktime_get_real() are stored in | ||
145 | * skb->tstamp. The relation between the different kinds of time | ||
146 | * stamps is as follows: | ||
147 | * | ||
148 | * syststamp and tstamp can be compared against each other in | ||
149 | * arbitrary combinations. The accuracy of a | ||
150 | * syststamp/tstamp/"syststamp from other device" comparison is | ||
151 | * limited by the accuracy of the transformation into system time | ||
152 | * base. This depends on the device driver and its underlying | ||
153 | * hardware. | ||
154 | * | ||
155 | * hwtstamps can only be compared against other hwtstamps from | ||
156 | * the same device. | ||
157 | * | ||
158 | * This structure is attached to packets as part of the | ||
159 | * &skb_shared_info. Use skb_hwtstamps() to get a pointer. | ||
160 | */ | ||
161 | struct skb_shared_hwtstamps { | ||
162 | ktime_t hwtstamp; | ||
163 | ktime_t syststamp; | ||
164 | }; | ||
165 | |||
166 | /** | ||
167 | * union skb_shared_tx - instructions for time stamping of outgoing packets | ||
168 | * | ||
169 | * @hardware: generate hardware time stamp | ||
170 | * @software: generate software time stamp | ||
171 | * @in_progress: device driver is going to provide | ||
172 | * hardware time stamp | ||
173 | * | ||
174 | * These flags are attached to packets as part of the | ||
175 | * &skb_shared_info. Use skb_tx() to get a pointer. | ||
176 | */ | ||
177 | union skb_shared_tx { | ||
178 | struct { | ||
179 | __u8 hardware:1, | ||
180 | software:1, | ||
181 | in_progress:1; | ||
182 | }; | ||
183 | __u8 flags; | ||
184 | }; | ||
185 | |||
135 | /* This data is invariant across clones and lives at | 186 | /* This data is invariant across clones and lives at |
136 | * the end of the header data, ie. at skb->end. | 187 | * the end of the header data, ie. at skb->end. |
137 | */ | 188 | */ |
@@ -143,10 +194,12 @@ struct skb_shared_info { | |||
143 | unsigned short gso_segs; | 194 | unsigned short gso_segs; |
144 | unsigned short gso_type; | 195 | unsigned short gso_type; |
145 | __be32 ip6_frag_id; | 196 | __be32 ip6_frag_id; |
197 | union skb_shared_tx tx_flags; | ||
146 | #ifdef CONFIG_HAS_DMA | 198 | #ifdef CONFIG_HAS_DMA |
147 | unsigned int num_dma_maps; | 199 | unsigned int num_dma_maps; |
148 | #endif | 200 | #endif |
149 | struct sk_buff *frag_list; | 201 | struct sk_buff *frag_list; |
202 | struct skb_shared_hwtstamps hwtstamps; | ||
150 | skb_frag_t frags[MAX_SKB_FRAGS]; | 203 | skb_frag_t frags[MAX_SKB_FRAGS]; |
151 | #ifdef CONFIG_HAS_DMA | 204 | #ifdef CONFIG_HAS_DMA |
152 | dma_addr_t dma_maps[MAX_SKB_FRAGS + 1]; | 205 | dma_addr_t dma_maps[MAX_SKB_FRAGS + 1]; |
@@ -465,6 +518,16 @@ static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) | |||
465 | /* Internal */ | 518 | /* Internal */ |
466 | #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) | 519 | #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) |
467 | 520 | ||
521 | static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) | ||
522 | { | ||
523 | return &skb_shinfo(skb)->hwtstamps; | ||
524 | } | ||
525 | |||
526 | static inline union skb_shared_tx *skb_tx(struct sk_buff *skb) | ||
527 | { | ||
528 | return &skb_shinfo(skb)->tx_flags; | ||
529 | } | ||
530 | |||
468 | /** | 531 | /** |
469 | * skb_queue_empty - check if a queue is empty | 532 | * skb_queue_empty - check if a queue is empty |
470 | * @list: queue head | 533 | * @list: queue head |
@@ -1730,6 +1793,11 @@ static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb, | |||
1730 | 1793 | ||
1731 | extern void skb_init(void); | 1794 | extern void skb_init(void); |
1732 | 1795 | ||
1796 | static inline ktime_t skb_get_ktime(const struct sk_buff *skb) | ||
1797 | { | ||
1798 | return skb->tstamp; | ||
1799 | } | ||
1800 | |||
1733 | /** | 1801 | /** |
1734 | * skb_get_timestamp - get timestamp from a skb | 1802 | * skb_get_timestamp - get timestamp from a skb |
1735 | * @skb: skb to get stamp from | 1803 | * @skb: skb to get stamp from |
@@ -1739,11 +1807,18 @@ extern void skb_init(void); | |||
1739 | * This function converts the offset back to a struct timeval and stores | 1807 | * This function converts the offset back to a struct timeval and stores |
1740 | * it in stamp. | 1808 | * it in stamp. |
1741 | */ | 1809 | */ |
1742 | static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp) | 1810 | static inline void skb_get_timestamp(const struct sk_buff *skb, |
1811 | struct timeval *stamp) | ||
1743 | { | 1812 | { |
1744 | *stamp = ktime_to_timeval(skb->tstamp); | 1813 | *stamp = ktime_to_timeval(skb->tstamp); |
1745 | } | 1814 | } |
1746 | 1815 | ||
1816 | static inline void skb_get_timestampns(const struct sk_buff *skb, | ||
1817 | struct timespec *stamp) | ||
1818 | { | ||
1819 | *stamp = ktime_to_timespec(skb->tstamp); | ||
1820 | } | ||
1821 | |||
1747 | static inline void __net_timestamp(struct sk_buff *skb) | 1822 | static inline void __net_timestamp(struct sk_buff *skb) |
1748 | { | 1823 | { |
1749 | skb->tstamp = ktime_get_real(); | 1824 | skb->tstamp = ktime_get_real(); |
@@ -1759,6 +1834,20 @@ static inline ktime_t net_invalid_timestamp(void) | |||
1759 | return ktime_set(0, 0); | 1834 | return ktime_set(0, 0); |
1760 | } | 1835 | } |
1761 | 1836 | ||
1837 | /** | ||
1838 | * skb_tstamp_tx - queue clone of skb with send time stamps | ||
1839 | * @orig_skb: the original outgoing packet | ||
1840 | * @hwtstamps: hardware time stamps, may be NULL if not available | ||
1841 | * | ||
1842 | * If the skb has a socket associated, then this function clones the | ||
1843 | * skb (thus sharing the actual data and optional structures), stores | ||
1844 | * the optional hardware time stamping information (if non NULL) or | ||
1845 | * generates a software time stamp (otherwise), then queues the clone | ||
1846 | * to the error queue of the socket. Errors are silently ignored. | ||
1847 | */ | ||
1848 | extern void skb_tstamp_tx(struct sk_buff *orig_skb, | ||
1849 | struct skb_shared_hwtstamps *hwtstamps); | ||
1850 | |||
1762 | extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); | 1851 | extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); |
1763 | extern __sum16 __skb_checksum_complete(struct sk_buff *skb); | 1852 | extern __sum16 __skb_checksum_complete(struct sk_buff *skb); |
1764 | 1853 | ||
diff --git a/net/core/dev.c b/net/core/dev.c index 1e27a67df242..d20c28e839d3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -1672,10 +1672,21 @@ static int dev_gso_segment(struct sk_buff *skb) | |||
1672 | return 0; | 1672 | return 0; |
1673 | } | 1673 | } |
1674 | 1674 | ||
1675 | static void tstamp_tx(struct sk_buff *skb) | ||
1676 | { | ||
1677 | union skb_shared_tx *shtx = | ||
1678 | skb_tx(skb); | ||
1679 | if (unlikely(shtx->software && | ||
1680 | !shtx->in_progress)) { | ||
1681 | skb_tstamp_tx(skb, NULL); | ||
1682 | } | ||
1683 | } | ||
1684 | |||
1675 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | 1685 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, |
1676 | struct netdev_queue *txq) | 1686 | struct netdev_queue *txq) |
1677 | { | 1687 | { |
1678 | const struct net_device_ops *ops = dev->netdev_ops; | 1688 | const struct net_device_ops *ops = dev->netdev_ops; |
1689 | int rc; | ||
1679 | 1690 | ||
1680 | prefetch(&dev->netdev_ops->ndo_start_xmit); | 1691 | prefetch(&dev->netdev_ops->ndo_start_xmit); |
1681 | if (likely(!skb->next)) { | 1692 | if (likely(!skb->next)) { |
@@ -1689,13 +1700,29 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1689 | goto gso; | 1700 | goto gso; |
1690 | } | 1701 | } |
1691 | 1702 | ||
1692 | return ops->ndo_start_xmit(skb, dev); | 1703 | rc = ops->ndo_start_xmit(skb, dev); |
1704 | /* | ||
1705 | * TODO: if skb_orphan() was called by | ||
1706 | * ops->ndo_start_xmit() (for example, the unmodified | ||
1707 | * igb driver does that; bnx2 doesn't), then | ||
1708 | * skb_tstamp_tx() will be unable to send | ||
1709 | * back the time stamp. | ||
1710 | * | ||
1711 | * How can this be prevented? Always create another | ||
1712 | * reference to the socket before calling | ||
1713 | * ops->ndo_start_xmit()? Prevent skb_orphan() from | ||
1714 | * doing anything in ops->ndo_start_xmit() by clearing | ||
1715 | * the skb destructor before the call and restoring it | ||
1716 | * afterwards, then doing the skb_orphan() ourselves? | ||
1717 | */ | ||
1718 | if (likely(!rc)) | ||
1719 | tstamp_tx(skb); | ||
1720 | return rc; | ||
1693 | } | 1721 | } |
1694 | 1722 | ||
1695 | gso: | 1723 | gso: |
1696 | do { | 1724 | do { |
1697 | struct sk_buff *nskb = skb->next; | 1725 | struct sk_buff *nskb = skb->next; |
1698 | int rc; | ||
1699 | 1726 | ||
1700 | skb->next = nskb->next; | 1727 | skb->next = nskb->next; |
1701 | nskb->next = NULL; | 1728 | nskb->next = NULL; |
@@ -1705,6 +1732,7 @@ gso: | |||
1705 | skb->next = nskb; | 1732 | skb->next = nskb; |
1706 | return rc; | 1733 | return rc; |
1707 | } | 1734 | } |
1735 | tstamp_tx(skb); | ||
1708 | if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) | 1736 | if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) |
1709 | return NETDEV_TX_BUSY; | 1737 | return NETDEV_TX_BUSY; |
1710 | } while (skb->next); | 1738 | } while (skb->next); |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ab7d2e9f02fa..e5a8351ff12d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <linux/rtnetlink.h> | 55 | #include <linux/rtnetlink.h> |
56 | #include <linux/init.h> | 56 | #include <linux/init.h> |
57 | #include <linux/scatterlist.h> | 57 | #include <linux/scatterlist.h> |
58 | #include <linux/errqueue.h> | ||
58 | 59 | ||
59 | #include <net/protocol.h> | 60 | #include <net/protocol.h> |
60 | #include <net/dst.h> | 61 | #include <net/dst.h> |
@@ -215,7 +216,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
215 | shinfo->gso_segs = 0; | 216 | shinfo->gso_segs = 0; |
216 | shinfo->gso_type = 0; | 217 | shinfo->gso_type = 0; |
217 | shinfo->ip6_frag_id = 0; | 218 | shinfo->ip6_frag_id = 0; |
219 | shinfo->tx_flags.flags = 0; | ||
218 | shinfo->frag_list = NULL; | 220 | shinfo->frag_list = NULL; |
221 | memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); | ||
219 | 222 | ||
220 | if (fclone) { | 223 | if (fclone) { |
221 | struct sk_buff *child = skb + 1; | 224 | struct sk_buff *child = skb + 1; |
@@ -2945,6 +2948,44 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) | |||
2945 | } | 2948 | } |
2946 | EXPORT_SYMBOL_GPL(skb_cow_data); | 2949 | EXPORT_SYMBOL_GPL(skb_cow_data); |
2947 | 2950 | ||
2951 | void skb_tstamp_tx(struct sk_buff *orig_skb, | ||
2952 | struct skb_shared_hwtstamps *hwtstamps) | ||
2953 | { | ||
2954 | struct sock *sk = orig_skb->sk; | ||
2955 | struct sock_exterr_skb *serr; | ||
2956 | struct sk_buff *skb; | ||
2957 | int err; | ||
2958 | |||
2959 | if (!sk) | ||
2960 | return; | ||
2961 | |||
2962 | skb = skb_clone(orig_skb, GFP_ATOMIC); | ||
2963 | if (!skb) | ||
2964 | return; | ||
2965 | |||
2966 | if (hwtstamps) { | ||
2967 | *skb_hwtstamps(skb) = | ||
2968 | *hwtstamps; | ||
2969 | } else { | ||
2970 | /* | ||
2971 | * no hardware time stamps available, | ||
2972 | * so keep the skb_shared_tx and only | ||
2973 | * store software time stamp | ||
2974 | */ | ||
2975 | skb->tstamp = ktime_get_real(); | ||
2976 | } | ||
2977 | |||
2978 | serr = SKB_EXT_ERR(skb); | ||
2979 | memset(serr, 0, sizeof(*serr)); | ||
2980 | serr->ee.ee_errno = ENOMSG; | ||
2981 | serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; | ||
2982 | err = sock_queue_err_skb(sk, skb); | ||
2983 | if (err) | ||
2984 | kfree_skb(skb); | ||
2985 | } | ||
2986 | EXPORT_SYMBOL_GPL(skb_tstamp_tx); | ||
2987 | |||
2988 | |||
2948 | /** | 2989 | /** |
2949 | * skb_partial_csum_set - set up and verify partial csum values for packet | 2990 | * skb_partial_csum_set - set up and verify partial csum values for packet |
2950 | * @skb: the skb to set | 2991 | * @skb: the skb to set |