aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJacob Keller <jacob.e.keller@intel.com>2018-08-09 09:28:54 -0400
committerJeff Kirsher <jeffrey.t.kirsher@intel.com>2018-08-23 12:49:16 -0400
commitf8ba7db850350319348b6d3c276f8ba19bc098ef (patch)
treec716336f2c95ea24de2368fdafff260a89cba744
parent5ab522443bd1dafa9e32d6f4b029128efda072de (diff)
ice: Report stats for allocated queues via ethtool stats
It is not safe to have the string table for statistics change order or size over the lifetime of a given netdevice. This is because of the nature of the 3-step process for obtaining stats. First, user space performs a request for the size of the strings table. Second it performs a separate request for the strings themselves, after allocating space for the table. Third, it requests the stats themselves, also allocating space for the table. If the size decreased, there is potential to see garbage data or stats values. In the worst case, we could potentially see stats values become mis-aligned with their strings, so that it looks like a statistic is being reported differently than it actually is. Even worse, if the size increased, there is potential that the strings table or stats table was not allocated large enough and the stats code could access and write to memory it should not, potentially resulting in undefined behavior and system crashes. It isn't even safe if the size always changes under the RTNL lock. This is because the calls take place over multiple user space commands, so it is not possible to hold the RTNL lock for the entire duration of obtaining strings and stats. Further, not all consumers of the ethtool API are the user space ethtool program, and it is possible that one assumes the strings will not change (valid under the current contract), and thus only requests the stats values when requesting stats in a loop. Finally, it's not possible in the general case to detect when the size changes, because it is quite possible that one value which could impact the stat size increased, while another decreased. This would result in the same total number of stats, but reordering them so that stats no longer line up with the strings they belong to. Since only size changes aren't enough, we would need some sort of hash or token to determine when the strings no longer match. This would require extending the ethtool stats commands, but there is no more space in the relevant structures. The real solution to resolve this would be to add a completely new API for stats, probably over netlink. In the ice driver, the only thing impacting the stats that is not constant is the number of queues. Instead of reporting stats for each used queue, report stats for each allocated queue. We do not change the number of queues allocated for a given netdevice, as we pass this into the alloc_etherdev_mq() function to set the num_tx_queues and num_rx_queues. This resolves the potential bugs at the slight cost of displaying many queue statistics which will not be activated. Signed-off-by: Jacob Keller <jacob.e.keller@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Tony Brelinski <tonyx.brelinski@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h7
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c52
2 files changed, 46 insertions, 13 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index d8b5fff581e7..ed071ea75f20 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -89,6 +89,13 @@ extern const char ice_drv_ver[];
89#define ice_for_each_rxq(vsi, i) \ 89#define ice_for_each_rxq(vsi, i) \
90 for ((i) = 0; (i) < (vsi)->num_rxq; (i)++) 90 for ((i) = 0; (i) < (vsi)->num_rxq; (i)++)
91 91
92/* Macros for each allocated tx/rx ring whether used or not in a VSI */
93#define ice_for_each_alloc_txq(vsi, i) \
94 for ((i) = 0; (i) < (vsi)->alloc_txq; (i)++)
95
96#define ice_for_each_alloc_rxq(vsi, i) \
97 for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++)
98
92struct ice_tc_info { 99struct ice_tc_info {
93 u16 qoffset; 100 u16 qoffset;
94 u16 qcount; 101 u16 qcount;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 1db304c01d10..c71a9b528d6d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -26,7 +26,7 @@ static int ice_q_stats_len(struct net_device *netdev)
26{ 26{
27 struct ice_netdev_priv *np = netdev_priv(netdev); 27 struct ice_netdev_priv *np = netdev_priv(netdev);
28 28
29 return ((np->vsi->num_txq + np->vsi->num_rxq) * 29 return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) *
30 (sizeof(struct ice_q_stats) / sizeof(u64))); 30 (sizeof(struct ice_q_stats) / sizeof(u64)));
31} 31}
32 32
@@ -218,7 +218,7 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
218 p += ETH_GSTRING_LEN; 218 p += ETH_GSTRING_LEN;
219 } 219 }
220 220
221 ice_for_each_txq(vsi, i) { 221 ice_for_each_alloc_txq(vsi, i) {
222 snprintf(p, ETH_GSTRING_LEN, 222 snprintf(p, ETH_GSTRING_LEN,
223 "tx-queue-%u.tx_packets", i); 223 "tx-queue-%u.tx_packets", i);
224 p += ETH_GSTRING_LEN; 224 p += ETH_GSTRING_LEN;
@@ -226,7 +226,7 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
226 p += ETH_GSTRING_LEN; 226 p += ETH_GSTRING_LEN;
227 } 227 }
228 228
229 ice_for_each_rxq(vsi, i) { 229 ice_for_each_alloc_rxq(vsi, i) {
230 snprintf(p, ETH_GSTRING_LEN, 230 snprintf(p, ETH_GSTRING_LEN,
231 "rx-queue-%u.rx_packets", i); 231 "rx-queue-%u.rx_packets", i);
232 p += ETH_GSTRING_LEN; 232 p += ETH_GSTRING_LEN;
@@ -253,6 +253,24 @@ static int ice_get_sset_count(struct net_device *netdev, int sset)
253{ 253{
254 switch (sset) { 254 switch (sset) {
255 case ETH_SS_STATS: 255 case ETH_SS_STATS:
256 /* The number (and order) of strings reported *must* remain
257 * constant for a given netdevice. This function must not
258 * report a different number based on run time parameters
259 * (such as the number of queues in use, or the setting of
260 * a private ethtool flag). This is due to the nature of the
261 * ethtool stats API.
262 *
263 * User space programs such as ethtool must make 3 separate
264 * ioctl requests, one for size, one for the strings, and
265 * finally one for the stats. Since these cross into
266 * user space, changes to the number or size could result in
267 * undefined memory access or incorrect string<->value
268 * correlations for statistics.
269 *
270 * Even if it appears to be safe, changes to the size or
271 * order of strings will suffer from race conditions and are
272 * not safe.
273 */
256 return ICE_ALL_STATS_LEN(netdev); 274 return ICE_ALL_STATS_LEN(netdev);
257 default: 275 default:
258 return -EOPNOTSUPP; 276 return -EOPNOTSUPP;
@@ -280,18 +298,26 @@ ice_get_ethtool_stats(struct net_device *netdev,
280 /* populate per queue stats */ 298 /* populate per queue stats */
281 rcu_read_lock(); 299 rcu_read_lock();
282 300
283 ice_for_each_txq(vsi, j) { 301 ice_for_each_alloc_txq(vsi, j) {
284 ring = READ_ONCE(vsi->tx_rings[j]); 302 ring = READ_ONCE(vsi->tx_rings[j]);
285 if (!ring) 303 if (ring) {
286 continue; 304 data[i++] = ring->stats.pkts;
287 data[i++] = ring->stats.pkts; 305 data[i++] = ring->stats.bytes;
288 data[i++] = ring->stats.bytes; 306 } else {
307 data[i++] = 0;
308 data[i++] = 0;
309 }
289 } 310 }
290 311
291 ice_for_each_rxq(vsi, j) { 312 ice_for_each_alloc_rxq(vsi, j) {
292 ring = READ_ONCE(vsi->rx_rings[j]); 313 ring = READ_ONCE(vsi->rx_rings[j]);
293 data[i++] = ring->stats.pkts; 314 if (ring) {
294 data[i++] = ring->stats.bytes; 315 data[i++] = ring->stats.pkts;
316 data[i++] = ring->stats.bytes;
317 } else {
318 data[i++] = 0;
319 data[i++] = 0;
320 }
295 } 321 }
296 322
297 rcu_read_unlock(); 323 rcu_read_unlock();
@@ -519,7 +545,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
519 goto done; 545 goto done;
520 } 546 }
521 547
522 for (i = 0; i < vsi->num_txq; i++) { 548 for (i = 0; i < vsi->alloc_txq; i++) {
523 /* clone ring and setup updated count */ 549 /* clone ring and setup updated count */
524 tx_rings[i] = *vsi->tx_rings[i]; 550 tx_rings[i] = *vsi->tx_rings[i];
525 tx_rings[i].count = new_tx_cnt; 551 tx_rings[i].count = new_tx_cnt;
@@ -551,7 +577,7 @@ process_rx:
551 goto done; 577 goto done;
552 } 578 }
553 579
554 for (i = 0; i < vsi->num_rxq; i++) { 580 for (i = 0; i < vsi->alloc_rxq; i++) {
555 /* clone ring and setup updated count */ 581 /* clone ring and setup updated count */
556 rx_rings[i] = *vsi->rx_rings[i]; 582 rx_rings[i] = *vsi->rx_rings[i];
557 rx_rings[i].count = new_rx_cnt; 583 rx_rings[i].count = new_rx_cnt;