path: root/drivers/net/ethernet/intel/ice/ice_txrx.c
author    Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>    2019-02-19 18:04:01 -0500
committer Jeff Kirsher <jeffrey.t.kirsher@intel.com>    2019-03-26 17:03:01 -0400
commit    64a59d05a4b3ddb37eb5ad3a3be0f17148f449f5 (patch)
tree      be2b1cbfa0d984412eb9fc1a265e72df1aa75f55 /drivers/net/ethernet/intel/ice/ice_txrx.c
parent    5995b6d0c6fcdb9b29ef9339c5beeb6e02aae737 (diff)
ice: Fix for adaptive interrupt moderation
commit 63f545ed1285 ("ice: Add support for adaptive interrupt moderation") was meant to add support for adaptive interrupt moderation but there was an error on my part while formatting the patch, and thus only part of the patch ended up being submitted. This patch rectifies the error by adding the rest of the code.

Fixes: 63f545ed1285 ("ice: Add support for adaptive interrupt moderation")
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
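The core of the restored code is the adjust_by_size path of ice_update_itr(): it converts the byte and packet counts seen during the last interrupt into an average wire size, maps that through a piecewise-linear approximation of (170 * (size + 24)) / (size + 640), and then scales the result by a link-speed dependent divisor. Below is a minimal standalone sketch of that mapping; the piecewise constants are copied from the patch, while the function name and the MODEL_* defines (0x2 minimum increment, mirroring the usual ICE_ITR_ADAPTIVE_MIN_INC value) are assumptions for illustration only and are not driver code.

/* Illustrative model only -- not part of the patch.
 * MODEL_ADAPTIVE_MIN_INC is an assumed stand-in for ICE_ITR_ADAPTIVE_MIN_INC.
 */
#define MODEL_ADAPTIVE_MIN_INC  0x0002

static unsigned int model_size_to_itr(unsigned int bytes, unsigned int packets,
                                      int latency, unsigned int divisor)
{
        unsigned int avg = bytes / packets;

        /* Piecewise-linear approximation of
         * (170 * (size + 24)) / (size + 640), pre-scaled by 256
         */
        if (avg <= 60)
                avg = 4096;                     /* ~250K ints/sec */
        else if (avg <= 380)
                avg = avg * 40 + 1696;          /* 250K .. 60K ints/sec */
        else if (avg <= 1084)
                avg = avg * 15 + 11452;         /* 60K .. 36K ints/sec */
        else if (avg <= 1980)
                avg = avg * 5 + 22420;          /* 36K .. 30K ints/sec */
        else
                avg = 32256;                    /* plateau near 30K ints/sec */

        /* Low latency mode halves the delay, doubling the interrupt rate */
        if (latency)
                avg >>= 1;

        /* Undo the 256x pre-scaling with a link-speed dependent divisor,
         * e.g. MODEL_ADAPTIVE_MIN_INC * 1024 on a 40G link
         */
        return ((avg + divisor - 1) / divisor) * MODEL_ADAPTIVE_MIN_INC;
}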
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_txrx.c')
-rw-r--r--   drivers/net/ethernet/intel/ice/ice_txrx.c | 292
1 file changed, 268 insertions(+), 24 deletions(-)
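As a rough numeric check of the sketch above (again illustrative, and assuming a 40G link so the divisor is 0x2 * 1024 = 2048): 64 full-sized frames in one interrupt give an average wire size of 1500 bytes, 1500 * 5 + 22420 = 29920, and rounding 29920 / 2048 up to 15 then multiplying by 0x2 yields 30 usecs, roughly 33K interrupts per second, in line with the ~30K ints/sec plateau described in the new comments below.

/* Hypothetical usage of the sketch above; model_size_to_itr() is not part
 * of the driver. Assumes a 40G link (divisor = 0x2 * 1024 = 2048).
 */
void model_example(void)
{
        /* 64 frames averaging 1500 bytes, bulk mode (no latency flag) */
        unsigned int itr = model_size_to_itr(64 * 1500, 64, 0, 0x0002 * 1024);

        /* itr == 30 usecs here, i.e. roughly 33K interrupts/sec */
        (void)itr;
}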
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index ea4ec3760f8b..dfd7fa06ed22 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1097,18 +1097,257 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
         return failure ? budget : (int)total_rx_pkts;
 }
 
+static unsigned int ice_itr_divisor(struct ice_port_info *pi)
+{
+        switch (pi->phy.link_info.link_speed) {
+        case ICE_AQ_LINK_SPEED_40GB:
+                return ICE_ITR_ADAPTIVE_MIN_INC * 1024;
+        case ICE_AQ_LINK_SPEED_25GB:
+        case ICE_AQ_LINK_SPEED_20GB:
+                return ICE_ITR_ADAPTIVE_MIN_INC * 512;
+        case ICE_AQ_LINK_SPEED_100MB:
+                return ICE_ITR_ADAPTIVE_MIN_INC * 32;
+        default:
+                return ICE_ITR_ADAPTIVE_MIN_INC * 256;
+        }
+}
+
+/**
+ * ice_update_itr - update the adaptive ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
+ * @rc: structure containing ring performance data
+ *
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ */
+static void
+ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc)
+{
+        unsigned int avg_wire_size, packets, bytes, itr;
+        unsigned long next_update = jiffies;
+        bool container_is_rx;
+
+        if (!rc->ring || !ITR_IS_DYNAMIC(rc->itr_setting))
+                return;
+
+        /* If itr_countdown is set it means we programmed an ITR within
+         * the last 4 interrupt cycles. This has a side effect of us
+         * potentially firing an early interrupt. In order to work around
+         * this we need to throw out any data received for a few
+         * interrupts following the update.
+         */
+        if (q_vector->itr_countdown) {
+                itr = rc->target_itr;
+                goto clear_counts;
+        }
+
+        container_is_rx = (&q_vector->rx == rc);
+        /* For Rx we want to push the delay up and default to low latency.
+         * for Tx we want to pull the delay down and default to high latency.
+         */
+        itr = container_is_rx ?
+                ICE_ITR_ADAPTIVE_MIN_USECS | ICE_ITR_ADAPTIVE_LATENCY :
+                ICE_ITR_ADAPTIVE_MAX_USECS | ICE_ITR_ADAPTIVE_LATENCY;
+
+        /* If we didn't update within up to 1 - 2 jiffies we can assume
+         * that either packets are coming in so slow there hasn't been
+         * any work, or that there is so much work that NAPI is dealing
+         * with interrupt moderation and we don't need to do anything.
+         */
+        if (time_after(next_update, rc->next_update))
+                goto clear_counts;
+
+        packets = rc->total_pkts;
+        bytes = rc->total_bytes;
+
+        if (container_is_rx) {
+                /* If Rx there are 1 to 4 packets and bytes are less than
+                 * 9000 assume insufficient data to use bulk rate limiting
+                 * approach unless Tx is already in bulk rate limiting. We
+                 * are likely latency driven.
+                 */
+                if (packets && packets < 4 && bytes < 9000 &&
+                    (q_vector->tx.target_itr & ICE_ITR_ADAPTIVE_LATENCY)) {
+                        itr = ICE_ITR_ADAPTIVE_LATENCY;
+                        goto adjust_by_size;
+                }
+        } else if (packets < 4) {
+                /* If we have Tx and Rx ITR maxed and Tx ITR is running in
+                 * bulk mode and we are receiving 4 or fewer packets just
+                 * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+                 * that the Rx can relax.
+                 */
+                if (rc->target_itr == ICE_ITR_ADAPTIVE_MAX_USECS &&
+                    (q_vector->rx.target_itr & ICE_ITR_MASK) ==
+                    ICE_ITR_ADAPTIVE_MAX_USECS)
+                        goto clear_counts;
+        } else if (packets > 32) {
+                /* If we have processed over 32 packets in a single interrupt
+                 * for Tx assume we need to switch over to "bulk" mode.
+                 */
+                rc->target_itr &= ~ICE_ITR_ADAPTIVE_LATENCY;
+        }
+
+        /* We have no packets to actually measure against. This means
+         * either one of the other queues on this vector is active or
+         * we are a Tx queue doing TSO with too high of an interrupt rate.
+         *
+         * Between 4 and 56 we can assume that our current interrupt delay
+         * is only slightly too low. As such we should increase it by a small
+         * fixed amount.
+         */
+        if (packets < 56) {
+                itr = rc->target_itr + ICE_ITR_ADAPTIVE_MIN_INC;
+                if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
+                        itr &= ICE_ITR_ADAPTIVE_LATENCY;
+                        itr += ICE_ITR_ADAPTIVE_MAX_USECS;
+                }
+                goto clear_counts;
+        }
+
+        if (packets <= 256) {
+                itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+                itr &= ICE_ITR_MASK;
+
+                /* Between 56 and 112 is our "goldilocks" zone where we are
+                 * working out "just right". Just report that our current
+                 * ITR is good for us.
+                 */
+                if (packets <= 112)
+                        goto clear_counts;
+
+                /* If packet count is 128 or greater we are likely looking
+                 * at a slight overrun of the delay we want. Try halving
+                 * our delay to see if that will cut the number of packets
+                 * in half per interrupt.
+                 */
+                itr >>= 1;
+                itr &= ICE_ITR_MASK;
+                if (itr < ICE_ITR_ADAPTIVE_MIN_USECS)
+                        itr = ICE_ITR_ADAPTIVE_MIN_USECS;
+
+                goto clear_counts;
+        }
+
+        /* The paths below assume we are dealing with a bulk ITR since
+         * number of packets is greater than 256. We are just going to have
+         * to compute a value and try to bring the count under control,
+         * though for smaller packet sizes there isn't much we can do as
+         * NAPI polling will likely be kicking in sooner rather than later.
+         */
+        itr = ICE_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+        /* If packet counts are 256 or greater we can assume we have a gross
+         * overestimation of what the rate should be. Instead of trying to fine
+         * tune it just use the formula below to try and dial in an exact value
+         * gives the current packet size of the frame.
+         */
+        avg_wire_size = bytes / packets;
+
+        /* The following is a crude approximation of:
+         *  wmem_default / (size + overhead) = desired_pkts_per_int
+         *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+         *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+         *
+         * Assuming wmem_default is 212992 and overhead is 640 bytes per
+         * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+         * formula down to
+         *
+         *      (170 * (size + 24)) / (size + 640) = ITR
+         *
+         * We first do some math on the packet size and then finally bitshift
+         * by 8 after rounding up. We also have to account for PCIe link speed
+         * difference as ITR scales based on this.
+         */
+        if (avg_wire_size <= 60) {
+                /* Start at 250k ints/sec */
+                avg_wire_size = 4096;
+        } else if (avg_wire_size <= 380) {
+                /* 250K ints/sec to 60K ints/sec */
+                avg_wire_size *= 40;
+                avg_wire_size += 1696;
+        } else if (avg_wire_size <= 1084) {
+                /* 60K ints/sec to 36K ints/sec */
+                avg_wire_size *= 15;
+                avg_wire_size += 11452;
+        } else if (avg_wire_size <= 1980) {
+                /* 36K ints/sec to 30K ints/sec */
+                avg_wire_size *= 5;
+                avg_wire_size += 22420;
+        } else {
+                /* plateau at a limit of 30K ints/sec */
+                avg_wire_size = 32256;
+        }
+
+        /* If we are in low latency mode halve our delay which doubles the
+         * rate to somewhere between 100K to 16K ints/sec
+         */
+        if (itr & ICE_ITR_ADAPTIVE_LATENCY)
+                avg_wire_size >>= 1;
+
+        /* Resultant value is 256 times larger than it needs to be. This
+         * gives us room to adjust the value as needed to either increase
+         * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+         *
+         * Use addition as we have already recorded the new latency flag
+         * for the ITR value.
+         */
+        itr += DIV_ROUND_UP(avg_wire_size,
+                            ice_itr_divisor(q_vector->vsi->port_info)) *
+               ICE_ITR_ADAPTIVE_MIN_INC;
+
+        if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
+                itr &= ICE_ITR_ADAPTIVE_LATENCY;
+                itr += ICE_ITR_ADAPTIVE_MAX_USECS;
+        }
+
+clear_counts:
+        /* write back value */
+        rc->target_itr = itr;
+
+        /* next update should occur within next jiffy */
+        rc->next_update = next_update + 1;
+
+        rc->total_bytes = 0;
+        rc->total_pkts = 0;
+}
+
 /**
  * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register
  * @itr_idx: interrupt throttling index
- * @reg_itr: interrupt throttling value adjusted based on ITR granularity
+ * @itr: interrupt throttling value in usecs
  */
-static u32 ice_buildreg_itr(int itr_idx, u16 reg_itr)
+static u32 ice_buildreg_itr(int itr_idx, u16 itr)
 {
+        /* The itr value is reported in microseconds, and the register value is
+         * recorded in 2 microsecond units. For this reason we only need to
+         * shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this
+         * granularity as a shift instead of division. The mask makes sure the
+         * ITR value is never odd so we don't accidentally write into the field
+         * prior to the ITR field.
+         */
+        itr &= ICE_ITR_MASK;
+
         return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
                 (itr_idx << GLINT_DYN_CTL_ITR_INDX_S) |
-                (reg_itr << GLINT_DYN_CTL_INTERVAL_S);
+                (itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S));
 }
 
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
+
 /**
  * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt
  * @vsi: the VSI associated with the q_vector
@@ -1117,10 +1356,14 @@ static u32 ice_buildreg_itr(int itr_idx, u16 reg_itr)
 static void
 ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
 {
-        struct ice_hw *hw = &vsi->back->hw;
-        struct ice_ring_container *rc;
+        struct ice_ring_container *tx = &q_vector->tx;
+        struct ice_ring_container *rx = &q_vector->rx;
         u32 itr_val;
 
+        /* This will do nothing if dynamic updates are not enabled */
+        ice_update_itr(q_vector, tx);
+        ice_update_itr(q_vector, rx);
+
         /* This block of logic allows us to get away with only updating
          * one ITR value with each interrupt. The idea is to perform a
          * pseudo-lazy update with the following criteria.
@@ -1129,35 +1372,36 @@ ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
          * 2. If we must reduce an ITR that is given highest priority.
          * 3. We then give priority to increasing ITR based on amount.
          */
-        if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
-                rc = &q_vector->rx;
+        if (rx->target_itr < rx->current_itr) {
                 /* Rx ITR needs to be reduced, this is highest priority */
-                itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
-                rc->current_itr = rc->target_itr;
-        } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
-                   ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
-                    (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
-                rc = &q_vector->tx;
+                itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr);
+                rx->current_itr = rx->target_itr;
+                q_vector->itr_countdown = ITR_COUNTDOWN_START;
+        } else if ((tx->target_itr < tx->current_itr) ||
+                   ((rx->target_itr - rx->current_itr) <
+                    (tx->target_itr - tx->current_itr))) {
                 /* Tx ITR needs to be reduced, this is second priority
                  * Tx ITR needs to be increased more than Rx, fourth priority
                  */
-                itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
-                rc->current_itr = rc->target_itr;
-        } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
-                rc = &q_vector->rx;
+                itr_val = ice_buildreg_itr(tx->itr_idx, tx->target_itr);
+                tx->current_itr = tx->target_itr;
+                q_vector->itr_countdown = ITR_COUNTDOWN_START;
+        } else if (rx->current_itr != rx->target_itr) {
                 /* Rx ITR needs to be increased, third priority */
-                itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
-                rc->current_itr = rc->target_itr;
+                itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr);
+                rx->current_itr = rx->target_itr;
+                q_vector->itr_countdown = ITR_COUNTDOWN_START;
         } else {
                 /* Still have to re-enable the interrupts */
                 itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
+                if (q_vector->itr_countdown)
+                        q_vector->itr_countdown--;
         }
 
-        if (!test_bit(__ICE_DOWN, vsi->state)) {
-                int vector = vsi->hw_base_vector + q_vector->v_idx;
-
-                wr32(hw, GLINT_DYN_CTL(vector), itr_val);
-        }
+        if (!test_bit(__ICE_DOWN, vsi->state))
+                wr32(&vsi->back->hw,
+                     GLINT_DYN_CTL(vsi->hw_base_vector + q_vector->v_idx),
+                     itr_val);
 }
 
 /**