Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_txrx.c')
 -rw-r--r--  drivers/net/ethernet/intel/ice/ice_txrx.c  | 292
 1 file changed, 268 insertions(+), 24 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index ea4ec3760f8b..dfd7fa06ed22 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1097,18 +1097,257 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 	return failure ? budget : (int)total_rx_pkts;
 }
 
+static unsigned int ice_itr_divisor(struct ice_port_info *pi)
+{
+	switch (pi->phy.link_info.link_speed) {
+	case ICE_AQ_LINK_SPEED_40GB:
+		return ICE_ITR_ADAPTIVE_MIN_INC * 1024;
+	case ICE_AQ_LINK_SPEED_25GB:
+	case ICE_AQ_LINK_SPEED_20GB:
+		return ICE_ITR_ADAPTIVE_MIN_INC * 512;
+	case ICE_AQ_LINK_SPEED_100MB:
+		return ICE_ITR_ADAPTIVE_MIN_INC * 32;
+	default:
+		return ICE_ITR_ADAPTIVE_MIN_INC * 256;
+	}
+}
+
+/**
+ * ice_update_itr - update the adaptive ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
+ * @rc: structure containing ring performance data
+ *
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ */
+static void
+ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc)
+{
+	unsigned int avg_wire_size, packets, bytes, itr;
+	unsigned long next_update = jiffies;
+	bool container_is_rx;
+
+	if (!rc->ring || !ITR_IS_DYNAMIC(rc->itr_setting))
+		return;
+
+	/* If itr_countdown is set it means we programmed an ITR within
+	 * the last 4 interrupt cycles. This has a side effect of us
+	 * potentially firing an early interrupt. In order to work around
+	 * this we need to throw out any data received for a few
+	 * interrupts following the update.
+	 */
+	if (q_vector->itr_countdown) {
+		itr = rc->target_itr;
+		goto clear_counts;
+	}
+
+	container_is_rx = (&q_vector->rx == rc);
+	/* For Rx we want to push the delay up and default to low latency.
+	 * for Tx we want to pull the delay down and default to high latency.
+	 */
+	itr = container_is_rx ?
+		ICE_ITR_ADAPTIVE_MIN_USECS | ICE_ITR_ADAPTIVE_LATENCY :
+		ICE_ITR_ADAPTIVE_MAX_USECS | ICE_ITR_ADAPTIVE_LATENCY;
+
+	/* If we didn't update within up to 1 - 2 jiffies we can assume
+	 * that either packets are coming in so slow there hasn't been
+	 * any work, or that there is so much work that NAPI is dealing
+	 * with interrupt moderation and we don't need to do anything.
+	 */
+	if (time_after(next_update, rc->next_update))
+		goto clear_counts;
+
+	packets = rc->total_pkts;
+	bytes = rc->total_bytes;
+
+	if (container_is_rx) {
+		/* If Rx there are 1 to 4 packets and bytes are less than
+		 * 9000 assume insufficient data to use bulk rate limiting
+		 * approach unless Tx is already in bulk rate limiting. We
+		 * are likely latency driven.
+		 */
+		if (packets && packets < 4 && bytes < 9000 &&
+		    (q_vector->tx.target_itr & ICE_ITR_ADAPTIVE_LATENCY)) {
+			itr = ICE_ITR_ADAPTIVE_LATENCY;
+			goto adjust_by_size;
+		}
+	} else if (packets < 4) {
+		/* If we have Tx and Rx ITR maxed and Tx ITR is running in
+		 * bulk mode and we are receiving 4 or fewer packets just
+		 * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+		 * that the Rx can relax.
+		 */
+		if (rc->target_itr == ICE_ITR_ADAPTIVE_MAX_USECS &&
+		    (q_vector->rx.target_itr & ICE_ITR_MASK) ==
+		     ICE_ITR_ADAPTIVE_MAX_USECS)
+			goto clear_counts;
+	} else if (packets > 32) {
+		/* If we have processed over 32 packets in a single interrupt
+		 * for Tx assume we need to switch over to "bulk" mode.
+		 */
+		rc->target_itr &= ~ICE_ITR_ADAPTIVE_LATENCY;
+	}
+
+	/* We have no packets to actually measure against. This means
+	 * either one of the other queues on this vector is active or
+	 * we are a Tx queue doing TSO with too high of an interrupt rate.
+	 *
+	 * Between 4 and 56 we can assume that our current interrupt delay
+	 * is only slightly too low. As such we should increase it by a small
+	 * fixed amount.
+	 */
+	if (packets < 56) {
+		itr = rc->target_itr + ICE_ITR_ADAPTIVE_MIN_INC;
+		if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
+			itr &= ICE_ITR_ADAPTIVE_LATENCY;
+			itr += ICE_ITR_ADAPTIVE_MAX_USECS;
+		}
+		goto clear_counts;
+	}
+
+	if (packets <= 256) {
+		itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+		itr &= ICE_ITR_MASK;
+
+		/* Between 56 and 112 is our "goldilocks" zone where we are
+		 * working out "just right". Just report that our current
+		 * ITR is good for us.
+		 */
+		if (packets <= 112)
+			goto clear_counts;
+
+		/* If packet count is 128 or greater we are likely looking
+		 * at a slight overrun of the delay we want. Try halving
+		 * our delay to see if that will cut the number of packets
+		 * in half per interrupt.
+		 */
+		itr >>= 1;
+		itr &= ICE_ITR_MASK;
+		if (itr < ICE_ITR_ADAPTIVE_MIN_USECS)
+			itr = ICE_ITR_ADAPTIVE_MIN_USECS;
+
+		goto clear_counts;
+	}
+
+	/* The paths below assume we are dealing with a bulk ITR since
+	 * number of packets is greater than 256. We are just going to have
+	 * to compute a value and try to bring the count under control,
+	 * though for smaller packet sizes there isn't much we can do as
+	 * NAPI polling will likely be kicking in sooner rather than later.
+	 */
+	itr = ICE_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+	/* If packet counts are 256 or greater we can assume we have a gross
+	 * overestimation of what the rate should be. Instead of trying to fine
+	 * tune it just use the formula below to try and dial in an exact value
+	 * gives the current packet size of the frame.
+	 */
+	avg_wire_size = bytes / packets;
+
+	/* The following is a crude approximation of:
+	 *  wmem_default / (size + overhead) = desired_pkts_per_int
+	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+	 *
+	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
+	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+	 * formula down to
+	 *
+	 *	(170 * (size + 24)) / (size + 640) = ITR
+	 *
+	 * We first do some math on the packet size and then finally bitshift
+	 * by 8 after rounding up. We also have to account for PCIe link speed
+	 * difference as ITR scales based on this.
+	 */
+	if (avg_wire_size <= 60) {
+		/* Start at 250k ints/sec */
+		avg_wire_size = 4096;
+	} else if (avg_wire_size <= 380) {
+		/* 250K ints/sec to 60K ints/sec */
+		avg_wire_size *= 40;
+		avg_wire_size += 1696;
+	} else if (avg_wire_size <= 1084) {
+		/* 60K ints/sec to 36K ints/sec */
+		avg_wire_size *= 15;
+		avg_wire_size += 11452;
+	} else if (avg_wire_size <= 1980) {
+		/* 36K ints/sec to 30K ints/sec */
+		avg_wire_size *= 5;
+		avg_wire_size += 22420;
+	} else {
+		/* plateau at a limit of 30K ints/sec */
+		avg_wire_size = 32256;
+	}
+
+	/* If we are in low latency mode halve our delay which doubles the
+	 * rate to somewhere between 100K to 16K ints/sec
+	 */
+	if (itr & ICE_ITR_ADAPTIVE_LATENCY)
+		avg_wire_size >>= 1;
+
+	/* Resultant value is 256 times larger than it needs to be. This
+	 * gives us room to adjust the value as needed to either increase
+	 * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+	 *
+	 * Use addition as we have already recorded the new latency flag
+	 * for the ITR value.
+	 */
+	itr += DIV_ROUND_UP(avg_wire_size,
+			    ice_itr_divisor(q_vector->vsi->port_info)) *
+	       ICE_ITR_ADAPTIVE_MIN_INC;
+
+	if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
+		itr &= ICE_ITR_ADAPTIVE_LATENCY;
+		itr += ICE_ITR_ADAPTIVE_MAX_USECS;
+	}
+
+clear_counts:
+	/* write back value */
+	rc->target_itr = itr;
+
+	/* next update should occur within next jiffy */
+	rc->next_update = next_update + 1;
+
+	rc->total_bytes = 0;
+	rc->total_pkts = 0;
+}
+
 /**
  * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register
  * @itr_idx: interrupt throttling index
- * @reg_itr: interrupt throttling value adjusted based on ITR granularity
+ * @itr: interrupt throttling value in usecs
  */
-static u32 ice_buildreg_itr(int itr_idx, u16 reg_itr)
+static u32 ice_buildreg_itr(int itr_idx, u16 itr)
 {
+	/* The itr value is reported in microseconds, and the register value is
+	 * recorded in 2 microsecond units. For this reason we only need to
+	 * shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this
+	 * granularity as a shift instead of division. The mask makes sure the
+	 * ITR value is never odd so we don't accidentally write into the field
+	 * prior to the ITR field.
+	 */
+	itr &= ICE_ITR_MASK;
+
 	return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
 		(itr_idx << GLINT_DYN_CTL_ITR_INDX_S) |
-		(reg_itr << GLINT_DYN_CTL_INTERVAL_S);
+		(itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S));
 }
 
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START	3
+
 /**
  * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt
  * @vsi: the VSI associated with the q_vector
@@ -1117,10 +1356,14 @@ static u32 ice_buildreg_itr(int itr_idx, u16 reg_itr)
 static void
 ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
 {
-	struct ice_hw *hw = &vsi->back->hw;
-	struct ice_ring_container *rc;
+	struct ice_ring_container *tx = &q_vector->tx;
+	struct ice_ring_container *rx = &q_vector->rx;
 	u32 itr_val;
 
+	/* This will do nothing if dynamic updates are not enabled */
+	ice_update_itr(q_vector, tx);
+	ice_update_itr(q_vector, rx);
+
 	/* This block of logic allows us to get away with only updating
 	 * one ITR value with each interrupt. The idea is to perform a
 	 * pseudo-lazy update with the following criteria.
@@ -1129,35 +1372,36 @@ ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
 	 * 2. If we must reduce an ITR that is given highest priority.
 	 * 3. We then give priority to increasing ITR based on amount.
 	 */
-	if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
-		rc = &q_vector->rx;
+	if (rx->target_itr < rx->current_itr) {
 		/* Rx ITR needs to be reduced, this is highest priority */
-		itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
-		rc->current_itr = rc->target_itr;
-	} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
-		   ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
-		    (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
-		rc = &q_vector->tx;
+		itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr);
+		rx->current_itr = rx->target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if ((tx->target_itr < tx->current_itr) ||
+		   ((rx->target_itr - rx->current_itr) <
+		    (tx->target_itr - tx->current_itr))) {
 		/* Tx ITR needs to be reduced, this is second priority
 		 * Tx ITR needs to be increased more than Rx, fourth priority
 		 */
-		itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
-		rc->current_itr = rc->target_itr;
-	} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
-		rc = &q_vector->rx;
+		itr_val = ice_buildreg_itr(tx->itr_idx, tx->target_itr);
+		tx->current_itr = tx->target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if (rx->current_itr != rx->target_itr) {
 		/* Rx ITR needs to be increased, third priority */
-		itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
-		rc->current_itr = rc->target_itr;
+		itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr);
+		rx->current_itr = rx->target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	} else {
 		/* Still have to re-enable the interrupts */
 		itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
+		if (q_vector->itr_countdown)
+			q_vector->itr_countdown--;
 	}
 
-	if (!test_bit(__ICE_DOWN, vsi->state)) {
-		int vector = vsi->hw_base_vector + q_vector->v_idx;
-
-		wr32(hw, GLINT_DYN_CTL(vector), itr_val);
-	}
+	if (!test_bit(__ICE_DOWN, vsi->state))
+		wr32(&vsi->back->hw,
+		     GLINT_DYN_CTL(vsi->hw_base_vector + q_vector->v_idx),
+		     itr_val);
 }
 
 /**
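
As a rough, self-contained illustration of the adjust_by_size path added above, the userspace C sketch below reproduces the piecewise-linear buckets and the divisor-based scaling for a 40Gb link. The ICE_ITR_ADAPTIVE_MIN_INC value (2) and the DIV_ROUND_UP helper are assumptions made only for this sketch; their real definitions live in ice_txrx.h and linux/kernel.h and are not part of this diff.

#include <stdio.h>

#define ICE_ITR_ADAPTIVE_MIN_INC	0x0002	/* assumed 2 usec step, see ice_txrx.h */
#define DIV_ROUND_UP(n, d)		(((n) + (d) - 1) / (d))

/* Mirrors ice_itr_divisor() above for the ICE_AQ_LINK_SPEED_40GB case */
static unsigned int itr_divisor_40g(void)
{
	return ICE_ITR_ADAPTIVE_MIN_INC * 1024;
}

/* Mirrors the piecewise-linear buckets that follow the adjust_by_size label */
static unsigned int scaled_wire_size(unsigned int avg_wire_size)
{
	if (avg_wire_size <= 60)		/* start at 250k ints/sec */
		return 4096;
	if (avg_wire_size <= 380)		/* 250K ints/sec to 60K ints/sec */
		return avg_wire_size * 40 + 1696;
	if (avg_wire_size <= 1084)		/* 60K ints/sec to 36K ints/sec */
		return avg_wire_size * 15 + 11452;
	if (avg_wire_size <= 1980)		/* 36K ints/sec to 30K ints/sec */
		return avg_wire_size * 5 + 22420;
	return 32256;				/* plateau at ~30K ints/sec */
}

int main(void)
{
	unsigned int sizes[] = { 60, 380, 1084, 1500, 9000 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned int scaled = scaled_wire_size(sizes[i]);
		unsigned int itr = DIV_ROUND_UP(scaled, itr_divisor_40g()) *
				   ICE_ITR_ADAPTIVE_MIN_INC;

		printf("avg_wire_size %4u -> scaled %5u -> ~%2u usecs (~%u ints/sec)\n",
		       sizes[i], scaled, itr, 1000000 / itr);
	}
	return 0;
}

With those assumed constants, a 60-byte average frame maps to about 4 usecs (roughly 250K ints/sec) on a 40Gb link, and frames past ~2KB plateau near 30K ints/sec, which matches the bucket comments in the patch.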