diff options
author | Alexander Duyck <alexander.h.duyck@intel.com> | 2011-01-06 09:29:57 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-01-10 02:44:11 -0500 |
commit | 905e4a4163c4e807daf1f1f6b8f958e762a834a8 (patch) | |
tree | 4f2a71c7dc5255a1dfb9f4063b94a920a3ada135 /drivers/net/ixgbe/ixgbe_82599.c | |
parent | 2d39d576fad0fd4bb79a0de26fca50a4be1ffdc1 (diff) |
ixgbe: cleanup flow director hash computation to improve performance
This change cleans up the layout of the flow director data and the
algorithm used to calculate the hash, resulting in a 35x / 3500% performance
increase versus the old flow director hash computation. The overall effect
is only a 1% increase in transactions per second, though, because only
1 packet in 20 is actually hashed.
TCP_RR before:
Socket Size Request Resp. Elapsed Trans.
Send Recv Size Size Time Rate
bytes Bytes bytes bytes secs. per sec
16384 87380 1 1 60.00 23059.27
16384 87380
TCP_RR after:
Socket Size Request Resp. Elapsed Trans.
Send Recv Size Size Time Rate
bytes Bytes bytes bytes secs. per sec
16384 87380 1 1 60.00 23239.98
16384 87380
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ixgbe/ixgbe_82599.c')
-rw-r--r-- | drivers/net/ixgbe/ixgbe_82599.c | 335 |
1 files changed, 126 insertions, 209 deletions
diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c index bfd3c227cd4a..40aa3c29dc1d 100644 --- a/drivers/net/ixgbe/ixgbe_82599.c +++ b/drivers/net/ixgbe/ixgbe_82599.c | |||
@@ -1003,7 +1003,7 @@ s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw) | |||
1003 | udelay(10); | 1003 | udelay(10); |
1004 | } | 1004 | } |
1005 | if (i >= IXGBE_FDIRCMD_CMD_POLL) { | 1005 | if (i >= IXGBE_FDIRCMD_CMD_POLL) { |
1006 | hw_dbg(hw ,"Flow Director previous command isn't complete, " | 1006 | hw_dbg(hw, "Flow Director previous command isn't complete, " |
1007 | "aborting table re-initialization.\n"); | 1007 | "aborting table re-initialization.\n"); |
1008 | return IXGBE_ERR_FDIR_REINIT_FAILED; | 1008 | return IXGBE_ERR_FDIR_REINIT_FAILED; |
1009 | } | 1009 | } |
@@ -1113,13 +1113,10 @@ s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 pballoc) | |||
1113 | /* Move the flexible bytes to use the ethertype - shift 6 words */ | 1113 | /* Move the flexible bytes to use the ethertype - shift 6 words */ |
1114 | fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT); | 1114 | fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT); |
1115 | 1115 | ||
1116 | fdirctrl |= IXGBE_FDIRCTRL_REPORT_STATUS; | ||
1117 | 1116 | ||
1118 | /* Prime the keys for hashing */ | 1117 | /* Prime the keys for hashing */ |
1119 | IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, | 1118 | IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY); |
1120 | htonl(IXGBE_ATR_BUCKET_HASH_KEY)); | 1119 | IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY); |
1121 | IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, | ||
1122 | htonl(IXGBE_ATR_SIGNATURE_HASH_KEY)); | ||
1123 | 1120 | ||
1124 | /* | 1121 | /* |
1125 | * Poll init-done after we write the register. Estimated times: | 1122 | * Poll init-done after we write the register. Estimated times: |
@@ -1209,10 +1206,8 @@ s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc) | |||
1209 | fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT); | 1206 | fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT); |
1210 | 1207 | ||
1211 | /* Prime the keys for hashing */ | 1208 | /* Prime the keys for hashing */ |
1212 | IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, | 1209 | IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY); |
1213 | htonl(IXGBE_ATR_BUCKET_HASH_KEY)); | 1210 | IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY); |
1214 | IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, | ||
1215 | htonl(IXGBE_ATR_SIGNATURE_HASH_KEY)); | ||
1216 | 1211 | ||
1217 | /* | 1212 | /* |
1218 | * Poll init-done after we write the register. Estimated times: | 1213 | * Poll init-done after we write the register. Estimated times: |
@@ -1251,8 +1246,8 @@ s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc) | |||
1251 | * @stream: input bitstream to compute the hash on | 1246 | * @stream: input bitstream to compute the hash on |
1252 | * @key: 32-bit hash key | 1247 | * @key: 32-bit hash key |
1253 | **/ | 1248 | **/ |
1254 | static u16 ixgbe_atr_compute_hash_82599(struct ixgbe_atr_input *atr_input, | 1249 | static u32 ixgbe_atr_compute_hash_82599(union ixgbe_atr_input *atr_input, |
1255 | u32 key) | 1250 | u32 key) |
1256 | { | 1251 | { |
1257 | /* | 1252 | /* |
1258 | * The algorithm is as follows: | 1253 | * The algorithm is as follows: |
@@ -1272,100 +1267,68 @@ static u16 ixgbe_atr_compute_hash_82599(struct ixgbe_atr_input *atr_input, | |||
1272 | * To simplify for programming, the algorithm is implemented | 1267 | * To simplify for programming, the algorithm is implemented |
1273 | * in software this way: | 1268 | * in software this way: |
1274 | * | 1269 | * |
1275 | * Key[31:0], Stream[335:0] | 1270 | * key[31:0], hi_hash_dword[31:0], lo_hash_dword[31:0], hash[15:0] |
1271 | * | ||
1272 | * for (i = 0; i < 352; i+=32) | ||
1273 | * hi_hash_dword[31:0] ^= Stream[(i+31):i]; | ||
1274 | * | ||
1275 | * lo_hash_dword[15:0] ^= Stream[15:0]; | ||
1276 | * lo_hash_dword[15:0] ^= hi_hash_dword[31:16]; | ||
1277 | * lo_hash_dword[31:16] ^= hi_hash_dword[15:0]; | ||
1278 | * | ||
1279 | * hi_hash_dword[31:0] ^= Stream[351:320]; | ||
1276 | * | 1280 | * |
1277 | * tmp_key[11 * 32 - 1:0] = 11{Key[31:0] = key concatenated 11 times | 1281 | * if(key[0]) |
1278 | * int_key[350:0] = tmp_key[351:1] | 1282 | * hash[15:0] ^= Stream[15:0]; |
1279 | * int_stream[365:0] = Stream[14:0] | Stream[335:0] | Stream[335:321] | ||
1280 | * | 1283 | * |
1281 | * hash[15:0] = 0; | 1284 | * for (i = 0; i < 16; i++) { |
1282 | * for (i = 0; i < 351; i++) { | 1285 | * if (key[i]) |
1283 | * if (int_key[i]) | 1286 | * hash[15:0] ^= lo_hash_dword[(i+15):i]; |
1284 | * hash ^= int_stream[(i + 15):i]; | 1287 | * if (key[i + 16]) |
1288 | * hash[15:0] ^= hi_hash_dword[(i+15):i]; | ||
1285 | * } | 1289 | * } |
1290 | * | ||
1286 | */ | 1291 | */ |
1292 | __be32 common_hash_dword = 0; | ||
1293 | u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan; | ||
1294 | u32 hash_result = 0; | ||
1295 | u8 i; | ||
1287 | 1296 | ||
1288 | union { | 1297 | /* record the flow_vm_vlan bits as they are a key part to the hash */ |
1289 | u64 fill[6]; | 1298 | flow_vm_vlan = ntohl(atr_input->dword_stream[0]); |
1290 | u32 key[11]; | ||
1291 | u8 key_stream[44]; | ||
1292 | } tmp_key; | ||
1293 | 1299 | ||
1294 | u8 *stream = (u8 *)atr_input; | 1300 | /* generate common hash dword */ |
1295 | u8 int_key[44]; /* upper-most bit unused */ | 1301 | for (i = 10; i; i -= 2) |
1296 | u8 hash_str[46]; /* upper-most 2 bits unused */ | 1302 | common_hash_dword ^= atr_input->dword_stream[i] ^ |
1297 | u16 hash_result = 0; | 1303 | atr_input->dword_stream[i - 1]; |
1298 | int i, j, k, h; | ||
1299 | 1304 | ||
1300 | /* | 1305 | hi_hash_dword = ntohl(common_hash_dword); |
1301 | * Initialize the fill member to prevent warnings | ||
1302 | * on some compilers | ||
1303 | */ | ||
1304 | tmp_key.fill[0] = 0; | ||
1305 | 1306 | ||
1306 | /* First load the temporary key stream */ | 1307 | /* low dword is word swapped version of common */ |
1307 | for (i = 0; i < 6; i++) { | 1308 | lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16); |
1308 | u64 fillkey = ((u64)key << 32) | key; | ||
1309 | tmp_key.fill[i] = fillkey; | ||
1310 | } | ||
1311 | 1309 | ||
1312 | /* | 1310 | /* apply flow ID/VM pool/VLAN ID bits to hash words */ |
1313 | * Set the interim key for the hashing. Bit 352 is unused, so we must | 1311 | hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16); |
1314 | * shift and compensate when building the key. | ||
1315 | */ | ||
1316 | 1312 | ||
1317 | int_key[0] = tmp_key.key_stream[0] >> 1; | 1313 | /* Process bits 0 and 16 */ |
1318 | for (i = 1, j = 0; i < 44; i++) { | 1314 | if (key & 0x0001) hash_result ^= lo_hash_dword; |
1319 | unsigned int this_key = tmp_key.key_stream[j] << 7; | 1315 | if (key & 0x00010000) hash_result ^= hi_hash_dword; |
1320 | j++; | ||
1321 | int_key[i] = (u8)(this_key | (tmp_key.key_stream[j] >> 1)); | ||
1322 | } | ||
1323 | 1316 | ||
1324 | /* | 1317 | /* |
1325 | * Set the interim bit string for the hashing. Bits 368 and 367 are | 1318 | * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to |
1326 | * unused, so shift and compensate when building the string. | 1319 | * delay this because bit 0 of the stream should not be processed |
1320 | * so we do not add the vlan until after bit 0 was processed | ||
1327 | */ | 1321 | */ |
1328 | hash_str[0] = (stream[40] & 0x7f) >> 1; | 1322 | lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16); |
1329 | for (i = 1, j = 40; i < 46; i++) { | ||
1330 | unsigned int this_str = stream[j] << 7; | ||
1331 | j++; | ||
1332 | if (j > 41) | ||
1333 | j = 0; | ||
1334 | hash_str[i] = (u8)(this_str | (stream[j] >> 1)); | ||
1335 | } | ||
1336 | 1323 | ||
1337 | /* | 1324 | |
1338 | * Now compute the hash. i is the index into hash_str, j is into our | 1325 | /* process the remaining 30 bits in the key 2 bits at a time */ |
1339 | * key stream, k is counting the number of bits, and h interates within | 1326 | for (i = 15; i; i-- ) { |
1340 | * each byte. | 1327 | if (key & (0x0001 << i)) hash_result ^= lo_hash_dword >> i; |
1341 | */ | 1328 | if (key & (0x00010000 << i)) hash_result ^= hi_hash_dword >> i; |
1342 | for (i = 45, j = 43, k = 0; k < 351 && i >= 2 && j >= 0; i--, j--) { | ||
1343 | for (h = 0; h < 8 && k < 351; h++, k++) { | ||
1344 | if (int_key[j] & (1 << h)) { | ||
1345 | /* | ||
1346 | * Key bit is set, XOR in the current 16-bit | ||
1347 | * string. Example of processing: | ||
1348 | * h = 0, | ||
1349 | * tmp = (hash_str[i - 2] & 0 << 16) | | ||
1350 | * (hash_str[i - 1] & 0xff << 8) | | ||
1351 | * (hash_str[i] & 0xff >> 0) | ||
1352 | * So tmp = hash_str[15 + k:k], since the | ||
1353 | * i + 2 clause rolls off the 16-bit value | ||
1354 | * h = 7, | ||
1355 | * tmp = (hash_str[i - 2] & 0x7f << 9) | | ||
1356 | * (hash_str[i - 1] & 0xff << 1) | | ||
1357 | * (hash_str[i] & 0x80 >> 7) | ||
1358 | */ | ||
1359 | int tmp = (hash_str[i] >> h); | ||
1360 | tmp |= (hash_str[i - 1] << (8 - h)); | ||
1361 | tmp |= (int)(hash_str[i - 2] & ((1 << h) - 1)) | ||
1362 | << (16 - h); | ||
1363 | hash_result ^= (u16)tmp; | ||
1364 | } | ||
1365 | } | ||
1366 | } | 1329 | } |
1367 | 1330 | ||
1368 | return hash_result; | 1331 | return hash_result & IXGBE_ATR_HASH_MASK; |
1369 | } | 1332 | } |
1370 | 1333 | ||
1371 | /** | 1334 | /** |
@@ -1373,10 +1336,9 @@ static u16 ixgbe_atr_compute_hash_82599(struct ixgbe_atr_input *atr_input, | |||
1373 | * @input: input stream to modify | 1336 | * @input: input stream to modify |
1374 | * @vlan: the VLAN id to load | 1337 | * @vlan: the VLAN id to load |
1375 | **/ | 1338 | **/ |
1376 | s32 ixgbe_atr_set_vlan_id_82599(struct ixgbe_atr_input *input, u16 vlan) | 1339 | s32 ixgbe_atr_set_vlan_id_82599(union ixgbe_atr_input *input, __be16 vlan) |
1377 | { | 1340 | { |
1378 | input->byte_stream[IXGBE_ATR_VLAN_OFFSET + 1] = vlan >> 8; | 1341 | input->formatted.vlan_id = vlan; |
1379 | input->byte_stream[IXGBE_ATR_VLAN_OFFSET] = vlan & 0xff; | ||
1380 | 1342 | ||
1381 | return 0; | 1343 | return 0; |
1382 | } | 1344 | } |
@@ -1386,14 +1348,9 @@ s32 ixgbe_atr_set_vlan_id_82599(struct ixgbe_atr_input *input, u16 vlan) | |||
1386 | * @input: input stream to modify | 1348 | * @input: input stream to modify |
1387 | * @src_addr: the IP address to load | 1349 | * @src_addr: the IP address to load |
1388 | **/ | 1350 | **/ |
1389 | s32 ixgbe_atr_set_src_ipv4_82599(struct ixgbe_atr_input *input, u32 src_addr) | 1351 | s32 ixgbe_atr_set_src_ipv4_82599(union ixgbe_atr_input *input, __be32 src_addr) |
1390 | { | 1352 | { |
1391 | input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 3] = src_addr >> 24; | 1353 | input->formatted.src_ip[0] = src_addr; |
1392 | input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 2] = | ||
1393 | (src_addr >> 16) & 0xff; | ||
1394 | input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 1] = | ||
1395 | (src_addr >> 8) & 0xff; | ||
1396 | input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET] = src_addr & 0xff; | ||
1397 | 1354 | ||
1398 | return 0; | 1355 | return 0; |
1399 | } | 1356 | } |
@@ -1403,14 +1360,9 @@ s32 ixgbe_atr_set_src_ipv4_82599(struct ixgbe_atr_input *input, u32 src_addr) | |||
1403 | * @input: input stream to modify | 1360 | * @input: input stream to modify |
1404 | * @dst_addr: the IP address to load | 1361 | * @dst_addr: the IP address to load |
1405 | **/ | 1362 | **/ |
1406 | s32 ixgbe_atr_set_dst_ipv4_82599(struct ixgbe_atr_input *input, u32 dst_addr) | 1363 | s32 ixgbe_atr_set_dst_ipv4_82599(union ixgbe_atr_input *input, __be32 dst_addr) |
1407 | { | 1364 | { |
1408 | input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 3] = dst_addr >> 24; | 1365 | input->formatted.dst_ip[0] = dst_addr; |
1409 | input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 2] = | ||
1410 | (dst_addr >> 16) & 0xff; | ||
1411 | input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 1] = | ||
1412 | (dst_addr >> 8) & 0xff; | ||
1413 | input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET] = dst_addr & 0xff; | ||
1414 | 1366 | ||
1415 | return 0; | 1367 | return 0; |
1416 | } | 1368 | } |
@@ -1420,10 +1372,9 @@ s32 ixgbe_atr_set_dst_ipv4_82599(struct ixgbe_atr_input *input, u32 dst_addr) | |||
1420 | * @input: input stream to modify | 1372 | * @input: input stream to modify |
1421 | * @src_port: the source port to load | 1373 | * @src_port: the source port to load |
1422 | **/ | 1374 | **/ |
1423 | s32 ixgbe_atr_set_src_port_82599(struct ixgbe_atr_input *input, u16 src_port) | 1375 | s32 ixgbe_atr_set_src_port_82599(union ixgbe_atr_input *input, __be16 src_port) |
1424 | { | 1376 | { |
1425 | input->byte_stream[IXGBE_ATR_SRC_PORT_OFFSET + 1] = src_port >> 8; | 1377 | input->formatted.src_port = src_port; |
1426 | input->byte_stream[IXGBE_ATR_SRC_PORT_OFFSET] = src_port & 0xff; | ||
1427 | 1378 | ||
1428 | return 0; | 1379 | return 0; |
1429 | } | 1380 | } |
@@ -1433,10 +1384,9 @@ s32 ixgbe_atr_set_src_port_82599(struct ixgbe_atr_input *input, u16 src_port) | |||
1433 | * @input: input stream to modify | 1384 | * @input: input stream to modify |
1434 | * @dst_port: the destination port to load | 1385 | * @dst_port: the destination port to load |
1435 | **/ | 1386 | **/ |
1436 | s32 ixgbe_atr_set_dst_port_82599(struct ixgbe_atr_input *input, u16 dst_port) | 1387 | s32 ixgbe_atr_set_dst_port_82599(union ixgbe_atr_input *input, __be16 dst_port) |
1437 | { | 1388 | { |
1438 | input->byte_stream[IXGBE_ATR_DST_PORT_OFFSET + 1] = dst_port >> 8; | 1389 | input->formatted.dst_port = dst_port; |
1439 | input->byte_stream[IXGBE_ATR_DST_PORT_OFFSET] = dst_port & 0xff; | ||
1440 | 1390 | ||
1441 | return 0; | 1391 | return 0; |
1442 | } | 1392 | } |
@@ -1446,10 +1396,10 @@ s32 ixgbe_atr_set_dst_port_82599(struct ixgbe_atr_input *input, u16 dst_port) | |||
1446 | * @input: input stream to modify | 1396 | * @input: input stream to modify |
1447 | * @flex_bytes: the flexible bytes to load | 1397 | * @flex_bytes: the flexible bytes to load |
1448 | **/ | 1398 | **/ |
1449 | s32 ixgbe_atr_set_flex_byte_82599(struct ixgbe_atr_input *input, u16 flex_byte) | 1399 | s32 ixgbe_atr_set_flex_byte_82599(union ixgbe_atr_input *input, |
1400 | __be16 flex_bytes) | ||
1450 | { | 1401 | { |
1451 | input->byte_stream[IXGBE_ATR_FLEX_BYTE_OFFSET + 1] = flex_byte >> 8; | 1402 | input->formatted.flex_bytes = flex_bytes; |
1452 | input->byte_stream[IXGBE_ATR_FLEX_BYTE_OFFSET] = flex_byte & 0xff; | ||
1453 | 1403 | ||
1454 | return 0; | 1404 | return 0; |
1455 | } | 1405 | } |
@@ -1459,9 +1409,9 @@ s32 ixgbe_atr_set_flex_byte_82599(struct ixgbe_atr_input *input, u16 flex_byte) | |||
1459 | * @input: input stream to modify | 1409 | * @input: input stream to modify |
1460 | * @l4type: the layer 4 type value to load | 1410 | * @l4type: the layer 4 type value to load |
1461 | **/ | 1411 | **/ |
1462 | s32 ixgbe_atr_set_l4type_82599(struct ixgbe_atr_input *input, u8 l4type) | 1412 | s32 ixgbe_atr_set_l4type_82599(union ixgbe_atr_input *input, u8 l4type) |
1463 | { | 1413 | { |
1464 | input->byte_stream[IXGBE_ATR_L4TYPE_OFFSET] = l4type; | 1414 | input->formatted.flow_type = l4type; |
1465 | 1415 | ||
1466 | return 0; | 1416 | return 0; |
1467 | } | 1417 | } |
@@ -1471,10 +1421,9 @@ s32 ixgbe_atr_set_l4type_82599(struct ixgbe_atr_input *input, u8 l4type) | |||
1471 | * @input: input stream to search | 1421 | * @input: input stream to search |
1472 | * @vlan: the VLAN id to load | 1422 | * @vlan: the VLAN id to load |
1473 | **/ | 1423 | **/ |
1474 | static s32 ixgbe_atr_get_vlan_id_82599(struct ixgbe_atr_input *input, u16 *vlan) | 1424 | static s32 ixgbe_atr_get_vlan_id_82599(union ixgbe_atr_input *input, __be16 *vlan) |
1475 | { | 1425 | { |
1476 | *vlan = input->byte_stream[IXGBE_ATR_VLAN_OFFSET]; | 1426 | *vlan = input->formatted.vlan_id; |
1477 | *vlan |= input->byte_stream[IXGBE_ATR_VLAN_OFFSET + 1] << 8; | ||
1478 | 1427 | ||
1479 | return 0; | 1428 | return 0; |
1480 | } | 1429 | } |
@@ -1484,13 +1433,10 @@ static s32 ixgbe_atr_get_vlan_id_82599(struct ixgbe_atr_input *input, u16 *vlan) | |||
1484 | * @input: input stream to search | 1433 | * @input: input stream to search |
1485 | * @src_addr: the IP address to load | 1434 | * @src_addr: the IP address to load |
1486 | **/ | 1435 | **/ |
1487 | static s32 ixgbe_atr_get_src_ipv4_82599(struct ixgbe_atr_input *input, | 1436 | static s32 ixgbe_atr_get_src_ipv4_82599(union ixgbe_atr_input *input, |
1488 | u32 *src_addr) | 1437 | __be32 *src_addr) |
1489 | { | 1438 | { |
1490 | *src_addr = input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET]; | 1439 | *src_addr = input->formatted.src_ip[0]; |
1491 | *src_addr |= input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 1] << 8; | ||
1492 | *src_addr |= input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 2] << 16; | ||
1493 | *src_addr |= input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 3] << 24; | ||
1494 | 1440 | ||
1495 | return 0; | 1441 | return 0; |
1496 | } | 1442 | } |
@@ -1500,13 +1446,10 @@ static s32 ixgbe_atr_get_src_ipv4_82599(struct ixgbe_atr_input *input, | |||
1500 | * @input: input stream to search | 1446 | * @input: input stream to search |
1501 | * @dst_addr: the IP address to load | 1447 | * @dst_addr: the IP address to load |
1502 | **/ | 1448 | **/ |
1503 | static s32 ixgbe_atr_get_dst_ipv4_82599(struct ixgbe_atr_input *input, | 1449 | static s32 ixgbe_atr_get_dst_ipv4_82599(union ixgbe_atr_input *input, |
1504 | u32 *dst_addr) | 1450 | __be32 *dst_addr) |
1505 | { | 1451 | { |
1506 | *dst_addr = input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET]; | 1452 | *dst_addr = input->formatted.dst_ip[0]; |
1507 | *dst_addr |= input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 1] << 8; | ||
1508 | *dst_addr |= input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 2] << 16; | ||
1509 | *dst_addr |= input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 3] << 24; | ||
1510 | 1453 | ||
1511 | return 0; | 1454 | return 0; |
1512 | } | 1455 | } |
@@ -1519,29 +1462,14 @@ static s32 ixgbe_atr_get_dst_ipv4_82599(struct ixgbe_atr_input *input, | |||
1519 | * @src_addr_3: the third 4 bytes of the IP address to load | 1462 | * @src_addr_3: the third 4 bytes of the IP address to load |
1520 | * @src_addr_4: the fourth 4 bytes of the IP address to load | 1463 | * @src_addr_4: the fourth 4 bytes of the IP address to load |
1521 | **/ | 1464 | **/ |
1522 | static s32 ixgbe_atr_get_src_ipv6_82599(struct ixgbe_atr_input *input, | 1465 | static s32 ixgbe_atr_get_src_ipv6_82599(union ixgbe_atr_input *input, |
1523 | u32 *src_addr_1, u32 *src_addr_2, | 1466 | __be32 *src_addr_0, __be32 *src_addr_1, |
1524 | u32 *src_addr_3, u32 *src_addr_4) | 1467 | __be32 *src_addr_2, __be32 *src_addr_3) |
1525 | { | 1468 | { |
1526 | *src_addr_1 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 12]; | 1469 | *src_addr_0 = input->formatted.src_ip[0]; |
1527 | *src_addr_1 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 13] << 8; | 1470 | *src_addr_1 = input->formatted.src_ip[1]; |
1528 | *src_addr_1 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 14] << 16; | 1471 | *src_addr_2 = input->formatted.src_ip[2]; |
1529 | *src_addr_1 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 15] << 24; | 1472 | *src_addr_3 = input->formatted.src_ip[3]; |
1530 | |||
1531 | *src_addr_2 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 8]; | ||
1532 | *src_addr_2 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 9] << 8; | ||
1533 | *src_addr_2 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 10] << 16; | ||
1534 | *src_addr_2 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 11] << 24; | ||
1535 | |||
1536 | *src_addr_3 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 4]; | ||
1537 | *src_addr_3 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 5] << 8; | ||
1538 | *src_addr_3 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 6] << 16; | ||
1539 | *src_addr_3 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 7] << 24; | ||
1540 | |||
1541 | *src_addr_4 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET]; | ||
1542 | *src_addr_4 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 1] << 8; | ||
1543 | *src_addr_4 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 2] << 16; | ||
1544 | *src_addr_4 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 3] << 24; | ||
1545 | 1473 | ||
1546 | return 0; | 1474 | return 0; |
1547 | } | 1475 | } |
@@ -1556,11 +1484,10 @@ static s32 ixgbe_atr_get_src_ipv6_82599(struct ixgbe_atr_input *input, | |||
1556 | * endianness when retrieving the data. This can be confusing since the | 1484 | * endianness when retrieving the data. This can be confusing since the |
1557 | * internal hash engine expects it to be big-endian. | 1485 | * internal hash engine expects it to be big-endian. |
1558 | **/ | 1486 | **/ |
1559 | static s32 ixgbe_atr_get_src_port_82599(struct ixgbe_atr_input *input, | 1487 | static s32 ixgbe_atr_get_src_port_82599(union ixgbe_atr_input *input, |
1560 | u16 *src_port) | 1488 | __be16 *src_port) |
1561 | { | 1489 | { |
1562 | *src_port = input->byte_stream[IXGBE_ATR_SRC_PORT_OFFSET] << 8; | 1490 | *src_port = input->formatted.src_port; |
1563 | *src_port |= input->byte_stream[IXGBE_ATR_SRC_PORT_OFFSET + 1]; | ||
1564 | 1491 | ||
1565 | return 0; | 1492 | return 0; |
1566 | } | 1493 | } |
@@ -1575,11 +1502,10 @@ static s32 ixgbe_atr_get_src_port_82599(struct ixgbe_atr_input *input, | |||
1575 | * endianness when retrieving the data. This can be confusing since the | 1502 | * endianness when retrieving the data. This can be confusing since the |
1576 | * internal hash engine expects it to be big-endian. | 1503 | * internal hash engine expects it to be big-endian. |
1577 | **/ | 1504 | **/ |
1578 | static s32 ixgbe_atr_get_dst_port_82599(struct ixgbe_atr_input *input, | 1505 | static s32 ixgbe_atr_get_dst_port_82599(union ixgbe_atr_input *input, |
1579 | u16 *dst_port) | 1506 | __be16 *dst_port) |
1580 | { | 1507 | { |
1581 | *dst_port = input->byte_stream[IXGBE_ATR_DST_PORT_OFFSET] << 8; | 1508 | *dst_port = input->formatted.dst_port; |
1582 | *dst_port |= input->byte_stream[IXGBE_ATR_DST_PORT_OFFSET + 1]; | ||
1583 | 1509 | ||
1584 | return 0; | 1510 | return 0; |
1585 | } | 1511 | } |
@@ -1589,11 +1515,10 @@ static s32 ixgbe_atr_get_dst_port_82599(struct ixgbe_atr_input *input, | |||
1589 | * @input: input stream to modify | 1515 | * @input: input stream to modify |
1590 | * @flex_bytes: the flexible bytes to load | 1516 | * @flex_bytes: the flexible bytes to load |
1591 | **/ | 1517 | **/ |
1592 | static s32 ixgbe_atr_get_flex_byte_82599(struct ixgbe_atr_input *input, | 1518 | static s32 ixgbe_atr_get_flex_byte_82599(union ixgbe_atr_input *input, |
1593 | u16 *flex_byte) | 1519 | __be16 *flex_bytes) |
1594 | { | 1520 | { |
1595 | *flex_byte = input->byte_stream[IXGBE_ATR_FLEX_BYTE_OFFSET]; | 1521 | *flex_bytes = input->formatted.flex_bytes; |
1596 | *flex_byte |= input->byte_stream[IXGBE_ATR_FLEX_BYTE_OFFSET + 1] << 8; | ||
1597 | 1522 | ||
1598 | return 0; | 1523 | return 0; |
1599 | } | 1524 | } |
@@ -1603,10 +1528,10 @@ static s32 ixgbe_atr_get_flex_byte_82599(struct ixgbe_atr_input *input, | |||
1603 | * @input: input stream to modify | 1528 | * @input: input stream to modify |
1604 | * @l4type: the layer 4 type value to load | 1529 | * @l4type: the layer 4 type value to load |
1605 | **/ | 1530 | **/ |
1606 | static s32 ixgbe_atr_get_l4type_82599(struct ixgbe_atr_input *input, | 1531 | static s32 ixgbe_atr_get_l4type_82599(union ixgbe_atr_input *input, |
1607 | u8 *l4type) | 1532 | u8 *l4type) |
1608 | { | 1533 | { |
1609 | *l4type = input->byte_stream[IXGBE_ATR_L4TYPE_OFFSET]; | 1534 | *l4type = input->formatted.flow_type; |
1610 | 1535 | ||
1611 | return 0; | 1536 | return 0; |
1612 | } | 1537 | } |
@@ -1618,57 +1543,49 @@ static s32 ixgbe_atr_get_l4type_82599(struct ixgbe_atr_input *input, | |||
1618 | * @queue: queue index to direct traffic to | 1543 | * @queue: queue index to direct traffic to |
1619 | **/ | 1544 | **/ |
1620 | s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, | 1545 | s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, |
1621 | struct ixgbe_atr_input *input, | 1546 | union ixgbe_atr_input *input, |
1622 | u8 queue) | 1547 | u8 queue) |
1623 | { | 1548 | { |
1624 | u64 fdirhashcmd; | 1549 | u64 fdirhashcmd; |
1625 | u64 fdircmd; | 1550 | u32 fdircmd; |
1626 | u32 fdirhash; | 1551 | u32 bucket_hash, sig_hash; |
1627 | u16 bucket_hash, sig_hash; | ||
1628 | u8 l4type; | ||
1629 | |||
1630 | bucket_hash = ixgbe_atr_compute_hash_82599(input, | ||
1631 | IXGBE_ATR_BUCKET_HASH_KEY); | ||
1632 | |||
1633 | /* bucket_hash is only 15 bits */ | ||
1634 | bucket_hash &= IXGBE_ATR_HASH_MASK; | ||
1635 | |||
1636 | sig_hash = ixgbe_atr_compute_hash_82599(input, | ||
1637 | IXGBE_ATR_SIGNATURE_HASH_KEY); | ||
1638 | |||
1639 | /* Get the l4type in order to program FDIRCMD properly */ | ||
1640 | /* lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6 */ | ||
1641 | ixgbe_atr_get_l4type_82599(input, &l4type); | ||
1642 | 1552 | ||
1643 | /* | 1553 | /* |
1644 | * The lower 32-bits of fdirhashcmd is for FDIRHASH, the upper 32-bits | 1554 | * Get the flow_type in order to program FDIRCMD properly |
1645 | * is for FDIRCMD. Then do a 64-bit register write from FDIRHASH. | 1555 | * lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6 |
1646 | */ | 1556 | */ |
1647 | fdirhash = sig_hash << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT | bucket_hash; | 1557 | switch (input->formatted.flow_type) { |
1648 | 1558 | case IXGBE_ATR_FLOW_TYPE_TCPV4: | |
1649 | fdircmd = (IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE | | 1559 | case IXGBE_ATR_FLOW_TYPE_UDPV4: |
1650 | IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN); | 1560 | case IXGBE_ATR_FLOW_TYPE_SCTPV4: |
1651 | 1561 | case IXGBE_ATR_FLOW_TYPE_TCPV6: | |
1652 | switch (l4type & IXGBE_ATR_L4TYPE_MASK) { | 1562 | case IXGBE_ATR_FLOW_TYPE_UDPV6: |
1653 | case IXGBE_ATR_L4TYPE_TCP: | 1563 | case IXGBE_ATR_FLOW_TYPE_SCTPV6: |
1654 | fdircmd |= IXGBE_FDIRCMD_L4TYPE_TCP; | ||
1655 | break; | ||
1656 | case IXGBE_ATR_L4TYPE_UDP: | ||
1657 | fdircmd |= IXGBE_FDIRCMD_L4TYPE_UDP; | ||
1658 | break; | ||
1659 | case IXGBE_ATR_L4TYPE_SCTP: | ||
1660 | fdircmd |= IXGBE_FDIRCMD_L4TYPE_SCTP; | ||
1661 | break; | 1564 | break; |
1662 | default: | 1565 | default: |
1663 | hw_dbg(hw, "Error on l4type input\n"); | 1566 | hw_dbg(hw, " Error on flow type input\n"); |
1664 | return IXGBE_ERR_CONFIG; | 1567 | return IXGBE_ERR_CONFIG; |
1665 | } | 1568 | } |
1666 | 1569 | ||
1667 | if (l4type & IXGBE_ATR_L4TYPE_IPV6_MASK) | 1570 | /* configure FDIRCMD register */ |
1668 | fdircmd |= IXGBE_FDIRCMD_IPV6; | 1571 | fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE | |
1572 | IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN; | ||
1573 | fdircmd |= input->formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT; | ||
1574 | fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT; | ||
1669 | 1575 | ||
1670 | fdircmd |= ((u64)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT); | 1576 | /* |
1671 | fdirhashcmd = ((fdircmd << 32) | fdirhash); | 1577 | * The lower 32-bits of fdirhashcmd is for FDIRHASH, the upper 32-bits |
1578 | * is for FDIRCMD. Then do a 64-bit register write from FDIRHASH. | ||
1579 | */ | ||
1580 | fdirhashcmd = (u64)fdircmd << 32; | ||
1581 | |||
1582 | sig_hash = ixgbe_atr_compute_hash_82599(input, | ||
1583 | IXGBE_ATR_SIGNATURE_HASH_KEY); | ||
1584 | fdirhashcmd |= sig_hash << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT; | ||
1585 | |||
1586 | bucket_hash = ixgbe_atr_compute_hash_82599(input, | ||
1587 | IXGBE_ATR_BUCKET_HASH_KEY); | ||
1588 | fdirhashcmd |= bucket_hash; | ||
1672 | 1589 | ||
1673 | IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd); | 1590 | IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd); |
1674 | 1591 | ||
@@ -1687,7 +1604,7 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, | |||
1687 | * hardware writes must be protected from one another. | 1604 | * hardware writes must be protected from one another. |
1688 | **/ | 1605 | **/ |
1689 | s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, | 1606 | s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, |
1690 | struct ixgbe_atr_input *input, | 1607 | union ixgbe_atr_input *input, |
1691 | struct ixgbe_atr_input_masks *input_masks, | 1608 | struct ixgbe_atr_input_masks *input_masks, |
1692 | u16 soft_id, u8 queue) | 1609 | u16 soft_id, u8 queue) |
1693 | { | 1610 | { |