aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ixgbe/ixgbe_82599.c
diff options
context:
space:
mode:
authorAlexander Duyck <alexander.h.duyck@intel.com>2011-01-06 09:29:57 -0500
committerDavid S. Miller <davem@davemloft.net>2011-01-10 02:44:11 -0500
commit905e4a4163c4e807daf1f1f6b8f958e762a834a8 (patch)
tree4f2a71c7dc5255a1dfb9f4063b94a920a3ada135 /drivers/net/ixgbe/ixgbe_82599.c
parent2d39d576fad0fd4bb79a0de26fca50a4be1ffdc1 (diff)
ixgbe: cleanup flow director hash computation to improve performance
This change cleans up the layout of the flow director data and the algorithm used to calculate the hash, resulting in a 35x / 3500% performance increase versus the old flow director hash computation. The overall effect is only a 1% increase in transactions per second, though, because only 1 packet in 20 is actually hashed.

TCP_RR before:
Socket Size   Request  Resp.   Elapsed  Trans.
Send   Recv   Size     Size    Time     Rate
bytes  Bytes  bytes    bytes   secs.    per sec

16384  87380  1        1       60.00    23059.27
16384  87380

TCP_RR after:
Socket Size   Request  Resp.   Elapsed  Trans.
Send   Recv   Size     Size    Time     Rate
bytes  Bytes  bytes    bytes   secs.    per sec

16384  87380  1        1       60.00    23239.98
16384  87380

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ixgbe/ixgbe_82599.c')
-rw-r--r--drivers/net/ixgbe/ixgbe_82599.c335
1 files changed, 126 insertions, 209 deletions
diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c
index bfd3c227cd4a..40aa3c29dc1d 100644
--- a/drivers/net/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ixgbe/ixgbe_82599.c
@@ -1003,7 +1003,7 @@ s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw)
1003 udelay(10); 1003 udelay(10);
1004 } 1004 }
1005 if (i >= IXGBE_FDIRCMD_CMD_POLL) { 1005 if (i >= IXGBE_FDIRCMD_CMD_POLL) {
1006 hw_dbg(hw ,"Flow Director previous command isn't complete, " 1006 hw_dbg(hw, "Flow Director previous command isn't complete, "
1007 "aborting table re-initialization.\n"); 1007 "aborting table re-initialization.\n");
1008 return IXGBE_ERR_FDIR_REINIT_FAILED; 1008 return IXGBE_ERR_FDIR_REINIT_FAILED;
1009 } 1009 }
@@ -1113,13 +1113,10 @@ s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 pballoc)
1113 /* Move the flexible bytes to use the ethertype - shift 6 words */ 1113 /* Move the flexible bytes to use the ethertype - shift 6 words */
1114 fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT); 1114 fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT);
1115 1115
1116 fdirctrl |= IXGBE_FDIRCTRL_REPORT_STATUS;
1117 1116
1118 /* Prime the keys for hashing */ 1117 /* Prime the keys for hashing */
1119 IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, 1118 IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY);
1120 htonl(IXGBE_ATR_BUCKET_HASH_KEY)); 1119 IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY);
1121 IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY,
1122 htonl(IXGBE_ATR_SIGNATURE_HASH_KEY));
1123 1120
1124 /* 1121 /*
1125 * Poll init-done after we write the register. Estimated times: 1122 * Poll init-done after we write the register. Estimated times:
@@ -1209,10 +1206,8 @@ s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc)
1209 fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT); 1206 fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT);
1210 1207
1211 /* Prime the keys for hashing */ 1208 /* Prime the keys for hashing */
1212 IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, 1209 IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY);
1213 htonl(IXGBE_ATR_BUCKET_HASH_KEY)); 1210 IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY);
1214 IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY,
1215 htonl(IXGBE_ATR_SIGNATURE_HASH_KEY));
1216 1211
1217 /* 1212 /*
1218 * Poll init-done after we write the register. Estimated times: 1213 * Poll init-done after we write the register. Estimated times:
@@ -1251,8 +1246,8 @@ s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc)
1251 * @stream: input bitstream to compute the hash on 1246 * @stream: input bitstream to compute the hash on
1252 * @key: 32-bit hash key 1247 * @key: 32-bit hash key
1253 **/ 1248 **/
1254static u16 ixgbe_atr_compute_hash_82599(struct ixgbe_atr_input *atr_input, 1249static u32 ixgbe_atr_compute_hash_82599(union ixgbe_atr_input *atr_input,
1255 u32 key) 1250 u32 key)
1256{ 1251{
1257 /* 1252 /*
1258 * The algorithm is as follows: 1253 * The algorithm is as follows:
@@ -1272,100 +1267,68 @@ static u16 ixgbe_atr_compute_hash_82599(struct ixgbe_atr_input *atr_input,
1272 * To simplify for programming, the algorithm is implemented 1267 * To simplify for programming, the algorithm is implemented
1273 * in software this way: 1268 * in software this way:
1274 * 1269 *
1275 * Key[31:0], Stream[335:0] 1270 * key[31:0], hi_hash_dword[31:0], lo_hash_dword[31:0], hash[15:0]
1271 *
1272 * for (i = 0; i < 352; i+=32)
1273 * hi_hash_dword[31:0] ^= Stream[(i+31):i];
1274 *
1275 * lo_hash_dword[15:0] ^= Stream[15:0];
1276 * lo_hash_dword[15:0] ^= hi_hash_dword[31:16];
1277 * lo_hash_dword[31:16] ^= hi_hash_dword[15:0];
1278 *
1279 * hi_hash_dword[31:0] ^= Stream[351:320];
1276 * 1280 *
1277 * tmp_key[11 * 32 - 1:0] = 11{Key[31:0] = key concatenated 11 times 1281 * if(key[0])
1278 * int_key[350:0] = tmp_key[351:1] 1282 * hash[15:0] ^= Stream[15:0];
1279 * int_stream[365:0] = Stream[14:0] | Stream[335:0] | Stream[335:321]
1280 * 1283 *
1281 * hash[15:0] = 0; 1284 * for (i = 0; i < 16; i++) {
1282 * for (i = 0; i < 351; i++) { 1285 * if (key[i])
1283 * if (int_key[i]) 1286 * hash[15:0] ^= lo_hash_dword[(i+15):i];
1284 * hash ^= int_stream[(i + 15):i]; 1287 * if (key[i + 16])
1288 * hash[15:0] ^= hi_hash_dword[(i+15):i];
1285 * } 1289 * }
1290 *
1286 */ 1291 */
1292 __be32 common_hash_dword = 0;
1293 u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
1294 u32 hash_result = 0;
1295 u8 i;
1287 1296
1288 union { 1297 /* record the flow_vm_vlan bits as they are a key part to the hash */
1289 u64 fill[6]; 1298 flow_vm_vlan = ntohl(atr_input->dword_stream[0]);
1290 u32 key[11];
1291 u8 key_stream[44];
1292 } tmp_key;
1293 1299
1294 u8 *stream = (u8 *)atr_input; 1300 /* generate common hash dword */
1295 u8 int_key[44]; /* upper-most bit unused */ 1301 for (i = 10; i; i -= 2)
1296 u8 hash_str[46]; /* upper-most 2 bits unused */ 1302 common_hash_dword ^= atr_input->dword_stream[i] ^
1297 u16 hash_result = 0; 1303 atr_input->dword_stream[i - 1];
1298 int i, j, k, h;
1299 1304
1300 /* 1305 hi_hash_dword = ntohl(common_hash_dword);
1301 * Initialize the fill member to prevent warnings
1302 * on some compilers
1303 */
1304 tmp_key.fill[0] = 0;
1305 1306
1306 /* First load the temporary key stream */ 1307 /* low dword is word swapped version of common */
1307 for (i = 0; i < 6; i++) { 1308 lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
1308 u64 fillkey = ((u64)key << 32) | key;
1309 tmp_key.fill[i] = fillkey;
1310 }
1311 1309
1312 /* 1310 /* apply flow ID/VM pool/VLAN ID bits to hash words */
1313 * Set the interim key for the hashing. Bit 352 is unused, so we must 1311 hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
1314 * shift and compensate when building the key.
1315 */
1316 1312
1317 int_key[0] = tmp_key.key_stream[0] >> 1; 1313 /* Process bits 0 and 16 */
1318 for (i = 1, j = 0; i < 44; i++) { 1314 if (key & 0x0001) hash_result ^= lo_hash_dword;
1319 unsigned int this_key = tmp_key.key_stream[j] << 7; 1315 if (key & 0x00010000) hash_result ^= hi_hash_dword;
1320 j++;
1321 int_key[i] = (u8)(this_key | (tmp_key.key_stream[j] >> 1));
1322 }
1323 1316
1324 /* 1317 /*
1325 * Set the interim bit string for the hashing. Bits 368 and 367 are 1318 * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
1326 * unused, so shift and compensate when building the string. 1319 * delay this because bit 0 of the stream should not be processed
1320 * so we do not add the vlan until after bit 0 was processed
1327 */ 1321 */
1328 hash_str[0] = (stream[40] & 0x7f) >> 1; 1322 lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
1329 for (i = 1, j = 40; i < 46; i++) {
1330 unsigned int this_str = stream[j] << 7;
1331 j++;
1332 if (j > 41)
1333 j = 0;
1334 hash_str[i] = (u8)(this_str | (stream[j] >> 1));
1335 }
1336 1323
1337 /* 1324
1338 * Now compute the hash. i is the index into hash_str, j is into our 1325 /* process the remaining 30 bits in the key 2 bits at a time */
1339 * key stream, k is counting the number of bits, and h interates within 1326 for (i = 15; i; i-- ) {
1340 * each byte. 1327 if (key & (0x0001 << i)) hash_result ^= lo_hash_dword >> i;
1341 */ 1328 if (key & (0x00010000 << i)) hash_result ^= hi_hash_dword >> i;
1342 for (i = 45, j = 43, k = 0; k < 351 && i >= 2 && j >= 0; i--, j--) {
1343 for (h = 0; h < 8 && k < 351; h++, k++) {
1344 if (int_key[j] & (1 << h)) {
1345 /*
1346 * Key bit is set, XOR in the current 16-bit
1347 * string. Example of processing:
1348 * h = 0,
1349 * tmp = (hash_str[i - 2] & 0 << 16) |
1350 * (hash_str[i - 1] & 0xff << 8) |
1351 * (hash_str[i] & 0xff >> 0)
1352 * So tmp = hash_str[15 + k:k], since the
1353 * i + 2 clause rolls off the 16-bit value
1354 * h = 7,
1355 * tmp = (hash_str[i - 2] & 0x7f << 9) |
1356 * (hash_str[i - 1] & 0xff << 1) |
1357 * (hash_str[i] & 0x80 >> 7)
1358 */
1359 int tmp = (hash_str[i] >> h);
1360 tmp |= (hash_str[i - 1] << (8 - h));
1361 tmp |= (int)(hash_str[i - 2] & ((1 << h) - 1))
1362 << (16 - h);
1363 hash_result ^= (u16)tmp;
1364 }
1365 }
1366 } 1329 }
1367 1330
1368 return hash_result; 1331 return hash_result & IXGBE_ATR_HASH_MASK;
1369} 1332}
1370 1333
1371/** 1334/**
@@ -1373,10 +1336,9 @@ static u16 ixgbe_atr_compute_hash_82599(struct ixgbe_atr_input *atr_input,
1373 * @input: input stream to modify 1336 * @input: input stream to modify
1374 * @vlan: the VLAN id to load 1337 * @vlan: the VLAN id to load
1375 **/ 1338 **/
1376s32 ixgbe_atr_set_vlan_id_82599(struct ixgbe_atr_input *input, u16 vlan) 1339s32 ixgbe_atr_set_vlan_id_82599(union ixgbe_atr_input *input, __be16 vlan)
1377{ 1340{
1378 input->byte_stream[IXGBE_ATR_VLAN_OFFSET + 1] = vlan >> 8; 1341 input->formatted.vlan_id = vlan;
1379 input->byte_stream[IXGBE_ATR_VLAN_OFFSET] = vlan & 0xff;
1380 1342
1381 return 0; 1343 return 0;
1382} 1344}
@@ -1386,14 +1348,9 @@ s32 ixgbe_atr_set_vlan_id_82599(struct ixgbe_atr_input *input, u16 vlan)
1386 * @input: input stream to modify 1348 * @input: input stream to modify
1387 * @src_addr: the IP address to load 1349 * @src_addr: the IP address to load
1388 **/ 1350 **/
1389s32 ixgbe_atr_set_src_ipv4_82599(struct ixgbe_atr_input *input, u32 src_addr) 1351s32 ixgbe_atr_set_src_ipv4_82599(union ixgbe_atr_input *input, __be32 src_addr)
1390{ 1352{
1391 input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 3] = src_addr >> 24; 1353 input->formatted.src_ip[0] = src_addr;
1392 input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 2] =
1393 (src_addr >> 16) & 0xff;
1394 input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 1] =
1395 (src_addr >> 8) & 0xff;
1396 input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET] = src_addr & 0xff;
1397 1354
1398 return 0; 1355 return 0;
1399} 1356}
@@ -1403,14 +1360,9 @@ s32 ixgbe_atr_set_src_ipv4_82599(struct ixgbe_atr_input *input, u32 src_addr)
1403 * @input: input stream to modify 1360 * @input: input stream to modify
1404 * @dst_addr: the IP address to load 1361 * @dst_addr: the IP address to load
1405 **/ 1362 **/
1406s32 ixgbe_atr_set_dst_ipv4_82599(struct ixgbe_atr_input *input, u32 dst_addr) 1363s32 ixgbe_atr_set_dst_ipv4_82599(union ixgbe_atr_input *input, __be32 dst_addr)
1407{ 1364{
1408 input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 3] = dst_addr >> 24; 1365 input->formatted.dst_ip[0] = dst_addr;
1409 input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 2] =
1410 (dst_addr >> 16) & 0xff;
1411 input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 1] =
1412 (dst_addr >> 8) & 0xff;
1413 input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET] = dst_addr & 0xff;
1414 1366
1415 return 0; 1367 return 0;
1416} 1368}
@@ -1420,10 +1372,9 @@ s32 ixgbe_atr_set_dst_ipv4_82599(struct ixgbe_atr_input *input, u32 dst_addr)
1420 * @input: input stream to modify 1372 * @input: input stream to modify
1421 * @src_port: the source port to load 1373 * @src_port: the source port to load
1422 **/ 1374 **/
1423s32 ixgbe_atr_set_src_port_82599(struct ixgbe_atr_input *input, u16 src_port) 1375s32 ixgbe_atr_set_src_port_82599(union ixgbe_atr_input *input, __be16 src_port)
1424{ 1376{
1425 input->byte_stream[IXGBE_ATR_SRC_PORT_OFFSET + 1] = src_port >> 8; 1377 input->formatted.src_port = src_port;
1426 input->byte_stream[IXGBE_ATR_SRC_PORT_OFFSET] = src_port & 0xff;
1427 1378
1428 return 0; 1379 return 0;
1429} 1380}
@@ -1433,10 +1384,9 @@ s32 ixgbe_atr_set_src_port_82599(struct ixgbe_atr_input *input, u16 src_port)
1433 * @input: input stream to modify 1384 * @input: input stream to modify
1434 * @dst_port: the destination port to load 1385 * @dst_port: the destination port to load
1435 **/ 1386 **/
1436s32 ixgbe_atr_set_dst_port_82599(struct ixgbe_atr_input *input, u16 dst_port) 1387s32 ixgbe_atr_set_dst_port_82599(union ixgbe_atr_input *input, __be16 dst_port)
1437{ 1388{
1438 input->byte_stream[IXGBE_ATR_DST_PORT_OFFSET + 1] = dst_port >> 8; 1389 input->formatted.dst_port = dst_port;
1439 input->byte_stream[IXGBE_ATR_DST_PORT_OFFSET] = dst_port & 0xff;
1440 1390
1441 return 0; 1391 return 0;
1442} 1392}
@@ -1446,10 +1396,10 @@ s32 ixgbe_atr_set_dst_port_82599(struct ixgbe_atr_input *input, u16 dst_port)
1446 * @input: input stream to modify 1396 * @input: input stream to modify
1447 * @flex_bytes: the flexible bytes to load 1397 * @flex_bytes: the flexible bytes to load
1448 **/ 1398 **/
1449s32 ixgbe_atr_set_flex_byte_82599(struct ixgbe_atr_input *input, u16 flex_byte) 1399s32 ixgbe_atr_set_flex_byte_82599(union ixgbe_atr_input *input,
1400 __be16 flex_bytes)
1450{ 1401{
1451 input->byte_stream[IXGBE_ATR_FLEX_BYTE_OFFSET + 1] = flex_byte >> 8; 1402 input->formatted.flex_bytes = flex_bytes;
1452 input->byte_stream[IXGBE_ATR_FLEX_BYTE_OFFSET] = flex_byte & 0xff;
1453 1403
1454 return 0; 1404 return 0;
1455} 1405}
@@ -1459,9 +1409,9 @@ s32 ixgbe_atr_set_flex_byte_82599(struct ixgbe_atr_input *input, u16 flex_byte)
1459 * @input: input stream to modify 1409 * @input: input stream to modify
1460 * @l4type: the layer 4 type value to load 1410 * @l4type: the layer 4 type value to load
1461 **/ 1411 **/
1462s32 ixgbe_atr_set_l4type_82599(struct ixgbe_atr_input *input, u8 l4type) 1412s32 ixgbe_atr_set_l4type_82599(union ixgbe_atr_input *input, u8 l4type)
1463{ 1413{
1464 input->byte_stream[IXGBE_ATR_L4TYPE_OFFSET] = l4type; 1414 input->formatted.flow_type = l4type;
1465 1415
1466 return 0; 1416 return 0;
1467} 1417}
@@ -1471,10 +1421,9 @@ s32 ixgbe_atr_set_l4type_82599(struct ixgbe_atr_input *input, u8 l4type)
1471 * @input: input stream to search 1421 * @input: input stream to search
1472 * @vlan: the VLAN id to load 1422 * @vlan: the VLAN id to load
1473 **/ 1423 **/
1474static s32 ixgbe_atr_get_vlan_id_82599(struct ixgbe_atr_input *input, u16 *vlan) 1424static s32 ixgbe_atr_get_vlan_id_82599(union ixgbe_atr_input *input, __be16 *vlan)
1475{ 1425{
1476 *vlan = input->byte_stream[IXGBE_ATR_VLAN_OFFSET]; 1426 *vlan = input->formatted.vlan_id;
1477 *vlan |= input->byte_stream[IXGBE_ATR_VLAN_OFFSET + 1] << 8;
1478 1427
1479 return 0; 1428 return 0;
1480} 1429}
@@ -1484,13 +1433,10 @@ static s32 ixgbe_atr_get_vlan_id_82599(struct ixgbe_atr_input *input, u16 *vlan)
1484 * @input: input stream to search 1433 * @input: input stream to search
1485 * @src_addr: the IP address to load 1434 * @src_addr: the IP address to load
1486 **/ 1435 **/
1487static s32 ixgbe_atr_get_src_ipv4_82599(struct ixgbe_atr_input *input, 1436static s32 ixgbe_atr_get_src_ipv4_82599(union ixgbe_atr_input *input,
1488 u32 *src_addr) 1437 __be32 *src_addr)
1489{ 1438{
1490 *src_addr = input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET]; 1439 *src_addr = input->formatted.src_ip[0];
1491 *src_addr |= input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 1] << 8;
1492 *src_addr |= input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 2] << 16;
1493 *src_addr |= input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 3] << 24;
1494 1440
1495 return 0; 1441 return 0;
1496} 1442}
@@ -1500,13 +1446,10 @@ static s32 ixgbe_atr_get_src_ipv4_82599(struct ixgbe_atr_input *input,
1500 * @input: input stream to search 1446 * @input: input stream to search
1501 * @dst_addr: the IP address to load 1447 * @dst_addr: the IP address to load
1502 **/ 1448 **/
1503static s32 ixgbe_atr_get_dst_ipv4_82599(struct ixgbe_atr_input *input, 1449static s32 ixgbe_atr_get_dst_ipv4_82599(union ixgbe_atr_input *input,
1504 u32 *dst_addr) 1450 __be32 *dst_addr)
1505{ 1451{
1506 *dst_addr = input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET]; 1452 *dst_addr = input->formatted.dst_ip[0];
1507 *dst_addr |= input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 1] << 8;
1508 *dst_addr |= input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 2] << 16;
1509 *dst_addr |= input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 3] << 24;
1510 1453
1511 return 0; 1454 return 0;
1512} 1455}
@@ -1519,29 +1462,14 @@ static s32 ixgbe_atr_get_dst_ipv4_82599(struct ixgbe_atr_input *input,
1519 * @src_addr_3: the third 4 bytes of the IP address to load 1462 * @src_addr_3: the third 4 bytes of the IP address to load
1520 * @src_addr_4: the fourth 4 bytes of the IP address to load 1463 * @src_addr_4: the fourth 4 bytes of the IP address to load
1521 **/ 1464 **/
1522static s32 ixgbe_atr_get_src_ipv6_82599(struct ixgbe_atr_input *input, 1465static s32 ixgbe_atr_get_src_ipv6_82599(union ixgbe_atr_input *input,
1523 u32 *src_addr_1, u32 *src_addr_2, 1466 __be32 *src_addr_0, __be32 *src_addr_1,
1524 u32 *src_addr_3, u32 *src_addr_4) 1467 __be32 *src_addr_2, __be32 *src_addr_3)
1525{ 1468{
1526 *src_addr_1 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 12]; 1469 *src_addr_0 = input->formatted.src_ip[0];
1527 *src_addr_1 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 13] << 8; 1470 *src_addr_1 = input->formatted.src_ip[1];
1528 *src_addr_1 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 14] << 16; 1471 *src_addr_2 = input->formatted.src_ip[2];
1529 *src_addr_1 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 15] << 24; 1472 *src_addr_3 = input->formatted.src_ip[3];
1530
1531 *src_addr_2 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 8];
1532 *src_addr_2 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 9] << 8;
1533 *src_addr_2 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 10] << 16;
1534 *src_addr_2 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 11] << 24;
1535
1536 *src_addr_3 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 4];
1537 *src_addr_3 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 5] << 8;
1538 *src_addr_3 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 6] << 16;
1539 *src_addr_3 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 7] << 24;
1540
1541 *src_addr_4 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET];
1542 *src_addr_4 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 1] << 8;
1543 *src_addr_4 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 2] << 16;
1544 *src_addr_4 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 3] << 24;
1545 1473
1546 return 0; 1474 return 0;
1547} 1475}
@@ -1556,11 +1484,10 @@ static s32 ixgbe_atr_get_src_ipv6_82599(struct ixgbe_atr_input *input,
1556 * endianness when retrieving the data. This can be confusing since the 1484 * endianness when retrieving the data. This can be confusing since the
1557 * internal hash engine expects it to be big-endian. 1485 * internal hash engine expects it to be big-endian.
1558 **/ 1486 **/
1559static s32 ixgbe_atr_get_src_port_82599(struct ixgbe_atr_input *input, 1487static s32 ixgbe_atr_get_src_port_82599(union ixgbe_atr_input *input,
1560 u16 *src_port) 1488 __be16 *src_port)
1561{ 1489{
1562 *src_port = input->byte_stream[IXGBE_ATR_SRC_PORT_OFFSET] << 8; 1490 *src_port = input->formatted.src_port;
1563 *src_port |= input->byte_stream[IXGBE_ATR_SRC_PORT_OFFSET + 1];
1564 1491
1565 return 0; 1492 return 0;
1566} 1493}
@@ -1575,11 +1502,10 @@ static s32 ixgbe_atr_get_src_port_82599(struct ixgbe_atr_input *input,
1575 * endianness when retrieving the data. This can be confusing since the 1502 * endianness when retrieving the data. This can be confusing since the
1576 * internal hash engine expects it to be big-endian. 1503 * internal hash engine expects it to be big-endian.
1577 **/ 1504 **/
1578static s32 ixgbe_atr_get_dst_port_82599(struct ixgbe_atr_input *input, 1505static s32 ixgbe_atr_get_dst_port_82599(union ixgbe_atr_input *input,
1579 u16 *dst_port) 1506 __be16 *dst_port)
1580{ 1507{
1581 *dst_port = input->byte_stream[IXGBE_ATR_DST_PORT_OFFSET] << 8; 1508 *dst_port = input->formatted.dst_port;
1582 *dst_port |= input->byte_stream[IXGBE_ATR_DST_PORT_OFFSET + 1];
1583 1509
1584 return 0; 1510 return 0;
1585} 1511}
@@ -1589,11 +1515,10 @@ static s32 ixgbe_atr_get_dst_port_82599(struct ixgbe_atr_input *input,
1589 * @input: input stream to modify 1515 * @input: input stream to modify
1590 * @flex_bytes: the flexible bytes to load 1516 * @flex_bytes: the flexible bytes to load
1591 **/ 1517 **/
1592static s32 ixgbe_atr_get_flex_byte_82599(struct ixgbe_atr_input *input, 1518static s32 ixgbe_atr_get_flex_byte_82599(union ixgbe_atr_input *input,
1593 u16 *flex_byte) 1519 __be16 *flex_bytes)
1594{ 1520{
1595 *flex_byte = input->byte_stream[IXGBE_ATR_FLEX_BYTE_OFFSET]; 1521 *flex_bytes = input->formatted.flex_bytes;
1596 *flex_byte |= input->byte_stream[IXGBE_ATR_FLEX_BYTE_OFFSET + 1] << 8;
1597 1522
1598 return 0; 1523 return 0;
1599} 1524}
@@ -1603,10 +1528,10 @@ static s32 ixgbe_atr_get_flex_byte_82599(struct ixgbe_atr_input *input,
1603 * @input: input stream to modify 1528 * @input: input stream to modify
1604 * @l4type: the layer 4 type value to load 1529 * @l4type: the layer 4 type value to load
1605 **/ 1530 **/
1606static s32 ixgbe_atr_get_l4type_82599(struct ixgbe_atr_input *input, 1531static s32 ixgbe_atr_get_l4type_82599(union ixgbe_atr_input *input,
1607 u8 *l4type) 1532 u8 *l4type)
1608{ 1533{
1609 *l4type = input->byte_stream[IXGBE_ATR_L4TYPE_OFFSET]; 1534 *l4type = input->formatted.flow_type;
1610 1535
1611 return 0; 1536 return 0;
1612} 1537}
@@ -1618,57 +1543,49 @@ static s32 ixgbe_atr_get_l4type_82599(struct ixgbe_atr_input *input,
1618 * @queue: queue index to direct traffic to 1543 * @queue: queue index to direct traffic to
1619 **/ 1544 **/
1620s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, 1545s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
1621 struct ixgbe_atr_input *input, 1546 union ixgbe_atr_input *input,
1622 u8 queue) 1547 u8 queue)
1623{ 1548{
1624 u64 fdirhashcmd; 1549 u64 fdirhashcmd;
1625 u64 fdircmd; 1550 u32 fdircmd;
1626 u32 fdirhash; 1551 u32 bucket_hash, sig_hash;
1627 u16 bucket_hash, sig_hash;
1628 u8 l4type;
1629
1630 bucket_hash = ixgbe_atr_compute_hash_82599(input,
1631 IXGBE_ATR_BUCKET_HASH_KEY);
1632
1633 /* bucket_hash is only 15 bits */
1634 bucket_hash &= IXGBE_ATR_HASH_MASK;
1635
1636 sig_hash = ixgbe_atr_compute_hash_82599(input,
1637 IXGBE_ATR_SIGNATURE_HASH_KEY);
1638
1639 /* Get the l4type in order to program FDIRCMD properly */
1640 /* lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6 */
1641 ixgbe_atr_get_l4type_82599(input, &l4type);
1642 1552
1643 /* 1553 /*
1644 * The lower 32-bits of fdirhashcmd is for FDIRHASH, the upper 32-bits 1554 * Get the flow_type in order to program FDIRCMD properly
1645 * is for FDIRCMD. Then do a 64-bit register write from FDIRHASH. 1555 * lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6
1646 */ 1556 */
1647 fdirhash = sig_hash << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT | bucket_hash; 1557 switch (input->formatted.flow_type) {
1648 1558 case IXGBE_ATR_FLOW_TYPE_TCPV4:
1649 fdircmd = (IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE | 1559 case IXGBE_ATR_FLOW_TYPE_UDPV4:
1650 IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN); 1560 case IXGBE_ATR_FLOW_TYPE_SCTPV4:
1651 1561 case IXGBE_ATR_FLOW_TYPE_TCPV6:
1652 switch (l4type & IXGBE_ATR_L4TYPE_MASK) { 1562 case IXGBE_ATR_FLOW_TYPE_UDPV6:
1653 case IXGBE_ATR_L4TYPE_TCP: 1563 case IXGBE_ATR_FLOW_TYPE_SCTPV6:
1654 fdircmd |= IXGBE_FDIRCMD_L4TYPE_TCP;
1655 break;
1656 case IXGBE_ATR_L4TYPE_UDP:
1657 fdircmd |= IXGBE_FDIRCMD_L4TYPE_UDP;
1658 break;
1659 case IXGBE_ATR_L4TYPE_SCTP:
1660 fdircmd |= IXGBE_FDIRCMD_L4TYPE_SCTP;
1661 break; 1564 break;
1662 default: 1565 default:
1663 hw_dbg(hw, "Error on l4type input\n"); 1566 hw_dbg(hw, " Error on flow type input\n");
1664 return IXGBE_ERR_CONFIG; 1567 return IXGBE_ERR_CONFIG;
1665 } 1568 }
1666 1569
1667 if (l4type & IXGBE_ATR_L4TYPE_IPV6_MASK) 1570 /* configure FDIRCMD register */
1668 fdircmd |= IXGBE_FDIRCMD_IPV6; 1571 fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
1572 IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
1573 fdircmd |= input->formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
1574 fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
1669 1575
1670 fdircmd |= ((u64)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT); 1576 /*
1671 fdirhashcmd = ((fdircmd << 32) | fdirhash); 1577 * The lower 32-bits of fdirhashcmd is for FDIRHASH, the upper 32-bits
1578 * is for FDIRCMD. Then do a 64-bit register write from FDIRHASH.
1579 */
1580 fdirhashcmd = (u64)fdircmd << 32;
1581
1582 sig_hash = ixgbe_atr_compute_hash_82599(input,
1583 IXGBE_ATR_SIGNATURE_HASH_KEY);
1584 fdirhashcmd |= sig_hash << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
1585
1586 bucket_hash = ixgbe_atr_compute_hash_82599(input,
1587 IXGBE_ATR_BUCKET_HASH_KEY);
1588 fdirhashcmd |= bucket_hash;
1672 1589
1673 IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd); 1590 IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd);
1674 1591
@@ -1687,7 +1604,7 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
1687 * hardware writes must be protected from one another. 1604 * hardware writes must be protected from one another.
1688 **/ 1605 **/
1689s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, 1606s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
1690 struct ixgbe_atr_input *input, 1607 union ixgbe_atr_input *input,
1691 struct ixgbe_atr_input_masks *input_masks, 1608 struct ixgbe_atr_input_masks *input_masks,
1692 u16 soft_id, u8 queue) 1609 u16 soft_id, u8 queue)
1693{ 1610{