aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Duyck <alexander.h.duyck@intel.com>2011-01-06 09:29:58 -0500
committerDavid S. Miller <davem@davemloft.net>2011-01-10 02:44:11 -0500
commit69830529b26e6dc9582a4b65ab88f40f050cf94e (patch)
tree03c16ab393989d80e4ca9034f415fe499dc83ce8
parent905e4a4163c4e807daf1f1f6b8f958e762a834a8 (diff)
ixgbe: further flow director performance optimizations
This change adds a compressed input type for atr signature hash computation. It also drops the use of the set functions when setting up the ATR input since we can then directly set up the hash input as two dwords that can be stored and passed as registers. With these changes the cost of computing the hash is low enough that we can perform a hash computation on each TCP SYN flagged packet allowing us to drop the number of flow director misses considerably in tests such as netperf TCP_CRR. Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com> Tested-by: Stephen Ko <stephen.s.ko@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ixgbe/ixgbe.h3
-rw-r--r--drivers/net/ixgbe/ixgbe_82599.c112
-rw-r--r--drivers/net/ixgbe/ixgbe_main.c107
-rw-r--r--drivers/net/ixgbe/ixgbe_type.h16
4 files changed, 194 insertions, 44 deletions
diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index 2666e69d328c..341b3db2e7db 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -526,7 +526,8 @@ extern s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw);
526extern s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 pballoc); 526extern s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 pballoc);
527extern s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc); 527extern s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc);
528extern s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, 528extern s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
529 union ixgbe_atr_input *input, 529 union ixgbe_atr_hash_dword input,
530 union ixgbe_atr_hash_dword common,
530 u8 queue); 531 u8 queue);
531extern s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, 532extern s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
532 union ixgbe_atr_input *input, 533 union ixgbe_atr_input *input,
diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c
index 40aa3c29dc1d..d41931f5c3d3 100644
--- a/drivers/net/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ixgbe/ixgbe_82599.c
@@ -1331,6 +1331,96 @@ static u32 ixgbe_atr_compute_hash_82599(union ixgbe_atr_input *atr_input,
1331 return hash_result & IXGBE_ATR_HASH_MASK; 1331 return hash_result & IXGBE_ATR_HASH_MASK;
1332} 1332}
1333 1333
1334/*
1335 * These defines allow us to quickly generate all of the necessary instructions
1336 * in the function below by simply calling out IXGBE_COMPUTE_SIG_HASH_ITERATION
1337 * for values 0 through 15
1338 */
1339#define IXGBE_ATR_COMMON_HASH_KEY \
1340 (IXGBE_ATR_BUCKET_HASH_KEY & IXGBE_ATR_SIGNATURE_HASH_KEY)
1341#define IXGBE_COMPUTE_SIG_HASH_ITERATION(_n) \
1342do { \
1343 u32 n = (_n); \
1344 if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << n)) \
1345 common_hash ^= lo_hash_dword >> n; \
1346 else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \
1347 bucket_hash ^= lo_hash_dword >> n; \
1348 else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << n)) \
1349 sig_hash ^= lo_hash_dword << (16 - n); \
1350 if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << (n + 16))) \
1351 common_hash ^= hi_hash_dword >> n; \
1352 else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \
1353 bucket_hash ^= hi_hash_dword >> n; \
1354 else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << (n + 16))) \
1355 sig_hash ^= hi_hash_dword << (16 - n); \
1356} while (0);
1357
1358/**
1359 * ixgbe_atr_compute_sig_hash_82599 - Compute the signature hash
1360 * @stream: input bitstream to compute the hash on
1361 *
1362 * This function is almost identical to the function above but contains
1363 * several optimizations such as unwinding all of the loops, letting the
1364 * compiler work out all of the conditional ifs since the keys are static
1365 * defines, and computing two keys at once since the hashed dword stream
1366 * will be the same for both keys.
1367 **/
1368static u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input,
1369 union ixgbe_atr_hash_dword common)
1370{
1371 u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
1372 u32 sig_hash = 0, bucket_hash = 0, common_hash = 0;
1373
1374 /* record the flow_vm_vlan bits as they are a key part to the hash */
1375 flow_vm_vlan = ntohl(input.dword);
1376
1377 /* generate common hash dword */
1378 hi_hash_dword = ntohl(common.dword);
1379
1380 /* low dword is word swapped version of common */
1381 lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
1382
1383 /* apply flow ID/VM pool/VLAN ID bits to hash words */
1384 hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
1385
1386 /* Process bits 0 and 16 */
1387 IXGBE_COMPUTE_SIG_HASH_ITERATION(0);
1388
1389 /*
1390 * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
1391 * delay this because bit 0 of the stream should not be processed
1392 * so we do not add the vlan until after bit 0 was processed
1393 */
1394 lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
1395
1396 /* Process remaining 30 bits of the key */
1397 IXGBE_COMPUTE_SIG_HASH_ITERATION(1);
1398 IXGBE_COMPUTE_SIG_HASH_ITERATION(2);
1399 IXGBE_COMPUTE_SIG_HASH_ITERATION(3);
1400 IXGBE_COMPUTE_SIG_HASH_ITERATION(4);
1401 IXGBE_COMPUTE_SIG_HASH_ITERATION(5);
1402 IXGBE_COMPUTE_SIG_HASH_ITERATION(6);
1403 IXGBE_COMPUTE_SIG_HASH_ITERATION(7);
1404 IXGBE_COMPUTE_SIG_HASH_ITERATION(8);
1405 IXGBE_COMPUTE_SIG_HASH_ITERATION(9);
1406 IXGBE_COMPUTE_SIG_HASH_ITERATION(10);
1407 IXGBE_COMPUTE_SIG_HASH_ITERATION(11);
1408 IXGBE_COMPUTE_SIG_HASH_ITERATION(12);
1409 IXGBE_COMPUTE_SIG_HASH_ITERATION(13);
1410 IXGBE_COMPUTE_SIG_HASH_ITERATION(14);
1411 IXGBE_COMPUTE_SIG_HASH_ITERATION(15);
1412
1413 /* combine common_hash result with signature and bucket hashes */
1414 bucket_hash ^= common_hash;
1415 bucket_hash &= IXGBE_ATR_HASH_MASK;
1416
1417 sig_hash ^= common_hash << 16;
1418 sig_hash &= IXGBE_ATR_HASH_MASK << 16;
1419
1420 /* return completed signature hash */
1421 return sig_hash ^ bucket_hash;
1422}
1423
1334/** 1424/**
1335 * ixgbe_atr_set_vlan_id_82599 - Sets the VLAN id in the ATR input stream 1425 * ixgbe_atr_set_vlan_id_82599 - Sets the VLAN id in the ATR input stream
1336 * @input: input stream to modify 1426 * @input: input stream to modify
@@ -1539,22 +1629,23 @@ static s32 ixgbe_atr_get_l4type_82599(union ixgbe_atr_input *input,
1539/** 1629/**
1540 * ixgbe_atr_add_signature_filter_82599 - Adds a signature hash filter 1630 * ixgbe_atr_add_signature_filter_82599 - Adds a signature hash filter
1541 * @hw: pointer to hardware structure 1631 * @hw: pointer to hardware structure
1542 * @stream: input bitstream 1632 * @input: unique input dword
1633 * @common: compressed common input dword
1543 * @queue: queue index to direct traffic to 1634 * @queue: queue index to direct traffic to
1544 **/ 1635 **/
1545s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, 1636s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
1546 union ixgbe_atr_input *input, 1637 union ixgbe_atr_hash_dword input,
1638 union ixgbe_atr_hash_dword common,
1547 u8 queue) 1639 u8 queue)
1548{ 1640{
1549 u64 fdirhashcmd; 1641 u64 fdirhashcmd;
1550 u32 fdircmd; 1642 u32 fdircmd;
1551 u32 bucket_hash, sig_hash;
1552 1643
1553 /* 1644 /*
1554 * Get the flow_type in order to program FDIRCMD properly 1645 * Get the flow_type in order to program FDIRCMD properly
1555 * lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6 1646 * lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6
1556 */ 1647 */
1557 switch (input->formatted.flow_type) { 1648 switch (input.formatted.flow_type) {
1558 case IXGBE_ATR_FLOW_TYPE_TCPV4: 1649 case IXGBE_ATR_FLOW_TYPE_TCPV4:
1559 case IXGBE_ATR_FLOW_TYPE_UDPV4: 1650 case IXGBE_ATR_FLOW_TYPE_UDPV4:
1560 case IXGBE_ATR_FLOW_TYPE_SCTPV4: 1651 case IXGBE_ATR_FLOW_TYPE_SCTPV4:
@@ -1570,7 +1661,7 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
1570 /* configure FDIRCMD register */ 1661 /* configure FDIRCMD register */
1571 fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE | 1662 fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
1572 IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN; 1663 IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
1573 fdircmd |= input->formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT; 1664 fdircmd |= input.formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
1574 fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT; 1665 fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
1575 1666
1576 /* 1667 /*
@@ -1578,17 +1669,12 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
1578 * is for FDIRCMD. Then do a 64-bit register write from FDIRHASH. 1669 * is for FDIRCMD. Then do a 64-bit register write from FDIRHASH.
1579 */ 1670 */
1580 fdirhashcmd = (u64)fdircmd << 32; 1671 fdirhashcmd = (u64)fdircmd << 32;
1581 1672 fdirhashcmd |= ixgbe_atr_compute_sig_hash_82599(input, common);
1582 sig_hash = ixgbe_atr_compute_hash_82599(input,
1583 IXGBE_ATR_SIGNATURE_HASH_KEY);
1584 fdirhashcmd |= sig_hash << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
1585
1586 bucket_hash = ixgbe_atr_compute_hash_82599(input,
1587 IXGBE_ATR_BUCKET_HASH_KEY);
1588 fdirhashcmd |= bucket_hash;
1589 1673
1590 IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd); 1674 IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd);
1591 1675
1676 hw_dbg(hw, "Tx Queue=%x hash=%x\n", queue, (u32)fdirhashcmd);
1677
1592 return 0; 1678 return 0;
1593} 1679}
1594 1680
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 26718abd5ce4..490818c46d74 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -6506,37 +6506,92 @@ static void ixgbe_tx_queue(struct ixgbe_ring *tx_ring,
6506 writel(i, tx_ring->tail); 6506 writel(i, tx_ring->tail);
6507} 6507}
6508 6508
6509static void ixgbe_atr(struct ixgbe_adapter *adapter, struct sk_buff *skb, 6509static void ixgbe_atr(struct ixgbe_ring *ring, struct sk_buff *skb,
6510 u8 queue, u32 tx_flags, __be16 protocol) 6510 u32 tx_flags, __be16 protocol)
6511{ 6511{
6512 union ixgbe_atr_input atr_input; 6512 struct ixgbe_q_vector *q_vector = ring->q_vector;
6513 struct iphdr *iph = ip_hdr(skb); 6513 union ixgbe_atr_hash_dword input = { .dword = 0 };
6514 struct ethhdr *eth = (struct ethhdr *)skb->data; 6514 union ixgbe_atr_hash_dword common = { .dword = 0 };
6515 union {
6516 unsigned char *network;
6517 struct iphdr *ipv4;
6518 struct ipv6hdr *ipv6;
6519 } hdr;
6515 struct tcphdr *th; 6520 struct tcphdr *th;
6516 __be16 vlan_id; 6521 __be16 vlan_id;
6517 6522
6518 /* Right now, we support IPv4 w/ TCP only */ 6523 /* if ring doesn't have an interrupt vector, cannot perform ATR */
6519 if (protocol != htons(ETH_P_IP) || 6524 if (!q_vector)
6520 iph->protocol != IPPROTO_TCP) 6525 return;
6526
6527 /* do nothing if sampling is disabled */
6528 if (!ring->atr_sample_rate)
6521 return; 6529 return;
6522 6530
6523 memset(&atr_input, 0, sizeof(union ixgbe_atr_input)); 6531 ring->atr_count++;
6524 6532
6525 vlan_id = htons(tx_flags >> IXGBE_TX_FLAGS_VLAN_SHIFT); 6533 /* snag network header to get L4 type and address */
6534 hdr.network = skb_network_header(skb);
6535
6536 /* Currently only IPv4/IPv6 with TCP is supported */
6537 if ((protocol != __constant_htons(ETH_P_IPV6) ||
6538 hdr.ipv6->nexthdr != IPPROTO_TCP) &&
6539 (protocol != __constant_htons(ETH_P_IP) ||
6540 hdr.ipv4->protocol != IPPROTO_TCP))
6541 return;
6526 6542
6527 th = tcp_hdr(skb); 6543 th = tcp_hdr(skb);
6528 6544
6529 ixgbe_atr_set_vlan_id_82599(&atr_input, vlan_id); 6545 /* skip this packet since the socket is closing */
6530 ixgbe_atr_set_src_port_82599(&atr_input, th->dest); 6546 if (th->fin)
6531 ixgbe_atr_set_dst_port_82599(&atr_input, th->source); 6547 return;
6532 ixgbe_atr_set_flex_byte_82599(&atr_input, eth->h_proto); 6548
6533 ixgbe_atr_set_l4type_82599(&atr_input, IXGBE_ATR_FLOW_TYPE_TCPV4); 6549 /* sample on all syn packets or once every atr sample count */
6534 /* src and dst are inverted, think how the receiver sees them */ 6550 if (!th->syn && (ring->atr_count < ring->atr_sample_rate))
6535 ixgbe_atr_set_src_ipv4_82599(&atr_input, iph->daddr); 6551 return;
6536 ixgbe_atr_set_dst_ipv4_82599(&atr_input, iph->saddr); 6552
6553 /* reset sample count */
6554 ring->atr_count = 0;
6555
6556 vlan_id = htons(tx_flags >> IXGBE_TX_FLAGS_VLAN_SHIFT);
6557
6558 /*
6559 * src and dst are inverted, think how the receiver sees them
6560 *
6561 * The input is broken into two sections, a non-compressed section
6562 * containing vm_pool, vlan_id, and flow_type. The rest of the data
6563 * is XORed together and stored in the compressed dword.
6564 */
6565 input.formatted.vlan_id = vlan_id;
6566
6567 /*
6568 * since src port and flex bytes occupy the same word XOR them together
6569 * and write the value to source port portion of compressed dword
6570 */
6571 if (vlan_id)
6572 common.port.src ^= th->dest ^ __constant_htons(ETH_P_8021Q);
6573 else
6574 common.port.src ^= th->dest ^ protocol;
6575 common.port.dst ^= th->source;
6576
6577 if (protocol == __constant_htons(ETH_P_IP)) {
6578 input.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
6579 common.ip ^= hdr.ipv4->saddr ^ hdr.ipv4->daddr;
6580 } else {
6581 input.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_TCPV6;
6582 common.ip ^= hdr.ipv6->saddr.s6_addr32[0] ^
6583 hdr.ipv6->saddr.s6_addr32[1] ^
6584 hdr.ipv6->saddr.s6_addr32[2] ^
6585 hdr.ipv6->saddr.s6_addr32[3] ^
6586 hdr.ipv6->daddr.s6_addr32[0] ^
6587 hdr.ipv6->daddr.s6_addr32[1] ^
6588 hdr.ipv6->daddr.s6_addr32[2] ^
6589 hdr.ipv6->daddr.s6_addr32[3];
6590 }
6537 6591
6538 /* This assumes the Rx queue and Tx queue are bound to the same CPU */ 6592 /* This assumes the Rx queue and Tx queue are bound to the same CPU */
6539 ixgbe_fdir_add_signature_filter_82599(&adapter->hw, &atr_input, queue); 6593 ixgbe_fdir_add_signature_filter_82599(&q_vector->adapter->hw,
6594 input, common, ring->queue_index);
6540} 6595}
6541 6596
6542static int __ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, int size) 6597static int __ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, int size)
@@ -6707,16 +6762,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
6707 count = ixgbe_tx_map(adapter, tx_ring, skb, tx_flags, first, hdr_len); 6762 count = ixgbe_tx_map(adapter, tx_ring, skb, tx_flags, first, hdr_len);
6708 if (count) { 6763 if (count) {
6709 /* add the ATR filter if ATR is on */ 6764 /* add the ATR filter if ATR is on */
6710 if (tx_ring->atr_sample_rate) { 6765 if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state))
6711 ++tx_ring->atr_count; 6766 ixgbe_atr(tx_ring, skb, tx_flags, protocol);
6712 if ((tx_ring->atr_count >= tx_ring->atr_sample_rate) &&
6713 test_bit(__IXGBE_TX_FDIR_INIT_DONE,
6714 &tx_ring->state)) {
6715 ixgbe_atr(adapter, skb, tx_ring->queue_index,
6716 tx_flags, protocol);
6717 tx_ring->atr_count = 0;
6718 }
6719 }
6720 txq = netdev_get_tx_queue(netdev, tx_ring->queue_index); 6767 txq = netdev_get_tx_queue(netdev, tx_ring->queue_index);
6721 txq->tx_bytes += skb->len; 6768 txq->tx_bytes += skb->len;
6722 txq->tx_packets++; 6769 txq->tx_packets++;
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index c56a7128e452..0d9392d92a0f 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -2198,6 +2198,22 @@ union ixgbe_atr_input {
2198 __be32 dword_stream[11]; 2198 __be32 dword_stream[11];
2199}; 2199};
2200 2200
2201/* Flow Director compressed ATR hash input struct */
2202union ixgbe_atr_hash_dword {
2203 struct {
2204 u8 vm_pool;
2205 u8 flow_type;
2206 __be16 vlan_id;
2207 } formatted;
2208 __be32 ip;
2209 struct {
2210 __be16 src;
2211 __be16 dst;
2212 } port;
2213 __be16 flex_bytes;
2214 __be32 dword;
2215};
2216
2201struct ixgbe_atr_input_masks { 2217struct ixgbe_atr_input_masks {
2202 __be32 src_ip_mask; 2218 __be32 src_ip_mask;
2203 __be32 dst_ip_mask; 2219 __be32 dst_ip_mask;