author     Stanislaw Gruszka <sgruszka@redhat.com>    2011-03-24 21:21:51 -0400
committer  David S. Miller <davem@davemloft.net>      2011-03-28 02:35:03 -0400
commit     b3cd965739b8677f6e04913aa535fa156b09e73d
tree       1cb6bb7af7f81fc5ace5ad520408007590336dc5
parent     edf947f10074fea27fdb1730524dca59355a1c40
myri10ge: small rx_done refactoring
Avoid a theoretical race condition when accessing the NETIF_F_LRO flag in
dev->features, illustrated below:

CPU1                                    CPU2

myri10ge_clean_rx_done():               myri10ge_set_flags() or
                                        myri10ge_set_rx_csum():

if (dev->features & NETIF_F_LRO)
        setup lro

                                        dev->features |= NETIF_F_LRO
                                        or
                                        dev->features &= ~NETIF_F_LRO;

if (dev->features & NETIF_F_LRO)
        flush lro

On the way, reduce the number of arguments to myri10ge_rx_done() and the
number of its call sites by moving the mgp->small_bytes check into that
function. That reduces the code size from:

   text    data     bss     dec     hex filename
  36644     248     100   36992    9080 drivers/net/myri10ge/myri10ge.o

to:

   text    data     bss     dec     hex filename
  36037     247     100   36384    8e20 drivers/net/myri10ge/myri10ge.o

on my i686 system, which should also make myri10ge_clean_rx_done() faster.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
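To make the interleaving above concrete, here is a minimal user-space sketch of the same pattern (an illustration only, not the driver's code: fake_dev, FAKE_F_LRO, poll_racy() and poll_snapshot() are made-up names). It contrasts testing a shared feature word twice against snapshotting it once into a local, which is what the patch below does with ACCESS_ONCE() on mgp->dev->features before the receive loop.

/*
 * Minimal sketch of the race described above -- user-space C, not the
 * driver's code.  All names here (fake_dev, FAKE_F_LRO, poll_racy,
 * poll_snapshot) are made up for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

#define FAKE_F_LRO 0x1UL                  /* stands in for NETIF_F_LRO */

struct fake_dev {
        volatile unsigned long features;  /* may be toggled from another context */
};

/*
 * Racy variant: ->features is tested twice.  If another context toggles
 * the flag between the two tests, LRO sessions can be set up but never
 * flushed (or flushed without having been set up).
 */
static void poll_racy(struct fake_dev *dev)
{
        if (dev->features & FAKE_F_LRO)
                puts("rx: aggregate packet into LRO session");
        /* ... per-packet work; features may change here ... */
        if (dev->features & FAKE_F_LRO)
                puts("rx: flush LRO sessions");
}

/*
 * Fixed variant: read the word once into a local (the patch uses
 * ACCESS_ONCE() for this) so the setup and flush decisions always agree
 * within one polling pass.
 */
static void poll_snapshot(struct fake_dev *dev)
{
        bool lro_enabled = dev->features & FAKE_F_LRO;

        if (lro_enabled)
                puts("rx: aggregate packet into LRO session");
        /* ... per-packet work ... */
        if (lro_enabled)
                puts("rx: flush LRO sessions");
}

int main(void)
{
        struct fake_dev dev = { .features = FAKE_F_LRO };

        poll_racy(&dev);
        poll_snapshot(&dev);
        return 0;
}

Passing the snapshot down as the lro_enabled argument, rather than re-testing dev->features inside myri10ge_rx_done(), is what lets the per-packet LRO path and the final lro_flush_all() call agree within one polling pass.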
 drivers/net/myri10ge/myri10ge.c | 37
 1 file changed, 23 insertions(+), 14 deletions(-)
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 1f4e8680a96a..673dc600c891 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -1312,17 +1312,26 @@ myri10ge_unmap_rx_page(struct pci_dev *pdev,
  * page into an skb */
 
 static inline int
-myri10ge_rx_done(struct myri10ge_slice_state *ss, struct myri10ge_rx_buf *rx,
-                 int bytes, int len, __wsum csum)
+myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum,
+                 int lro_enabled)
 {
         struct myri10ge_priv *mgp = ss->mgp;
         struct sk_buff *skb;
         struct skb_frag_struct rx_frags[MYRI10GE_MAX_FRAGS_PER_FRAME];
-        int i, idx, hlen, remainder;
+        struct myri10ge_rx_buf *rx;
+        int i, idx, hlen, remainder, bytes;
         struct pci_dev *pdev = mgp->pdev;
         struct net_device *dev = mgp->dev;
         u8 *va;
 
+        if (len <= mgp->small_bytes) {
+                rx = &ss->rx_small;
+                bytes = mgp->small_bytes;
+        } else {
+                rx = &ss->rx_big;
+                bytes = mgp->big_bytes;
+        }
+
         len += MXGEFW_PAD;
         idx = rx->cnt & rx->mask;
         va = page_address(rx->info[idx].page) + rx->info[idx].page_offset;
@@ -1341,7 +1350,7 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, struct myri10ge_rx_buf *rx,
                 remainder -= MYRI10GE_ALLOC_SIZE;
         }
 
-        if (dev->features & NETIF_F_LRO) {
+        if (lro_enabled) {
                 rx_frags[0].page_offset += MXGEFW_PAD;
                 rx_frags[0].size -= MXGEFW_PAD;
                 len -= MXGEFW_PAD;
@@ -1463,7 +1472,7 @@ myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget)
 {
         struct myri10ge_rx_done *rx_done = &ss->rx_done;
         struct myri10ge_priv *mgp = ss->mgp;
-        struct net_device *netdev = mgp->dev;
+
         unsigned long rx_bytes = 0;
         unsigned long rx_packets = 0;
         unsigned long rx_ok;
@@ -1474,18 +1483,18 @@ myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget)
         u16 length;
         __wsum checksum;
 
+        /*
+         * Prevent compiler from generating more than one ->features memory
+         * access to avoid theoretical race condition with functions that
+         * change NETIF_F_LRO flag at runtime.
+         */
+        bool lro_enabled = ACCESS_ONCE(mgp->dev->features) & NETIF_F_LRO;
+
         while (rx_done->entry[idx].length != 0 && work_done < budget) {
                 length = ntohs(rx_done->entry[idx].length);
                 rx_done->entry[idx].length = 0;
                 checksum = csum_unfold(rx_done->entry[idx].checksum);
-                if (length <= mgp->small_bytes)
-                        rx_ok = myri10ge_rx_done(ss, &ss->rx_small,
-                                                 mgp->small_bytes,
-                                                 length, checksum);
-                else
-                        rx_ok = myri10ge_rx_done(ss, &ss->rx_big,
-                                                 mgp->big_bytes,
-                                                 length, checksum);
+                rx_ok = myri10ge_rx_done(ss, length, checksum, lro_enabled);
                 rx_packets += rx_ok;
                 rx_bytes += rx_ok * (unsigned long)length;
                 cnt++;
@@ -1497,7 +1506,7 @@ myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget)
         ss->stats.rx_packets += rx_packets;
         ss->stats.rx_bytes += rx_bytes;
 
-        if (netdev->features & NETIF_F_LRO)
+        if (lro_enabled)
                 lro_flush_all(&rx_done->lro_mgr);
 
         /* restock receive rings if needed */