aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinas Vepstas <linas@austin.ibm.com>2007-06-11 14:21:13 -0400
committerJeff Garzik <jeff@garzik.org>2007-06-20 19:09:32 -0400
commit4c4bd5a97a87670d2c368ed4ed8a8f2c93080605 (patch)
tree62367f49c10b278951f3610eba99b21dc9fc279b
parent83d35145c443d8394aaf1743301e79fa0a8054d7 (diff)
spidernet: Cure RX ram full bug
This patch fixes a rare deadlock that can occur when the kernel is not able to empty out the RX ring quickly enough. Below follows a detailed description of the bug and the fix. As long as the OS can empty out the RX buffers at a rate faster than the hardware can fill them, there is no problem. If, for some reason, the OS fails to empty the RX ring fast enough, the hardware GDACTDPA pointer will catch up to the head, notice the not-empty condition, and stop. However, RX packets may still continue arriving on the wire. The spidernet chip can save some limited number of these in local RAM. When this local ram fills up, the spider chip will issue an interrupt indicating this (GHIINT0STS will show ERRINT, and the GRMFLLINT bit will be set in GHIINT1STS). When the RX ram full condition occurs, a certain bug/feature is triggered that has to be specially handled. This section describes the special handling for this condition. When the OS finally has a chance to run, it will empty out the RX ring. In particular, it will clear the descriptor on which the hardware had stopped. However, once the hardware has decided that a certain descriptor is invalid, it will not restart at that descriptor; instead it will restart at the next descr. This potentially will lead to a deadlock condition, as the tail pointer will be pointing at this descr, which, from the OS point of view, is empty; the OS will be waiting for this descr to be filled. However, the hardware has skipped this descr, and is filling the next descrs. Since the OS doesn't see this, there is a potential deadlock, with the OS waiting for one descr to fill, while the hardware is waiting for a different set of descrs to become empty. A call to show_rx_chain() at this point indicates the nature of the problem. A typical print when the network is hung shows the following: net eth1: Spider RX RAM full, incoming packets might be discarded! 
net eth1: Total number of descrs=256 net eth1: Chain tail located at descr=255 net eth1: Chain head is at 255 net eth1: HW curr desc (GDACTDPA) is at 0 net eth1: Have 1 descrs with stat=xa0800000 net eth1: HW next desc (GDACNEXTDA) is at 1 net eth1: Have 127 descrs with stat=x40800101 net eth1: Have 1 descrs with stat=x40800001 net eth1: Have 126 descrs with stat=x40800101 net eth1: Last 1 descrs with stat=xa0800000 Both the tail and head pointers are pointing at descr 255, which is marked xa... which is "empty". Thus, from the OS point of view, there is nothing to be done. In particular, there is the implicit assumption that everything in front of the "empty" descr must surely also be empty, as explained in the last section. The OS is waiting for descr 255 to become non-empty, which, in this case, will never happen. The HW pointer is at descr 0. This descr is marked 0x4.. or "full". Since it's already full, the hardware can do nothing more, and thus has halted processing. Notice that descrs 0 through 253 are all marked "full", while descr 254 and 255 are empty. (The "Last 1 descrs" is descr 254, since tail was at 255.) Thus, the system is deadlocked, and there can be no forward progress; the OS thinks there's nothing to do, and the hardware has nowhere to put incoming data. This bug/feature is worked around with the spider_net_resync_head_ptr() routine. When the driver receives RX interrupts, but an examination of the RX chain seems to show it is empty, then it is probable that the hardware has skipped a descr or two (sometimes dozens under heavy network conditions). The spider_net_resync_head_ptr() subroutine will search the ring for the next full descr, and the driver will resume operations there. Since this will leave "holes" in the ring, there is also a spider_net_resync_tail_ptr() that will skip over such holes. Signed-off-by: Linas Vepstas <linas@austin.ibm.com> Signed-off-by: Jeff Garzik <jeff@garzik.org>
-rw-r--r--drivers/net/spider_net.c86
-rw-r--r--drivers/net/spider_net.h3
2 files changed, 82 insertions, 7 deletions
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index 69005d113d4d..c99980a14203 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -1051,6 +1051,66 @@ static void show_rx_chain(struct spider_net_card *card)
1051#endif 1051#endif
1052 1052
1053/** 1053/**
1054 * spider_net_resync_head_ptr - Advance head ptr past empty descrs
1055 *
1056 * If the driver fails to keep up and empty the queue, then the
1057 * hardware will run out of room to put incoming packets. This
1058 * will cause the hardware to skip descrs that are full (instead
1059 * of halting/retrying). Thus, once the driver runs, it will need
1060 * to "catch up" to where the hardware chain pointer is at.
1061 */
1062static void spider_net_resync_head_ptr(struct spider_net_card *card)
1063{
1064 unsigned long flags;
1065 struct spider_net_descr_chain *chain = &card->rx_chain;
1066 struct spider_net_descr *descr;
1067 int i, status;
1068
1069 /* Advance head pointer past any empty descrs */
 /* "Empty" here corresponds to the status the loop below skips over,
  * SPIDER_NET_DESCR_CARDOWNED.
  *
  * First take an unlocked peek at the current head: if its status is
  * SPIDER_NET_DESCR_NOT_IN_USE, return without taking chain->lock and
  * without moving the head. */
1070 descr = chain->head;
1071 status = spider_net_get_descr_status(descr->hwdescr);
1072
1073 if (status == SPIDER_NET_DESCR_NOT_IN_USE)
1074 return;
1075
1076 spin_lock_irqsave(&chain->lock, flags);
1077
 /* Re-read head and its status now that chain->lock is held; the
  * values sampled above were read without the lock. */
1078 descr = chain->head;
1079 status = spider_net_get_descr_status(descr->hwdescr);
 /* Follow ->next for at most chain->num_desc steps, stopping at the
  * first descr whose status is not CARDOWNED; that descr becomes the
  * new head. If every descr visited is CARDOWNED the walk ends after
  * num_desc steps -- presumably back at the starting descr, assuming
  * the ->next links form a ring of num_desc entries (TODO confirm). */
1080 for (i=0; i<chain->num_desc; i++) {
1081 if (status != SPIDER_NET_DESCR_CARDOWNED) break;
1082 descr = descr->next;
1083 status = spider_net_get_descr_status(descr->hwdescr);
1084 }
1085 chain->head = descr;
1086
1087 spin_unlock_irqrestore(&chain->lock, flags);
1088}
1089
/*
 * spider_net_resync_tail_ptr - Advance RX chain tail past skippable descrs
 * @card: card structure; only card->rx_chain is used
 *
 * Moves chain->tail forward over descrs whose status is either
 * SPIDER_NET_DESCR_CARDOWNED or SPIDER_NET_DESCR_NOT_IN_USE, stopping at
 * the first descr with any other status (or after num_desc steps).
 *
 * Returns 1 if the tail did not move at all (i == 0) or if all num_desc
 * descrs were stepped over (i == num_desc); returns 0 otherwise.
 * spider_net_poll() stores this return value in no_more_packets.
 *
 * NOTE(review): this function does not take chain->lock itself;
 * presumably the caller provides whatever serialization is needed --
 * confirm against spider_net_poll().
 */
1090static int spider_net_resync_tail_ptr(struct spider_net_card *card)
1091{
1092 struct spider_net_descr_chain *chain = &card->rx_chain;
1093 struct spider_net_descr *descr;
1094 int i, status;
1095
1096 /* Advance tail pointer past any empty and reaped descrs */
 /* The two statuses skipped by the loop are CARDOWNED and NOT_IN_USE;
  * presumably "empty" maps to CARDOWNED and "reaped" to NOT_IN_USE
  * (TODO confirm against the status definitions). */
1097 descr = chain->tail;
1098 status = spider_net_get_descr_status(descr->hwdescr);
1099
 /* Bounded walk: at most chain->num_desc steps along ->next. */
1100 for (i=0; i<chain->num_desc; i++) {
1101 if ((status != SPIDER_NET_DESCR_CARDOWNED) &&
1102 (status != SPIDER_NET_DESCR_NOT_IN_USE)) break;
1103 descr = descr->next;
1104 status = spider_net_get_descr_status(descr->hwdescr);
1105 }
1106 chain->tail = descr;
1107
 /* i == num_desc: no descr with another status was found in the walk.
  * i == 0: the tail already pointed at such a descr, so it did not
  * move. Both cases report 1 to the caller. */
1108 if ((i == chain->num_desc) || (i == 0))
1109 return 1;
1110 return 0;
1111}
1112
1113/**
1054 * spider_net_decode_one_descr - processes an RX descriptor 1114 * spider_net_decode_one_descr - processes an RX descriptor
1055 * @card: card structure 1115 * @card: card structure
1056 * 1116 *
@@ -1175,6 +1235,12 @@ spider_net_poll(struct net_device *netdev, int *budget)
1175 } 1235 }
1176 } 1236 }
1177 1237
1238 if ((packets_done == 0) && (card->num_rx_ints != 0)) {
1239 no_more_packets = spider_net_resync_tail_ptr(card);
1240 spider_net_resync_head_ptr(card);
1241 }
1242 card->num_rx_ints = 0;
1243
1178 netdev->quota -= packets_done; 1244 netdev->quota -= packets_done;
1179 *budget -= packets_done; 1245 *budget -= packets_done;
1180 spider_net_refill_rx_chain(card); 1246 spider_net_refill_rx_chain(card);
@@ -1421,7 +1487,11 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg)
1421 if (netif_msg_intr(card) && net_ratelimit()) 1487 if (netif_msg_intr(card) && net_ratelimit())
1422 pr_err("Spider RX RAM full, incoming packets " 1488 pr_err("Spider RX RAM full, incoming packets "
1423 "might be discarded!\n"); 1489 "might be discarded!\n");
1424 spider_net_rx_irq_off(card); 1490 /* Could happen when rx chain is full */
1491 spider_net_resync_head_ptr(card);
1492 spider_net_refill_rx_chain(card);
1493 spider_net_enable_rxdmac(card);
1494 card->num_rx_ints ++;
1425 netif_rx_schedule(card->netdev); 1495 netif_rx_schedule(card->netdev);
1426 show_error = 0; 1496 show_error = 0;
1427 break; 1497 break;
@@ -1437,12 +1507,11 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg)
1437 case SPIDER_NET_GDCDCEINT: /* fallthrough */ 1507 case SPIDER_NET_GDCDCEINT: /* fallthrough */
1438 case SPIDER_NET_GDBDCEINT: /* fallthrough */ 1508 case SPIDER_NET_GDBDCEINT: /* fallthrough */
1439 case SPIDER_NET_GDADCEINT: 1509 case SPIDER_NET_GDADCEINT:
1440 if (netif_msg_intr(card) && net_ratelimit()) 1510 spider_net_resync_head_ptr(card);
1441 pr_err("got descriptor chain end interrupt, "
1442 "restarting DMAC %c.\n",
1443 'D'-(i-SPIDER_NET_GDDDCEINT)/3);
1444 spider_net_refill_rx_chain(card); 1511 spider_net_refill_rx_chain(card);
1445 spider_net_enable_rxdmac(card); 1512 spider_net_enable_rxdmac(card);
1513 card->num_rx_ints ++;
1514 netif_rx_schedule(card->netdev);
1446 show_error = 0; 1515 show_error = 0;
1447 break; 1516 break;
1448 1517
@@ -1451,9 +1520,12 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg)
1451 case SPIDER_NET_GDCINVDINT: /* fallthrough */ 1520 case SPIDER_NET_GDCINVDINT: /* fallthrough */
1452 case SPIDER_NET_GDBINVDINT: /* fallthrough */ 1521 case SPIDER_NET_GDBINVDINT: /* fallthrough */
1453 case SPIDER_NET_GDAINVDINT: 1522 case SPIDER_NET_GDAINVDINT:
1454 /* could happen when rx chain is full */ 1523 /* Could happen when rx chain is full */
1524 spider_net_resync_head_ptr(card);
1455 spider_net_refill_rx_chain(card); 1525 spider_net_refill_rx_chain(card);
1456 spider_net_enable_rxdmac(card); 1526 spider_net_enable_rxdmac(card);
1527 card->num_rx_ints ++;
1528 netif_rx_schedule(card->netdev);
1457 show_error = 0; 1529 show_error = 0;
1458 break; 1530 break;
1459 1531
@@ -1546,6 +1618,7 @@ spider_net_interrupt(int irq, void *ptr)
1546 if (status_reg & SPIDER_NET_RXINT ) { 1618 if (status_reg & SPIDER_NET_RXINT ) {
1547 spider_net_rx_irq_off(card); 1619 spider_net_rx_irq_off(card);
1548 netif_rx_schedule(netdev); 1620 netif_rx_schedule(netdev);
1621 card->num_rx_ints ++;
1549 } 1622 }
1550 if (status_reg & SPIDER_NET_TXINT) 1623 if (status_reg & SPIDER_NET_TXINT)
1551 netif_rx_schedule(netdev); 1624 netif_rx_schedule(netdev);
@@ -2191,6 +2264,7 @@ spider_net_setup_netdev(struct spider_net_card *card)
2191 * NETIF_F_HW_VLAN_FILTER */ 2264 * NETIF_F_HW_VLAN_FILTER */
2192 2265
2193 netdev->irq = card->pdev->irq; 2266 netdev->irq = card->pdev->irq;
2267 card->num_rx_ints = 0;
2194 2268
2195 dn = pci_device_to_OF_node(card->pdev); 2269 dn = pci_device_to_OF_node(card->pdev);
2196 if (!dn) 2270 if (!dn)
diff --git a/drivers/net/spider_net.h b/drivers/net/spider_net.h
index 4a1e0d28a502..b620f181227c 100644
--- a/drivers/net/spider_net.h
+++ b/drivers/net/spider_net.h
@@ -25,7 +25,7 @@
25#ifndef _SPIDER_NET_H 25#ifndef _SPIDER_NET_H
26#define _SPIDER_NET_H 26#define _SPIDER_NET_H
27 27
28#define VERSION "2.0 A" 28#define VERSION "2.0 B"
29 29
30#include "sungem_phy.h" 30#include "sungem_phy.h"
31 31
@@ -461,6 +461,7 @@ struct spider_net_card {
461 struct work_struct tx_timeout_task; 461 struct work_struct tx_timeout_task;
462 atomic_t tx_timeout_task_counter; 462 atomic_t tx_timeout_task_counter;
463 wait_queue_head_t waitq; 463 wait_queue_head_t waitq;
464 int num_rx_ints;
464 465
465 /* for ethtool */ 466 /* for ethtool */
466 int msg_enable; 467 int msg_enable;