aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Divy Le Ray <divy@chelsio.com> 2008-10-08 20:36:03 -0400
committer David S. Miller <davem@davemloft.net> 2008-10-08 20:36:03 -0400
commit 20d3fc11505a2706a33b4c9a932af036d836727f (patch)
tree 8c72b6673ac62166225d30a409b09c2354f5286b
parent 45cec1bac0719c904bb5f4405c2937f7e715888c (diff)
cxgb3: reset the adapter on fatal error
When a fatal error occurs, bring the ports down, reset the chip, and bring the ports back up. Factor out the code shared by EEH and fatal error recovery. Fix timer usage when bringing up/resetting SGE queue sets.
Signed-off-by: Divy Le Ray <divy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- drivers/net/cxgb3/adapter.h 1
-rw-r--r-- drivers/net/cxgb3/common.h 1
-rw-r--r-- drivers/net/cxgb3/cxgb3_main.c 164
-rw-r--r-- drivers/net/cxgb3/sge.c 9
-rw-r--r-- drivers/net/cxgb3/t3_hw.c 4
5 files changed, 119 insertions, 60 deletions
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index e9da28597233..02dd69b90abe 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -240,6 +240,7 @@ struct adapter {
240 unsigned int check_task_cnt; 240 unsigned int check_task_cnt;
241 struct delayed_work adap_check_task; 241 struct delayed_work adap_check_task;
242 struct work_struct ext_intr_handler_task; 242 struct work_struct ext_intr_handler_task;
243 struct work_struct fatal_error_handler_task;
243 244
244 struct dentry *debugfs_root; 245 struct dentry *debugfs_root;
245 246
diff --git a/drivers/net/cxgb3/common.h b/drivers/net/cxgb3/common.h
index 9ecf8a6dc97f..d6dbcd403a7d 100644
--- a/drivers/net/cxgb3/common.h
+++ b/drivers/net/cxgb3/common.h
@@ -698,6 +698,7 @@ int t3_check_fw_version(struct adapter *adapter, int *must_load);
698int t3_init_hw(struct adapter *adapter, u32 fw_params); 698int t3_init_hw(struct adapter *adapter, u32 fw_params);
699void mac_prep(struct cmac *mac, struct adapter *adapter, int index); 699void mac_prep(struct cmac *mac, struct adapter *adapter, int index);
700void early_hw_init(struct adapter *adapter, const struct adapter_info *ai); 700void early_hw_init(struct adapter *adapter, const struct adapter_info *ai);
701int t3_reset_adapter(struct adapter *adapter);
701int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai, 702int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai,
702 int reset); 703 int reset);
703int t3_replay_prep_adapter(struct adapter *adapter); 704int t3_replay_prep_adapter(struct adapter *adapter);
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index d355c826b9b9..0e51d49842fa 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -892,6 +892,13 @@ static int cxgb_up(struct adapter *adap)
892 goto out; 892 goto out;
893 } 893 }
894 894
895 /*
896 * Clear interrupts now to catch errors if t3_init_hw fails.
897 * We clear them again later as initialization may trigger
898 * conditions that can interrupt.
899 */
900 t3_intr_clear(adap);
901
895 err = t3_init_hw(adap, 0); 902 err = t3_init_hw(adap, 0);
896 if (err) 903 if (err)
897 goto out; 904 goto out;
@@ -1101,9 +1108,9 @@ static int cxgb_close(struct net_device *dev)
1101 netif_carrier_off(dev); 1108 netif_carrier_off(dev);
1102 t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX); 1109 t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1103 1110
1104 spin_lock(&adapter->work_lock); /* sync with update task */ 1111 spin_lock_irq(&adapter->work_lock); /* sync with update task */
1105 clear_bit(pi->port_id, &adapter->open_device_map); 1112 clear_bit(pi->port_id, &adapter->open_device_map);
1106 spin_unlock(&adapter->work_lock); 1113 spin_unlock_irq(&adapter->work_lock);
1107 1114
1108 if (!(adapter->open_device_map & PORT_MASK)) 1115 if (!(adapter->open_device_map & PORT_MASK))
1109 cancel_rearming_delayed_workqueue(cxgb3_wq, 1116 cancel_rearming_delayed_workqueue(cxgb3_wq,
@@ -2356,10 +2363,10 @@ static void t3_adap_check_task(struct work_struct *work)
2356 check_t3b2_mac(adapter); 2363 check_t3b2_mac(adapter);
2357 2364
2358 /* Schedule the next check update if any port is active. */ 2365 /* Schedule the next check update if any port is active. */
2359 spin_lock(&adapter->work_lock); 2366 spin_lock_irq(&adapter->work_lock);
2360 if (adapter->open_device_map & PORT_MASK) 2367 if (adapter->open_device_map & PORT_MASK)
2361 schedule_chk_task(adapter); 2368 schedule_chk_task(adapter);
2362 spin_unlock(&adapter->work_lock); 2369 spin_unlock_irq(&adapter->work_lock);
2363} 2370}
2364 2371
2365/* 2372/*
@@ -2404,6 +2411,96 @@ void t3_os_ext_intr_handler(struct adapter *adapter)
2404 spin_unlock(&adapter->work_lock); 2411 spin_unlock(&adapter->work_lock);
2405} 2412}
2406 2413
2414static int t3_adapter_error(struct adapter *adapter, int reset)
2415{
2416 int i, ret = 0;
2417
2418 /* Stop all ports */
2419 for_each_port(adapter, i) {
2420 struct net_device *netdev = adapter->port[i];
2421
2422 if (netif_running(netdev))
2423 cxgb_close(netdev);
2424 }
2425
2426 if (is_offload(adapter) &&
2427 test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map))
2428 offload_close(&adapter->tdev);
2429
2430 /* Stop SGE timers */
2431 t3_stop_sge_timers(adapter);
2432
2433 adapter->flags &= ~FULL_INIT_DONE;
2434
2435 if (reset)
2436 ret = t3_reset_adapter(adapter);
2437
2438 pci_disable_device(adapter->pdev);
2439
2440 return ret;
2441}
2442
2443static int t3_reenable_adapter(struct adapter *adapter)
2444{
2445 if (pci_enable_device(adapter->pdev)) {
2446 dev_err(&adapter->pdev->dev,
2447 "Cannot re-enable PCI device after reset.\n");
2448 goto err;
2449 }
2450 pci_set_master(adapter->pdev);
2451 pci_restore_state(adapter->pdev);
2452
2453 /* Free sge resources */
2454 t3_free_sge_resources(adapter);
2455
2456 if (t3_replay_prep_adapter(adapter))
2457 goto err;
2458
2459 return 0;
2460err:
2461 return -1;
2462}
2463
2464static void t3_resume_ports(struct adapter *adapter)
2465{
2466 int i;
2467
2468 /* Restart the ports */
2469 for_each_port(adapter, i) {
2470 struct net_device *netdev = adapter->port[i];
2471
2472 if (netif_running(netdev)) {
2473 if (cxgb_open(netdev)) {
2474 dev_err(&adapter->pdev->dev,
2475 "can't bring device back up"
2476 " after reset\n");
2477 continue;
2478 }
2479 }
2480 }
2481}
2482
2483/*
2484 * processes a fatal error.
2485 * Bring the ports down, reset the chip, bring the ports back up.
2486 */
2487static void fatal_error_task(struct work_struct *work)
2488{
2489 struct adapter *adapter = container_of(work, struct adapter,
2490 fatal_error_handler_task);
2491 int err = 0;
2492
2493 rtnl_lock();
2494 err = t3_adapter_error(adapter, 1);
2495 if (!err)
2496 err = t3_reenable_adapter(adapter);
2497 if (!err)
2498 t3_resume_ports(adapter);
2499
2500 CH_ALERT(adapter, "adapter reset %s\n", err ? "failed" : "succeeded");
2501 rtnl_unlock();
2502}
2503
2407void t3_fatal_err(struct adapter *adapter) 2504void t3_fatal_err(struct adapter *adapter)
2408{ 2505{
2409 unsigned int fw_status[4]; 2506 unsigned int fw_status[4];
@@ -2414,7 +2511,11 @@ void t3_fatal_err(struct adapter *adapter)
2414 t3_write_reg(adapter, A_XGM_RX_CTRL, 0); 2511 t3_write_reg(adapter, A_XGM_RX_CTRL, 0);
2415 t3_write_reg(adapter, XGM_REG(A_XGM_TX_CTRL, 1), 0); 2512 t3_write_reg(adapter, XGM_REG(A_XGM_TX_CTRL, 1), 0);
2416 t3_write_reg(adapter, XGM_REG(A_XGM_RX_CTRL, 1), 0); 2513 t3_write_reg(adapter, XGM_REG(A_XGM_RX_CTRL, 1), 0);
2514
2515 spin_lock(&adapter->work_lock);
2417 t3_intr_disable(adapter); 2516 t3_intr_disable(adapter);
2517 queue_work(cxgb3_wq, &adapter->fatal_error_handler_task);
2518 spin_unlock(&adapter->work_lock);
2418 } 2519 }
2419 CH_ALERT(adapter, "encountered fatal error, operation suspended\n"); 2520 CH_ALERT(adapter, "encountered fatal error, operation suspended\n");
2420 if (!t3_cim_ctl_blk_read(adapter, 0xa0, 4, fw_status)) 2521 if (!t3_cim_ctl_blk_read(adapter, 0xa0, 4, fw_status))
@@ -2436,26 +2537,9 @@ static pci_ers_result_t t3_io_error_detected(struct pci_dev *pdev,
2436 pci_channel_state_t state) 2537 pci_channel_state_t state)
2437{ 2538{
2438 struct adapter *adapter = pci_get_drvdata(pdev); 2539 struct adapter *adapter = pci_get_drvdata(pdev);
2439 int i; 2540 int ret;
2440
2441 /* Stop all ports */
2442 for_each_port(adapter, i) {
2443 struct net_device *netdev = adapter->port[i];
2444
2445 if (netif_running(netdev))
2446 cxgb_close(netdev);
2447 }
2448
2449 if (is_offload(adapter) &&
2450 test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map))
2451 offload_close(&adapter->tdev);
2452
2453 /* Stop SGE timers */
2454 t3_stop_sge_timers(adapter);
2455
2456 adapter->flags &= ~FULL_INIT_DONE;
2457 2541
2458 pci_disable_device(pdev); 2542 ret = t3_adapter_error(adapter, 0);
2459 2543
2460 /* Request a slot reset. */ 2544 /* Request a slot reset. */
2461 return PCI_ERS_RESULT_NEED_RESET; 2545 return PCI_ERS_RESULT_NEED_RESET;
@@ -2471,22 +2555,9 @@ static pci_ers_result_t t3_io_slot_reset(struct pci_dev *pdev)
2471{ 2555{
2472 struct adapter *adapter = pci_get_drvdata(pdev); 2556 struct adapter *adapter = pci_get_drvdata(pdev);
2473 2557
2474 if (pci_enable_device(pdev)) { 2558 if (!t3_reenable_adapter(adapter))
2475 dev_err(&pdev->dev, 2559 return PCI_ERS_RESULT_RECOVERED;
2476 "Cannot re-enable PCI device after reset.\n");
2477 goto err;
2478 }
2479 pci_set_master(pdev);
2480 pci_restore_state(pdev);
2481
2482 /* Free sge resources */
2483 t3_free_sge_resources(adapter);
2484
2485 if (t3_replay_prep_adapter(adapter))
2486 goto err;
2487 2560
2488 return PCI_ERS_RESULT_RECOVERED;
2489err:
2490 return PCI_ERS_RESULT_DISCONNECT; 2561 return PCI_ERS_RESULT_DISCONNECT;
2491} 2562}
2492 2563
@@ -2500,22 +2571,8 @@ err:
2500static void t3_io_resume(struct pci_dev *pdev) 2571static void t3_io_resume(struct pci_dev *pdev)
2501{ 2572{
2502 struct adapter *adapter = pci_get_drvdata(pdev); 2573 struct adapter *adapter = pci_get_drvdata(pdev);
2503 int i;
2504
2505 /* Restart the ports */
2506 for_each_port(adapter, i) {
2507 struct net_device *netdev = adapter->port[i];
2508 2574
2509 if (netif_running(netdev)) { 2575 t3_resume_ports(adapter);
2510 if (cxgb_open(netdev)) {
2511 dev_err(&pdev->dev,
2512 "can't bring device back up"
2513 " after reset\n");
2514 continue;
2515 }
2516 netif_device_attach(netdev);
2517 }
2518 }
2519} 2576}
2520 2577
2521static struct pci_error_handlers t3_err_handler = { 2578static struct pci_error_handlers t3_err_handler = {
@@ -2664,6 +2721,7 @@ static int __devinit init_one(struct pci_dev *pdev,
2664 2721
2665 INIT_LIST_HEAD(&adapter->adapter_list); 2722 INIT_LIST_HEAD(&adapter->adapter_list);
2666 INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task); 2723 INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task);
2724 INIT_WORK(&adapter->fatal_error_handler_task, fatal_error_task);
2667 INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task); 2725 INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task);
2668 2726
2669 for (i = 0; i < ai->nports; ++i) { 2727 for (i = 0; i < ai->nports; ++i) {
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 7346a8e26da1..87919419b707 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -351,7 +351,8 @@ static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
351 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr), 351 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
352 q->buf_size, PCI_DMA_FROMDEVICE); 352 q->buf_size, PCI_DMA_FROMDEVICE);
353 if (q->use_pages) { 353 if (q->use_pages) {
354 put_page(d->pg_chunk.page); 354 if (d->pg_chunk.page)
355 put_page(d->pg_chunk.page);
355 d->pg_chunk.page = NULL; 356 d->pg_chunk.page = NULL;
356 } else { 357 } else {
357 kfree_skb(d->skb); 358 kfree_skb(d->skb);
@@ -583,7 +584,7 @@ static void t3_reset_qset(struct sge_qset *q)
583 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET); 584 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
584 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET); 585 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
585 q->txq_stopped = 0; 586 q->txq_stopped = 0;
586 memset(&q->tx_reclaim_timer, 0, sizeof(q->tx_reclaim_timer)); 587 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
587 kfree(q->lro_frag_tbl); 588 kfree(q->lro_frag_tbl);
588 q->lro_nfrags = q->lro_frag_len = 0; 589 q->lro_nfrags = q->lro_frag_len = 0;
589} 590}
@@ -2840,9 +2841,7 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2840 struct net_lro_mgr *lro_mgr = &q->lro_mgr; 2841 struct net_lro_mgr *lro_mgr = &q->lro_mgr;
2841 2842
2842 init_qset_cntxt(q, id); 2843 init_qset_cntxt(q, id);
2843 init_timer(&q->tx_reclaim_timer); 2844 setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q);
2844 q->tx_reclaim_timer.data = (unsigned long)q;
2845 q->tx_reclaim_timer.function = sge_timer_cb;
2846 2845
2847 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size, 2846 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2848 sizeof(struct rx_desc), 2847 sizeof(struct rx_desc),
diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c
index 04c0e90119af..33470c79ac1c 100644
--- a/drivers/net/cxgb3/t3_hw.c
+++ b/drivers/net/cxgb3/t3_hw.c
@@ -1221,7 +1221,7 @@ struct intr_info {
1221 unsigned int mask; /* bits to check in interrupt status */ 1221 unsigned int mask; /* bits to check in interrupt status */
1222 const char *msg; /* message to print or NULL */ 1222 const char *msg; /* message to print or NULL */
1223 short stat_idx; /* stat counter to increment or -1 */ 1223 short stat_idx; /* stat counter to increment or -1 */
1224 unsigned short fatal:1; /* whether the condition reported is fatal */ 1224 unsigned short fatal; /* whether the condition reported is fatal */
1225}; 1225};
1226 1226
1227/** 1227/**
@@ -3488,7 +3488,7 @@ void early_hw_init(struct adapter *adapter, const struct adapter_info *ai)
3488 * Older PCIe cards lose their config space during reset, PCI-X 3488 * Older PCIe cards lose their config space during reset, PCI-X
3489 * ones don't. 3489 * ones don't.
3490 */ 3490 */
3491static int t3_reset_adapter(struct adapter *adapter) 3491int t3_reset_adapter(struct adapter *adapter)
3492{ 3492{
3493 int i, save_and_restore_pcie = 3493 int i, save_and_restore_pcie =
3494 adapter->params.rev < T3_REV_B2 && is_pcie(adapter); 3494 adapter->params.rev < T3_REV_B2 && is_pcie(adapter);