diff options
author | Divy Le Ray <divy@chelsio.com> | 2008-10-08 20:36:03 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-10-08 20:36:03 -0400 |
commit | 20d3fc11505a2706a33b4c9a932af036d836727f (patch) | |
tree | 8c72b6673ac62166225d30a409b09c2354f5286b /drivers | |
parent | 45cec1bac0719c904bb5f4405c2937f7e715888c (diff) |
cxgb3: reset the adapter on fatal error
When a fatal error occurs, bring the ports down, reset the chip,
and bring the ports back up.
Factor out the code shared by EEH and fatal error recovery.
Fix timer usage when bringing up/resetting SGE queue sets.
Signed-off-by: Divy Le Ray <divy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/cxgb3/adapter.h | 1 | ||||
-rw-r--r-- | drivers/net/cxgb3/common.h | 1 | ||||
-rw-r--r-- | drivers/net/cxgb3/cxgb3_main.c | 164 | ||||
-rw-r--r-- | drivers/net/cxgb3/sge.c | 9 | ||||
-rw-r--r-- | drivers/net/cxgb3/t3_hw.c | 4 |
5 files changed, 119 insertions, 60 deletions
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h index e9da28597233..02dd69b90abe 100644 --- a/drivers/net/cxgb3/adapter.h +++ b/drivers/net/cxgb3/adapter.h | |||
@@ -240,6 +240,7 @@ struct adapter { | |||
240 | unsigned int check_task_cnt; | 240 | unsigned int check_task_cnt; |
241 | struct delayed_work adap_check_task; | 241 | struct delayed_work adap_check_task; |
242 | struct work_struct ext_intr_handler_task; | 242 | struct work_struct ext_intr_handler_task; |
243 | struct work_struct fatal_error_handler_task; | ||
243 | 244 | ||
244 | struct dentry *debugfs_root; | 245 | struct dentry *debugfs_root; |
245 | 246 | ||
diff --git a/drivers/net/cxgb3/common.h b/drivers/net/cxgb3/common.h index 9ecf8a6dc97f..d6dbcd403a7d 100644 --- a/drivers/net/cxgb3/common.h +++ b/drivers/net/cxgb3/common.h | |||
@@ -698,6 +698,7 @@ int t3_check_fw_version(struct adapter *adapter, int *must_load); | |||
698 | int t3_init_hw(struct adapter *adapter, u32 fw_params); | 698 | int t3_init_hw(struct adapter *adapter, u32 fw_params); |
699 | void mac_prep(struct cmac *mac, struct adapter *adapter, int index); | 699 | void mac_prep(struct cmac *mac, struct adapter *adapter, int index); |
700 | void early_hw_init(struct adapter *adapter, const struct adapter_info *ai); | 700 | void early_hw_init(struct adapter *adapter, const struct adapter_info *ai); |
701 | int t3_reset_adapter(struct adapter *adapter); | ||
701 | int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai, | 702 | int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai, |
702 | int reset); | 703 | int reset); |
703 | int t3_replay_prep_adapter(struct adapter *adapter); | 704 | int t3_replay_prep_adapter(struct adapter *adapter); |
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index d355c826b9b9..0e51d49842fa 100644 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c | |||
@@ -892,6 +892,13 @@ static int cxgb_up(struct adapter *adap) | |||
892 | goto out; | 892 | goto out; |
893 | } | 893 | } |
894 | 894 | ||
895 | /* | ||
896 | * Clear interrupts now to catch errors if t3_init_hw fails. | ||
897 | * We clear them again later as initialization may trigger | ||
898 | * conditions that can interrupt. | ||
899 | */ | ||
900 | t3_intr_clear(adap); | ||
901 | |||
895 | err = t3_init_hw(adap, 0); | 902 | err = t3_init_hw(adap, 0); |
896 | if (err) | 903 | if (err) |
897 | goto out; | 904 | goto out; |
@@ -1101,9 +1108,9 @@ static int cxgb_close(struct net_device *dev) | |||
1101 | netif_carrier_off(dev); | 1108 | netif_carrier_off(dev); |
1102 | t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX); | 1109 | t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX); |
1103 | 1110 | ||
1104 | spin_lock(&adapter->work_lock); /* sync with update task */ | 1111 | spin_lock_irq(&adapter->work_lock); /* sync with update task */ |
1105 | clear_bit(pi->port_id, &adapter->open_device_map); | 1112 | clear_bit(pi->port_id, &adapter->open_device_map); |
1106 | spin_unlock(&adapter->work_lock); | 1113 | spin_unlock_irq(&adapter->work_lock); |
1107 | 1114 | ||
1108 | if (!(adapter->open_device_map & PORT_MASK)) | 1115 | if (!(adapter->open_device_map & PORT_MASK)) |
1109 | cancel_rearming_delayed_workqueue(cxgb3_wq, | 1116 | cancel_rearming_delayed_workqueue(cxgb3_wq, |
@@ -2356,10 +2363,10 @@ static void t3_adap_check_task(struct work_struct *work) | |||
2356 | check_t3b2_mac(adapter); | 2363 | check_t3b2_mac(adapter); |
2357 | 2364 | ||
2358 | /* Schedule the next check update if any port is active. */ | 2365 | /* Schedule the next check update if any port is active. */ |
2359 | spin_lock(&adapter->work_lock); | 2366 | spin_lock_irq(&adapter->work_lock); |
2360 | if (adapter->open_device_map & PORT_MASK) | 2367 | if (adapter->open_device_map & PORT_MASK) |
2361 | schedule_chk_task(adapter); | 2368 | schedule_chk_task(adapter); |
2362 | spin_unlock(&adapter->work_lock); | 2369 | spin_unlock_irq(&adapter->work_lock); |
2363 | } | 2370 | } |
2364 | 2371 | ||
2365 | /* | 2372 | /* |
@@ -2404,6 +2411,96 @@ void t3_os_ext_intr_handler(struct adapter *adapter) | |||
2404 | spin_unlock(&adapter->work_lock); | 2411 | spin_unlock(&adapter->work_lock); |
2405 | } | 2412 | } |
2406 | 2413 | ||
2414 | static int t3_adapter_error(struct adapter *adapter, int reset) | ||
2415 | { | ||
2416 | int i, ret = 0; | ||
2417 | |||
2418 | /* Stop all ports */ | ||
2419 | for_each_port(adapter, i) { | ||
2420 | struct net_device *netdev = adapter->port[i]; | ||
2421 | |||
2422 | if (netif_running(netdev)) | ||
2423 | cxgb_close(netdev); | ||
2424 | } | ||
2425 | |||
2426 | if (is_offload(adapter) && | ||
2427 | test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) | ||
2428 | offload_close(&adapter->tdev); | ||
2429 | |||
2430 | /* Stop SGE timers */ | ||
2431 | t3_stop_sge_timers(adapter); | ||
2432 | |||
2433 | adapter->flags &= ~FULL_INIT_DONE; | ||
2434 | |||
2435 | if (reset) | ||
2436 | ret = t3_reset_adapter(adapter); | ||
2437 | |||
2438 | pci_disable_device(adapter->pdev); | ||
2439 | |||
2440 | return ret; | ||
2441 | } | ||
2442 | |||
2443 | static int t3_reenable_adapter(struct adapter *adapter) | ||
2444 | { | ||
2445 | if (pci_enable_device(adapter->pdev)) { | ||
2446 | dev_err(&adapter->pdev->dev, | ||
2447 | "Cannot re-enable PCI device after reset.\n"); | ||
2448 | goto err; | ||
2449 | } | ||
2450 | pci_set_master(adapter->pdev); | ||
2451 | pci_restore_state(adapter->pdev); | ||
2452 | |||
2453 | /* Free sge resources */ | ||
2454 | t3_free_sge_resources(adapter); | ||
2455 | |||
2456 | if (t3_replay_prep_adapter(adapter)) | ||
2457 | goto err; | ||
2458 | |||
2459 | return 0; | ||
2460 | err: | ||
2461 | return -1; | ||
2462 | } | ||
2463 | |||
2464 | static void t3_resume_ports(struct adapter *adapter) | ||
2465 | { | ||
2466 | int i; | ||
2467 | |||
2468 | /* Restart the ports */ | ||
2469 | for_each_port(adapter, i) { | ||
2470 | struct net_device *netdev = adapter->port[i]; | ||
2471 | |||
2472 | if (netif_running(netdev)) { | ||
2473 | if (cxgb_open(netdev)) { | ||
2474 | dev_err(&adapter->pdev->dev, | ||
2475 | "can't bring device back up" | ||
2476 | " after reset\n"); | ||
2477 | continue; | ||
2478 | } | ||
2479 | } | ||
2480 | } | ||
2481 | } | ||
2482 | |||
2483 | /* | ||
2484 | * processes a fatal error. | ||
2485 | * Bring the ports down, reset the chip, bring the ports back up. | ||
2486 | */ | ||
2487 | static void fatal_error_task(struct work_struct *work) | ||
2488 | { | ||
2489 | struct adapter *adapter = container_of(work, struct adapter, | ||
2490 | fatal_error_handler_task); | ||
2491 | int err = 0; | ||
2492 | |||
2493 | rtnl_lock(); | ||
2494 | err = t3_adapter_error(adapter, 1); | ||
2495 | if (!err) | ||
2496 | err = t3_reenable_adapter(adapter); | ||
2497 | if (!err) | ||
2498 | t3_resume_ports(adapter); | ||
2499 | |||
2500 | CH_ALERT(adapter, "adapter reset %s\n", err ? "failed" : "succeeded"); | ||
2501 | rtnl_unlock(); | ||
2502 | } | ||
2503 | |||
2407 | void t3_fatal_err(struct adapter *adapter) | 2504 | void t3_fatal_err(struct adapter *adapter) |
2408 | { | 2505 | { |
2409 | unsigned int fw_status[4]; | 2506 | unsigned int fw_status[4]; |
@@ -2414,7 +2511,11 @@ void t3_fatal_err(struct adapter *adapter) | |||
2414 | t3_write_reg(adapter, A_XGM_RX_CTRL, 0); | 2511 | t3_write_reg(adapter, A_XGM_RX_CTRL, 0); |
2415 | t3_write_reg(adapter, XGM_REG(A_XGM_TX_CTRL, 1), 0); | 2512 | t3_write_reg(adapter, XGM_REG(A_XGM_TX_CTRL, 1), 0); |
2416 | t3_write_reg(adapter, XGM_REG(A_XGM_RX_CTRL, 1), 0); | 2513 | t3_write_reg(adapter, XGM_REG(A_XGM_RX_CTRL, 1), 0); |
2514 | |||
2515 | spin_lock(&adapter->work_lock); | ||
2417 | t3_intr_disable(adapter); | 2516 | t3_intr_disable(adapter); |
2517 | queue_work(cxgb3_wq, &adapter->fatal_error_handler_task); | ||
2518 | spin_unlock(&adapter->work_lock); | ||
2418 | } | 2519 | } |
2419 | CH_ALERT(adapter, "encountered fatal error, operation suspended\n"); | 2520 | CH_ALERT(adapter, "encountered fatal error, operation suspended\n"); |
2420 | if (!t3_cim_ctl_blk_read(adapter, 0xa0, 4, fw_status)) | 2521 | if (!t3_cim_ctl_blk_read(adapter, 0xa0, 4, fw_status)) |
@@ -2436,26 +2537,9 @@ static pci_ers_result_t t3_io_error_detected(struct pci_dev *pdev, | |||
2436 | pci_channel_state_t state) | 2537 | pci_channel_state_t state) |
2437 | { | 2538 | { |
2438 | struct adapter *adapter = pci_get_drvdata(pdev); | 2539 | struct adapter *adapter = pci_get_drvdata(pdev); |
2439 | int i; | 2540 | int ret; |
2440 | |||
2441 | /* Stop all ports */ | ||
2442 | for_each_port(adapter, i) { | ||
2443 | struct net_device *netdev = adapter->port[i]; | ||
2444 | |||
2445 | if (netif_running(netdev)) | ||
2446 | cxgb_close(netdev); | ||
2447 | } | ||
2448 | |||
2449 | if (is_offload(adapter) && | ||
2450 | test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) | ||
2451 | offload_close(&adapter->tdev); | ||
2452 | |||
2453 | /* Stop SGE timers */ | ||
2454 | t3_stop_sge_timers(adapter); | ||
2455 | |||
2456 | adapter->flags &= ~FULL_INIT_DONE; | ||
2457 | 2541 | ||
2458 | pci_disable_device(pdev); | 2542 | ret = t3_adapter_error(adapter, 0); |
2459 | 2543 | ||
2460 | /* Request a slot reset. */ | 2544 | /* Request a slot reset. */ |
2461 | return PCI_ERS_RESULT_NEED_RESET; | 2545 | return PCI_ERS_RESULT_NEED_RESET; |
@@ -2471,22 +2555,9 @@ static pci_ers_result_t t3_io_slot_reset(struct pci_dev *pdev) | |||
2471 | { | 2555 | { |
2472 | struct adapter *adapter = pci_get_drvdata(pdev); | 2556 | struct adapter *adapter = pci_get_drvdata(pdev); |
2473 | 2557 | ||
2474 | if (pci_enable_device(pdev)) { | 2558 | if (!t3_reenable_adapter(adapter)) |
2475 | dev_err(&pdev->dev, | 2559 | return PCI_ERS_RESULT_RECOVERED; |
2476 | "Cannot re-enable PCI device after reset.\n"); | ||
2477 | goto err; | ||
2478 | } | ||
2479 | pci_set_master(pdev); | ||
2480 | pci_restore_state(pdev); | ||
2481 | |||
2482 | /* Free sge resources */ | ||
2483 | t3_free_sge_resources(adapter); | ||
2484 | |||
2485 | if (t3_replay_prep_adapter(adapter)) | ||
2486 | goto err; | ||
2487 | 2560 | ||
2488 | return PCI_ERS_RESULT_RECOVERED; | ||
2489 | err: | ||
2490 | return PCI_ERS_RESULT_DISCONNECT; | 2561 | return PCI_ERS_RESULT_DISCONNECT; |
2491 | } | 2562 | } |
2492 | 2563 | ||
@@ -2500,22 +2571,8 @@ err: | |||
2500 | static void t3_io_resume(struct pci_dev *pdev) | 2571 | static void t3_io_resume(struct pci_dev *pdev) |
2501 | { | 2572 | { |
2502 | struct adapter *adapter = pci_get_drvdata(pdev); | 2573 | struct adapter *adapter = pci_get_drvdata(pdev); |
2503 | int i; | ||
2504 | |||
2505 | /* Restart the ports */ | ||
2506 | for_each_port(adapter, i) { | ||
2507 | struct net_device *netdev = adapter->port[i]; | ||
2508 | 2574 | ||
2509 | if (netif_running(netdev)) { | 2575 | t3_resume_ports(adapter); |
2510 | if (cxgb_open(netdev)) { | ||
2511 | dev_err(&pdev->dev, | ||
2512 | "can't bring device back up" | ||
2513 | " after reset\n"); | ||
2514 | continue; | ||
2515 | } | ||
2516 | netif_device_attach(netdev); | ||
2517 | } | ||
2518 | } | ||
2519 | } | 2576 | } |
2520 | 2577 | ||
2521 | static struct pci_error_handlers t3_err_handler = { | 2578 | static struct pci_error_handlers t3_err_handler = { |
@@ -2664,6 +2721,7 @@ static int __devinit init_one(struct pci_dev *pdev, | |||
2664 | 2721 | ||
2665 | INIT_LIST_HEAD(&adapter->adapter_list); | 2722 | INIT_LIST_HEAD(&adapter->adapter_list); |
2666 | INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task); | 2723 | INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task); |
2724 | INIT_WORK(&adapter->fatal_error_handler_task, fatal_error_task); | ||
2667 | INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task); | 2725 | INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task); |
2668 | 2726 | ||
2669 | for (i = 0; i < ai->nports; ++i) { | 2727 | for (i = 0; i < ai->nports; ++i) { |
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 7346a8e26da1..87919419b707 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c | |||
@@ -351,7 +351,8 @@ static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q) | |||
351 | pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr), | 351 | pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr), |
352 | q->buf_size, PCI_DMA_FROMDEVICE); | 352 | q->buf_size, PCI_DMA_FROMDEVICE); |
353 | if (q->use_pages) { | 353 | if (q->use_pages) { |
354 | put_page(d->pg_chunk.page); | 354 | if (d->pg_chunk.page) |
355 | put_page(d->pg_chunk.page); | ||
355 | d->pg_chunk.page = NULL; | 356 | d->pg_chunk.page = NULL; |
356 | } else { | 357 | } else { |
357 | kfree_skb(d->skb); | 358 | kfree_skb(d->skb); |
@@ -583,7 +584,7 @@ static void t3_reset_qset(struct sge_qset *q) | |||
583 | memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET); | 584 | memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET); |
584 | memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET); | 585 | memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET); |
585 | q->txq_stopped = 0; | 586 | q->txq_stopped = 0; |
586 | memset(&q->tx_reclaim_timer, 0, sizeof(q->tx_reclaim_timer)); | 587 | q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */ |
587 | kfree(q->lro_frag_tbl); | 588 | kfree(q->lro_frag_tbl); |
588 | q->lro_nfrags = q->lro_frag_len = 0; | 589 | q->lro_nfrags = q->lro_frag_len = 0; |
589 | } | 590 | } |
@@ -2840,9 +2841,7 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, | |||
2840 | struct net_lro_mgr *lro_mgr = &q->lro_mgr; | 2841 | struct net_lro_mgr *lro_mgr = &q->lro_mgr; |
2841 | 2842 | ||
2842 | init_qset_cntxt(q, id); | 2843 | init_qset_cntxt(q, id); |
2843 | init_timer(&q->tx_reclaim_timer); | 2844 | setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q); |
2844 | q->tx_reclaim_timer.data = (unsigned long)q; | ||
2845 | q->tx_reclaim_timer.function = sge_timer_cb; | ||
2846 | 2845 | ||
2847 | q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size, | 2846 | q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size, |
2848 | sizeof(struct rx_desc), | 2847 | sizeof(struct rx_desc), |
diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c index 04c0e90119af..33470c79ac1c 100644 --- a/drivers/net/cxgb3/t3_hw.c +++ b/drivers/net/cxgb3/t3_hw.c | |||
@@ -1221,7 +1221,7 @@ struct intr_info { | |||
1221 | unsigned int mask; /* bits to check in interrupt status */ | 1221 | unsigned int mask; /* bits to check in interrupt status */ |
1222 | const char *msg; /* message to print or NULL */ | 1222 | const char *msg; /* message to print or NULL */ |
1223 | short stat_idx; /* stat counter to increment or -1 */ | 1223 | short stat_idx; /* stat counter to increment or -1 */ |
1224 | unsigned short fatal:1; /* whether the condition reported is fatal */ | 1224 | unsigned short fatal; /* whether the condition reported is fatal */ |
1225 | }; | 1225 | }; |
1226 | 1226 | ||
1227 | /** | 1227 | /** |
@@ -3488,7 +3488,7 @@ void early_hw_init(struct adapter *adapter, const struct adapter_info *ai) | |||
3488 | * Older PCIe cards lose their config space during reset, PCI-X | 3488 | * Older PCIe cards lose their config space during reset, PCI-X |
3489 | * ones don't. | 3489 | * ones don't. |
3490 | */ | 3490 | */ |
3491 | static int t3_reset_adapter(struct adapter *adapter) | 3491 | int t3_reset_adapter(struct adapter *adapter) |
3492 | { | 3492 | { |
3493 | int i, save_and_restore_pcie = | 3493 | int i, save_and_restore_pcie = |
3494 | adapter->params.rev < T3_REV_B2 && is_pcie(adapter); | 3494 | adapter->params.rev < T3_REV_B2 && is_pcie(adapter); |