diff options
author | Chip Coldwell <coldwell@redhat.com> | 2009-02-16 07:11:56 -0500 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-02-18 04:32:01 -0500 |
commit | 82eb03cfd862a65363fa2826de0dbd5474cfe5e2 (patch) | |
tree | e92ab2b93e73d01395cbf88648a8e7ebc841a17b | |
parent | c8cbec6bdf6329279fd14696020f6b59d1d3124d (diff) |
cciss: PCI power management reset for kexec
The kexec kernel resets the CCISS hardware in three steps:
1. Use PCI power management states to reset the controller in the
kexec kernel.
2. Clear the MSI/MSI-X bits in PCI configuration space so that MSI
initialization in the kexec kernel doesn't fail.
3. Use the CCISS "No-op" message to determine when the controller
firmware has recovered from the PCI PM reset.
[akpm@linux-foundation.org: cleanups]
Signed-off-by: Mike Miller <mike.miller@hp.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | drivers/block/cciss.c | 215 |
1 files changed, 215 insertions, 0 deletions
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 01e69383d9c..d2cb67b6117 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c | |||
@@ -3390,6 +3390,203 @@ static void free_hba(int i) | |||
3390 | kfree(p); | 3390 | kfree(p); |
3391 | } | 3391 | } |
3392 | 3392 | ||
3393 | /* Send a message CDB to the firmware. */ | ||
3394 | static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type) | ||
3395 | { | ||
3396 | typedef struct { | ||
3397 | CommandListHeader_struct CommandHeader; | ||
3398 | RequestBlock_struct Request; | ||
3399 | ErrDescriptor_struct ErrorDescriptor; | ||
3400 | } Command; | ||
3401 | static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct); | ||
3402 | Command *cmd; | ||
3403 | dma_addr_t paddr64; | ||
3404 | uint32_t paddr32, tag; | ||
3405 | void __iomem *vaddr; | ||
3406 | int i, err; | ||
3407 | |||
3408 | vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); | ||
3409 | if (vaddr == NULL) | ||
3410 | return -ENOMEM; | ||
3411 | |||
3412 | /* The Inbound Post Queue only accepts 32-bit physical addresses for the | ||
3413 | CCISS commands, so they must be allocated from the lower 4GiB of | ||
3414 | memory. */ | ||
3415 | err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); | ||
3416 | if (err) { | ||
3417 | iounmap(vaddr); | ||
3418 | return -ENOMEM; | ||
3419 | } | ||
3420 | |||
3421 | cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64); | ||
3422 | if (cmd == NULL) { | ||
3423 | iounmap(vaddr); | ||
3424 | return -ENOMEM; | ||
3425 | } | ||
3426 | |||
3427 | /* This must fit, because of the 32-bit consistent DMA mask. Also, | ||
3428 | although there's no guarantee, we assume that the address is at | ||
3429 | least 4-byte aligned (most likely, it's page-aligned). */ | ||
3430 | paddr32 = paddr64; | ||
3431 | |||
3432 | cmd->CommandHeader.ReplyQueue = 0; | ||
3433 | cmd->CommandHeader.SGList = 0; | ||
3434 | cmd->CommandHeader.SGTotal = 0; | ||
3435 | cmd->CommandHeader.Tag.lower = paddr32; | ||
3436 | cmd->CommandHeader.Tag.upper = 0; | ||
3437 | memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8); | ||
3438 | |||
3439 | cmd->Request.CDBLen = 16; | ||
3440 | cmd->Request.Type.Type = TYPE_MSG; | ||
3441 | cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE; | ||
3442 | cmd->Request.Type.Direction = XFER_NONE; | ||
3443 | cmd->Request.Timeout = 0; /* Don't time out */ | ||
3444 | cmd->Request.CDB[0] = opcode; | ||
3445 | cmd->Request.CDB[1] = type; | ||
3446 | memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */ | ||
3447 | |||
3448 | cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command); | ||
3449 | cmd->ErrorDescriptor.Addr.upper = 0; | ||
3450 | cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct); | ||
3451 | |||
3452 | writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET); | ||
3453 | |||
3454 | for (i = 0; i < 10; i++) { | ||
3455 | tag = readl(vaddr + SA5_REPLY_PORT_OFFSET); | ||
3456 | if ((tag & ~3) == paddr32) | ||
3457 | break; | ||
3458 | schedule_timeout_uninterruptible(HZ); | ||
3459 | } | ||
3460 | |||
3461 | iounmap(vaddr); | ||
3462 | |||
3463 | /* we leak the DMA buffer here ... no choice since the controller could | ||
3464 | still complete the command. */ | ||
3465 | if (i == 10) { | ||
3466 | printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n", | ||
3467 | opcode, type); | ||
3468 | return -ETIMEDOUT; | ||
3469 | } | ||
3470 | |||
3471 | pci_free_consistent(pdev, cmd_sz, cmd, paddr64); | ||
3472 | |||
3473 | if (tag & 2) { | ||
3474 | printk(KERN_ERR "cciss: controller message %02x:%02x failed\n", | ||
3475 | opcode, type); | ||
3476 | return -EIO; | ||
3477 | } | ||
3478 | |||
3479 | printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n", | ||
3480 | opcode, type); | ||
3481 | return 0; | ||
3482 | } | ||
3483 | |||
3484 | #define cciss_soft_reset_controller(p) cciss_message(p, 1, 0) | ||
3485 | #define cciss_noop(p) cciss_message(p, 3, 0) | ||
3486 | |||
3487 | static __devinit int cciss_reset_msi(struct pci_dev *pdev) | ||
3488 | { | ||
3489 | /* the #defines are stolen from drivers/pci/msi.h. */ | ||
3490 | #define msi_control_reg(base) (base + PCI_MSI_FLAGS) | ||
3491 | #define PCI_MSIX_FLAGS_ENABLE (1 << 15) | ||
3492 | |||
3493 | int pos; | ||
3494 | u16 control = 0; | ||
3495 | |||
3496 | pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); | ||
3497 | if (pos) { | ||
3498 | pci_read_config_word(pdev, msi_control_reg(pos), &control); | ||
3499 | if (control & PCI_MSI_FLAGS_ENABLE) { | ||
3500 | printk(KERN_INFO "cciss: resetting MSI\n"); | ||
3501 | pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE); | ||
3502 | } | ||
3503 | } | ||
3504 | |||
3505 | pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX); | ||
3506 | if (pos) { | ||
3507 | pci_read_config_word(pdev, msi_control_reg(pos), &control); | ||
3508 | if (control & PCI_MSIX_FLAGS_ENABLE) { | ||
3509 | printk(KERN_INFO "cciss: resetting MSI-X\n"); | ||
3510 | pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE); | ||
3511 | } | ||
3512 | } | ||
3513 | |||
3514 | return 0; | ||
3515 | } | ||
3516 | |||
3517 | /* This does a hard reset of the controller using PCI power management | ||
3518 | * states. */ | ||
3519 | static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev) | ||
3520 | { | ||
3521 | u16 pmcsr, saved_config_space[32]; | ||
3522 | int i, pos; | ||
3523 | |||
3524 | printk(KERN_INFO "cciss: using PCI PM to reset controller\n"); | ||
3525 | |||
3526 | /* This is very nearly the same thing as | ||
3527 | |||
3528 | pci_save_state(pci_dev); | ||
3529 | pci_set_power_state(pci_dev, PCI_D3hot); | ||
3530 | pci_set_power_state(pci_dev, PCI_D0); | ||
3531 | pci_restore_state(pci_dev); | ||
3532 | |||
3533 | but we can't use these nice canned kernel routines on | ||
3534 | kexec, because they also check the MSI/MSI-X state in PCI | ||
3535 | configuration space and do the wrong thing when it is | ||
3536 | set/cleared. Also, the pci_save/restore_state functions | ||
3537 | violate the ordering requirements for restoring the | ||
3538 | configuration space from the CCISS document (see the | ||
3539 | comment below). So we roll our own .... */ | ||
3540 | |||
3541 | for (i = 0; i < 32; i++) | ||
3542 | pci_read_config_word(pdev, 2*i, &saved_config_space[i]); | ||
3543 | |||
3544 | pos = pci_find_capability(pdev, PCI_CAP_ID_PM); | ||
3545 | if (pos == 0) { | ||
3546 | printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n"); | ||
3547 | return -ENODEV; | ||
3548 | } | ||
3549 | |||
3550 | /* Quoting from the Open CISS Specification: "The Power | ||
3551 | * Management Control/Status Register (CSR) controls the power | ||
3552 | * state of the device. The normal operating state is D0, | ||
3553 | * CSR=00h. The software off state is D3, CSR=03h. To reset | ||
3554 | * the controller, place the interface device in D3 then to | ||
3555 | * D0, this causes a secondary PCI reset which will reset the | ||
3556 | * controller." */ | ||
3557 | |||
3558 | /* enter the D3hot power management state */ | ||
3559 | pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr); | ||
3560 | pmcsr &= ~PCI_PM_CTRL_STATE_MASK; | ||
3561 | pmcsr |= PCI_D3hot; | ||
3562 | pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr); | ||
3563 | |||
3564 | schedule_timeout_uninterruptible(HZ >> 1); | ||
3565 | |||
3566 | /* enter the D0 power management state */ | ||
3567 | pmcsr &= ~PCI_PM_CTRL_STATE_MASK; | ||
3568 | pmcsr |= PCI_D0; | ||
3569 | pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr); | ||
3570 | |||
3571 | schedule_timeout_uninterruptible(HZ >> 1); | ||
3572 | |||
3573 | /* Restore the PCI configuration space. The Open CISS | ||
3574 | * Specification says, "Restore the PCI Configuration | ||
3575 | * Registers, offsets 00h through 60h. It is important to | ||
3576 | * restore the command register, 16-bits at offset 04h, | ||
3577 | * last. Do not restore the configuration status register, | ||
3578 | * 16-bits at offset 06h." Note that the offset is 2*i. */ | ||
3579 | for (i = 0; i < 32; i++) { | ||
3580 | if (i == 2 || i == 3) | ||
3581 | continue; | ||
3582 | pci_write_config_word(pdev, 2*i, saved_config_space[i]); | ||
3583 | } | ||
3584 | wmb(); | ||
3585 | pci_write_config_word(pdev, 4, saved_config_space[2]); | ||
3586 | |||
3587 | return 0; | ||
3588 | } | ||
3589 | |||
3393 | /* | 3590 | /* |
3394 | * This is it. Find all the controllers and register them. I really hate | 3591 | * This is it. Find all the controllers and register them. I really hate |
3395 | * stealing all these major device numbers. | 3592 | * stealing all these major device numbers. |
@@ -3404,6 +3601,24 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, | |||
3404 | int dac, return_code; | 3601 | int dac, return_code; |
3405 | InquiryData_struct *inq_buff = NULL; | 3602 | InquiryData_struct *inq_buff = NULL; |
3406 | 3603 | ||
3604 | if (reset_devices) { | ||
3605 | /* Reset the controller with a PCI power-cycle */ | ||
3606 | if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev)) | ||
3607 | return -ENODEV; | ||
3608 | |||
3609 | /* Some devices (notably the HP Smart Array 5i Controller) | ||
3610 | need a little pause here */ | ||
3611 | schedule_timeout_uninterruptible(30*HZ); | ||
3612 | |||
3613 | /* Now try to get the controller to respond to a no-op */ | ||
3614 | for (i=0; i<12; i++) { | ||
3615 | if (cciss_noop(pdev) == 0) | ||
3616 | break; | ||
3617 | else | ||
3618 | printk("cciss: no-op failed%s\n", (i < 11 ? "; re-trying" : "")); | ||
3619 | } | ||
3620 | } | ||
3621 | |||
3407 | i = alloc_cciss_hba(); | 3622 | i = alloc_cciss_hba(); |
3408 | if (i < 0) | 3623 | if (i < 0) |
3409 | return -1; | 3624 | return -1; |