aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/scsi/megaraid
diff options
context:
space:
mode:
authorSumit.Saxena@avagotech.com <Sumit.Saxena@avagotech.com>2014-09-12 09:27:28 -0400
committerChristoph Hellwig <hch@lst.de>2014-09-16 12:14:23 -0400
commitfc62b3fc9021526d096d940ec62e74af72eb1e10 (patch)
tree4836d52f490e67063915ff80df65c68c206d70a7 /drivers/scsi/megaraid
parentdb4fc864ae2a27153f7f0c2af169ad4447cb82bc (diff)
megaraid_sas : Firmware crash dump feature support
Resending the patch. Addressed the review comments from Tomas Henzl. Moved buff_offset inside the spinlock, corrected the loop at crash dump buffer free, and added a reset_devices check to disable the fw crash dump feature in the kdump kernel. This feature will provide an interface similar to the kernel crash dump feature. When megaraid firmware encounters any crash, the driver will collect the firmware raw image and dump it into a pre-configured location. The driver will allocate two different segments of memory. #1 Non-DMA-able large buffer (will be allocated on demand) to capture the actual FW crash dump. #2 DMA buffer (persistent allocation) just to do an arbitrator job. Firmware will keep writing crash dump data in chunks of DMA buffer size into #2, which will be copied back by the driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) The host driver can allocate a maximum of 512MB of host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the firmware. The driver may not be able to allocate 512 MB. In that case, the driver will allocate as much memory as is available at run time to store crash dump data. Let’s call this buffer the Host Crash Buffer. The Host Crash Buffer will not be contiguous as a whole, but it will consist of multiple chunks of contiguous memory. This will be internal to the driver; firmware and application are unaware of it. A partial allocation of the Host Crash Buffer may still hold valid information for debugging, depending upon what was collected in that buffer and on the nature of the failure. A complete crash dump is the best case, but we do want to capture a partial buffer just to grab something rather than nothing. The Host Crash Buffer will be allocated only when FW crash dump data is available, and will be deallocated once the application copies the Host Crash Buffer to a file. The Host Crash Buffer size can be anything between 1MB and 512MB. (It will be a multiple of 1MB.) B.)
Irrespective of the underlying firmware's crash dump capability, the driver will allocate a DMA buffer at start of day for each MR controller. Let’s call this buffer the “DMA Crash Buffer”. For this feature, the size of the DMA crash buffer will be 1MB. (We will not gain much even if the DMA buffer size is increased.) C.) The driver will now read Controller Info by sending the existing DCMD “MR_DCMD_CTRL_GET_INFO”. The driver should extract the information from the ctrl info provided by firmware and figure out whether the firmware supports the crash dump feature or not. The driver will enable the crash dump feature only if “Firmware supports crash dump” + “Driver was able to create the DMA Crash Buffer”. If either one of the above is not set, the crash dump feature should be disabled in the driver. Firmware will enable the crash dump feature only if “Driver sends DCMD MR_DCMD_CTRL_SET_CRASH_DUMP_PARAMS with MR_CRASH_BUF_TURN_ON”. A helper application/script should use the sysfs parameters fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'drivers/scsi/megaraid')
-rw-r--r--drivers/scsi/megaraid/megaraid_sas.h58
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c294
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_fusion.c172
3 files changed, 519 insertions, 5 deletions
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index bc7adcf537f2..e0f03e2f6ddf 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -105,6 +105,9 @@
105#define MFI_STATE_READY 0xB0000000 105#define MFI_STATE_READY 0xB0000000
106#define MFI_STATE_OPERATIONAL 0xC0000000 106#define MFI_STATE_OPERATIONAL 0xC0000000
107#define MFI_STATE_FAULT 0xF0000000 107#define MFI_STATE_FAULT 0xF0000000
108#define MFI_STATE_FORCE_OCR 0x00000080
109#define MFI_STATE_DMADONE 0x00000008
110#define MFI_STATE_CRASH_DUMP_DONE 0x00000004
108#define MFI_RESET_REQUIRED 0x00000001 111#define MFI_RESET_REQUIRED 0x00000001
109#define MFI_RESET_ADAPTER 0x00000002 112#define MFI_RESET_ADAPTER 0x00000002
110#define MEGAMFI_FRAME_SIZE 64 113#define MEGAMFI_FRAME_SIZE 64
@@ -191,6 +194,9 @@
191#define MR_DCMD_CLUSTER_RESET_LD 0x08010200 194#define MR_DCMD_CLUSTER_RESET_LD 0x08010200
192#define MR_DCMD_PD_LIST_QUERY 0x02010100 195#define MR_DCMD_PD_LIST_QUERY 0x02010100
193 196
197#define MR_DCMD_CTRL_SET_CRASH_DUMP_PARAMS 0x01190100
198#define MR_DRIVER_SET_APP_CRASHDUMP_MODE (0xF0010000 | 0x0600)
199
194/* 200/*
195 * Global functions 201 * Global functions
196 */ 202 */
@@ -264,6 +270,25 @@ enum MFI_STAT {
264}; 270};
265 271
266/* 272/*
273 * Crash dump related defines
274 */
275#define MAX_CRASH_DUMP_SIZE 512
276#define CRASH_DMA_BUF_SIZE (1024 * 1024)
277
/*
 * State of the host copy of a firmware crash dump, exposed through the
 * fw_crash_state sysfs attribute.
 */
enum MR_FW_CRASH_DUMP_STATE {
	/* No dump is held; set at probe and after the host buffers are freed */
	UNAVAILABLE	= 0,
	/* Set by the crash dump worker once collection has finished */
	AVAILABLE	= 1,
	/* Values written by the application through fw_crash_state */
	COPYING		= 2,
	COPIED		= 3,
	COPY_ERROR	= 4,
};
285
/*
 * Value placed in mbox.b[0] of the MR_DCMD_CTRL_SET_CRASH_DUMP_PARAMS DCMD
 * to tell the firmware to turn the crash dump feature off or on.
 */
enum _MR_CRASH_BUF_STATUS {
	MR_CRASH_BUF_TURN_OFF	= 0,
	MR_CRASH_BUF_TURN_ON	= 1,
};
290
291/*
267 * Number of mailbox bytes in DCMD message frame 292 * Number of mailbox bytes in DCMD message frame
268 */ 293 */
269#define MFI_MBOX_SIZE 12 294#define MFI_MBOX_SIZE 12
@@ -933,7 +958,19 @@ struct megasas_ctrl_info {
933 u8 reserved; /*0x7E7*/ 958 u8 reserved; /*0x7E7*/
934 } iov; 959 } iov;
935 960
936 u8 pad[0x800-0x7E8]; /*0x7E8 pad to 2k */ 961 struct {
962#if defined(__BIG_ENDIAN_BITFIELD)
963 u32 reserved:25;
964 u32 supportCrashDump:1;
965 u32 reserved1:6;
966#else
967 u32 reserved1:6;
968 u32 supportCrashDump:1;
969 u32 reserved:25;
970#endif
971 } adapterOperations3;
972
973 u8 pad[0x800-0x7EC];
937} __packed; 974} __packed;
938 975
939/* 976/*
@@ -1559,6 +1596,20 @@ struct megasas_instance {
1559 u32 *reply_queue; 1596 u32 *reply_queue;
1560 dma_addr_t reply_queue_h; 1597 dma_addr_t reply_queue_h;
1561 1598
1599 u32 *crash_dump_buf;
1600 dma_addr_t crash_dump_h;
1601 void *crash_buf[MAX_CRASH_DUMP_SIZE];
1602 u32 crash_buf_pages;
1603 unsigned int fw_crash_buffer_size;
1604 unsigned int fw_crash_state;
1605 unsigned int fw_crash_buffer_offset;
1606 u32 drv_buf_index;
1607 u32 drv_buf_alloc;
1608 u32 crash_dump_fw_support;
1609 u32 crash_dump_drv_support;
1610 u32 crash_dump_app_support;
1611 spinlock_t crashdump_lock;
1612
1562 struct megasas_register_set __iomem *reg_set; 1613 struct megasas_register_set __iomem *reg_set;
1563 u32 *reply_post_host_index_addr[MR_MAX_MSIX_REG_ARRAY]; 1614 u32 *reply_post_host_index_addr[MR_MAX_MSIX_REG_ARRAY];
1564 struct megasas_pd_list pd_list[MEGASAS_MAX_PD]; 1615 struct megasas_pd_list pd_list[MEGASAS_MAX_PD];
@@ -1606,6 +1657,7 @@ struct megasas_instance {
1606 struct megasas_instance_template *instancet; 1657 struct megasas_instance_template *instancet;
1607 struct tasklet_struct isr_tasklet; 1658 struct tasklet_struct isr_tasklet;
1608 struct work_struct work_init; 1659 struct work_struct work_init;
1660 struct work_struct crash_init;
1609 1661
1610 u8 flag; 1662 u8 flag;
1611 u8 unload; 1663 u8 unload;
@@ -1830,4 +1882,8 @@ u16 MR_LdSpanArrayGet(u32 ld, u32 span, struct MR_FW_RAID_MAP_ALL *map);
1830u16 MR_PdDevHandleGet(u32 pd, struct MR_FW_RAID_MAP_ALL *map); 1882u16 MR_PdDevHandleGet(u32 pd, struct MR_FW_RAID_MAP_ALL *map);
1831u16 MR_GetLDTgtId(u32 ld, struct MR_FW_RAID_MAP_ALL *map); 1883u16 MR_GetLDTgtId(u32 ld, struct MR_FW_RAID_MAP_ALL *map);
1832 1884
1885int megasas_set_crash_dump_params(struct megasas_instance *instance,
1886 u8 crash_buf_state);
1887void megasas_free_host_crash_buffer(struct megasas_instance *instance);
1888void megasas_fusion_crash_dump_wq(struct work_struct *work);
1833#endif /*LSI_MEGARAID_SAS_H */ 1889#endif /*LSI_MEGARAID_SAS_H */
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index a894f13c246f..ff96e3c58fbf 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -2560,6 +2560,152 @@ static int megasas_change_queue_depth(struct scsi_device *sdev,
2560 return queue_depth; 2560 return queue_depth;
2561} 2561}
2562 2562
2563static ssize_t
2564megasas_fw_crash_buffer_store(struct device *cdev,
2565 struct device_attribute *attr, const char *buf, size_t count)
2566{
2567 struct Scsi_Host *shost = class_to_shost(cdev);
2568 struct megasas_instance *instance =
2569 (struct megasas_instance *) shost->hostdata;
2570 int val = 0;
2571 unsigned long flags;
2572
2573 if (kstrtoint(buf, 0, &val) != 0)
2574 return -EINVAL;
2575
2576 spin_lock_irqsave(&instance->crashdump_lock, flags);
2577 instance->fw_crash_buffer_offset = val;
2578 spin_unlock_irqrestore(&instance->crashdump_lock, flags);
2579 return strlen(buf);
2580}
2581
2582static ssize_t
2583megasas_fw_crash_buffer_show(struct device *cdev,
2584 struct device_attribute *attr, char *buf)
2585{
2586 struct Scsi_Host *shost = class_to_shost(cdev);
2587 struct megasas_instance *instance =
2588 (struct megasas_instance *) shost->hostdata;
2589 u32 size;
2590 unsigned long buff_addr;
2591 unsigned long dmachunk = CRASH_DMA_BUF_SIZE;
2592 unsigned long src_addr;
2593 unsigned long flags;
2594 u32 buff_offset;
2595
2596 spin_lock_irqsave(&instance->crashdump_lock, flags);
2597 buff_offset = instance->fw_crash_buffer_offset;
2598 if (!instance->crash_dump_buf &&
2599 !((instance->fw_crash_state == AVAILABLE) ||
2600 (instance->fw_crash_state == COPYING))) {
2601 dev_err(&instance->pdev->dev,
2602 "Firmware crash dump is not available\n");
2603 spin_unlock_irqrestore(&instance->crashdump_lock, flags);
2604 return -EINVAL;
2605 }
2606
2607 buff_addr = (unsigned long) buf;
2608
2609 if (buff_offset >
2610 (instance->fw_crash_buffer_size * dmachunk)) {
2611 dev_err(&instance->pdev->dev,
2612 "Firmware crash dump offset is out of range\n");
2613 spin_unlock_irqrestore(&instance->crashdump_lock, flags);
2614 return 0;
2615 }
2616
2617 size = (instance->fw_crash_buffer_size * dmachunk) - buff_offset;
2618 size = (size >= PAGE_SIZE) ? (PAGE_SIZE - 1) : size;
2619
2620 src_addr = (unsigned long)instance->crash_buf[buff_offset / dmachunk] +
2621 (buff_offset % dmachunk);
2622 memcpy(buf, (void *)src_addr, size);
2623 spin_unlock_irqrestore(&instance->crashdump_lock, flags);
2624
2625 return size;
2626}
2627
2628static ssize_t
2629megasas_fw_crash_buffer_size_show(struct device *cdev,
2630 struct device_attribute *attr, char *buf)
2631{
2632 struct Scsi_Host *shost = class_to_shost(cdev);
2633 struct megasas_instance *instance =
2634 (struct megasas_instance *) shost->hostdata;
2635
2636 return snprintf(buf, PAGE_SIZE, "%ld\n", (unsigned long)
2637 ((instance->fw_crash_buffer_size) * 1024 * 1024)/PAGE_SIZE);
2638}
2639
2640static ssize_t
2641megasas_fw_crash_state_store(struct device *cdev,
2642 struct device_attribute *attr, const char *buf, size_t count)
2643{
2644 struct Scsi_Host *shost = class_to_shost(cdev);
2645 struct megasas_instance *instance =
2646 (struct megasas_instance *) shost->hostdata;
2647 int val = 0;
2648 unsigned long flags;
2649
2650 if (kstrtoint(buf, 0, &val) != 0)
2651 return -EINVAL;
2652
2653 if ((val <= AVAILABLE || val > COPY_ERROR)) {
2654 dev_err(&instance->pdev->dev, "application updates invalid "
2655 "firmware crash state\n");
2656 return -EINVAL;
2657 }
2658
2659 instance->fw_crash_state = val;
2660
2661 if ((val == COPIED) || (val == COPY_ERROR)) {
2662 spin_lock_irqsave(&instance->crashdump_lock, flags);
2663 megasas_free_host_crash_buffer(instance);
2664 spin_unlock_irqrestore(&instance->crashdump_lock, flags);
2665 if (val == COPY_ERROR)
2666 dev_info(&instance->pdev->dev, "application failed to "
2667 "copy Firmware crash dump\n");
2668 else
2669 dev_info(&instance->pdev->dev, "Firmware crash dump "
2670 "copied successfully\n");
2671 }
2672 return strlen(buf);
2673}
2674
2675static ssize_t
2676megasas_fw_crash_state_show(struct device *cdev,
2677 struct device_attribute *attr, char *buf)
2678{
2679 struct Scsi_Host *shost = class_to_shost(cdev);
2680 struct megasas_instance *instance =
2681 (struct megasas_instance *) shost->hostdata;
2682 return snprintf(buf, PAGE_SIZE, "%d\n", instance->fw_crash_state);
2683}
2684
2685static ssize_t
2686megasas_page_size_show(struct device *cdev,
2687 struct device_attribute *attr, char *buf)
2688{
2689 return snprintf(buf, PAGE_SIZE, "%ld\n", (unsigned long)PAGE_SIZE - 1);
2690}
2691
2692static DEVICE_ATTR(fw_crash_buffer, S_IRUGO | S_IWUSR,
2693 megasas_fw_crash_buffer_show, megasas_fw_crash_buffer_store);
2694static DEVICE_ATTR(fw_crash_buffer_size, S_IRUGO,
2695 megasas_fw_crash_buffer_size_show, NULL);
2696static DEVICE_ATTR(fw_crash_state, S_IRUGO | S_IWUSR,
2697 megasas_fw_crash_state_show, megasas_fw_crash_state_store);
2698static DEVICE_ATTR(page_size, S_IRUGO,
2699 megasas_page_size_show, NULL);
2700
2701struct device_attribute *megaraid_host_attrs[] = {
2702 &dev_attr_fw_crash_buffer_size,
2703 &dev_attr_fw_crash_buffer,
2704 &dev_attr_fw_crash_state,
2705 &dev_attr_page_size,
2706 NULL,
2707};
2708
2563/* 2709/*
2564 * Scsi host template for megaraid_sas driver 2710 * Scsi host template for megaraid_sas driver
2565 */ 2711 */
@@ -2575,6 +2721,7 @@ static struct scsi_host_template megasas_template = {
2575 .eh_bus_reset_handler = megasas_reset_bus_host, 2721 .eh_bus_reset_handler = megasas_reset_bus_host,
2576 .eh_host_reset_handler = megasas_reset_bus_host, 2722 .eh_host_reset_handler = megasas_reset_bus_host,
2577 .eh_timed_out = megasas_reset_timer, 2723 .eh_timed_out = megasas_reset_timer,
2724 .shost_attrs = megaraid_host_attrs,
2578 .bios_param = megasas_bios_param, 2725 .bios_param = megasas_bios_param,
2579 .use_clustering = ENABLE_CLUSTERING, 2726 .use_clustering = ENABLE_CLUSTERING,
2580 .change_queue_depth = megasas_change_queue_depth, 2727 .change_queue_depth = megasas_change_queue_depth,
@@ -3887,6 +4034,59 @@ megasas_get_ctrl_info(struct megasas_instance *instance,
3887 return ret; 4034 return ret;
3888} 4035}
3889 4036
4037/*
4038 * megasas_set_crash_dump_params - Sends address of crash dump DMA buffer
4039 * to firmware
4040 *
4041 * @instance: Adapter soft state
4042 * @crash_buf_state - tell FW to turn ON/OFF crash dump feature
4043 MR_CRASH_BUF_TURN_OFF = 0
4044 MR_CRASH_BUF_TURN_ON = 1
4045 * @return 0 on success non-zero on failure.
4046 * Issues an internal command (DCMD) to set parameters for crash dump feature.
4047 * Driver will send address of crash dump DMA buffer and set mbox to tell FW
4048 * that driver supports crash dump feature. This DCMD will be sent only if
4049 * crash dump feature is supported by the FW.
4050 *
4051 */
4052int megasas_set_crash_dump_params(struct megasas_instance *instance,
4053 u8 crash_buf_state)
4054{
4055 int ret = 0;
4056 struct megasas_cmd *cmd;
4057 struct megasas_dcmd_frame *dcmd;
4058
4059 cmd = megasas_get_cmd(instance);
4060
4061 if (!cmd) {
4062 dev_err(&instance->pdev->dev, "Failed to get a free cmd\n");
4063 return -ENOMEM;
4064 }
4065
4066
4067 dcmd = &cmd->frame->dcmd;
4068
4069 memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
4070 dcmd->mbox.b[0] = crash_buf_state;
4071 dcmd->cmd = MFI_CMD_DCMD;
4072 dcmd->cmd_status = 0xFF;
4073 dcmd->sge_count = 1;
4074 dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_NONE);
4075 dcmd->timeout = 0;
4076 dcmd->pad_0 = 0;
4077 dcmd->data_xfer_len = cpu_to_le32(CRASH_DMA_BUF_SIZE);
4078 dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_SET_CRASH_DUMP_PARAMS);
4079 dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(instance->crash_dump_h);
4080 dcmd->sgl.sge32[0].length = cpu_to_le32(CRASH_DMA_BUF_SIZE);
4081
4082 if (!megasas_issue_polled(instance, cmd))
4083 ret = 0;
4084 else
4085 ret = -1;
4086 megasas_return_cmd(instance, cmd);
4087 return ret;
4088}
4089
3890/** 4090/**
3891 * megasas_issue_init_mfi - Initializes the FW 4091 * megasas_issue_init_mfi - Initializes the FW
3892 * @instance: Adapter soft state 4092 * @instance: Adapter soft state
@@ -4272,6 +4472,27 @@ static int megasas_init_fw(struct megasas_instance *instance)
4272 printk(KERN_WARNING "megaraid_sas: I am VF " 4472 printk(KERN_WARNING "megaraid_sas: I am VF "
4273 "requestorId %d\n", instance->requestorId); 4473 "requestorId %d\n", instance->requestorId);
4274 } 4474 }
4475
4476 le32_to_cpus((u32 *)&ctrl_info->adapterOperations3);
4477 instance->crash_dump_fw_support =
4478 ctrl_info->adapterOperations3.supportCrashDump;
4479 instance->crash_dump_drv_support =
4480 (instance->crash_dump_fw_support &&
4481 instance->crash_dump_buf);
4482 if (instance->crash_dump_drv_support) {
4483 dev_info(&instance->pdev->dev, "Firmware Crash dump "
4484 "feature is supported\n");
4485 megasas_set_crash_dump_params(instance,
4486 MR_CRASH_BUF_TURN_OFF);
4487
4488 } else {
4489 if (instance->crash_dump_buf)
4490 pci_free_consistent(instance->pdev,
4491 CRASH_DMA_BUF_SIZE,
4492 instance->crash_dump_buf,
4493 instance->crash_dump_h);
4494 instance->crash_dump_buf = NULL;
4495 }
4275 } 4496 }
4276 instance->max_sectors_per_req = instance->max_num_sge * 4497 instance->max_sectors_per_req = instance->max_num_sge *
4277 PAGE_SIZE / 512; 4498 PAGE_SIZE / 512;
@@ -4791,6 +5012,23 @@ static int megasas_probe_one(struct pci_dev *pdev,
4791 break; 5012 break;
4792 } 5013 }
4793 5014
5015 /* Crash dump feature related initialisation*/
5016 instance->drv_buf_index = 0;
5017 instance->drv_buf_alloc = 0;
5018 instance->crash_dump_fw_support = 0;
5019 instance->crash_dump_app_support = 0;
5020 instance->fw_crash_state = UNAVAILABLE;
5021 spin_lock_init(&instance->crashdump_lock);
5022 instance->crash_dump_buf = NULL;
5023
5024 if (!reset_devices)
5025 instance->crash_dump_buf = pci_alloc_consistent(pdev,
5026 CRASH_DMA_BUF_SIZE,
5027 &instance->crash_dump_h);
5028 if (!instance->crash_dump_buf)
5029 dev_err(&instance->pdev->dev, "Can't allocate Firmware "
5030 "crash dump DMA buffer\n");
5031
4794 megasas_poll_wait_aen = 0; 5032 megasas_poll_wait_aen = 0;
4795 instance->flag_ieee = 0; 5033 instance->flag_ieee = 0;
4796 instance->ev = NULL; 5034 instance->ev = NULL;
@@ -4852,9 +5090,10 @@ static int megasas_probe_one(struct pci_dev *pdev,
4852 if ((instance->pdev->device == PCI_DEVICE_ID_LSI_FUSION) || 5090 if ((instance->pdev->device == PCI_DEVICE_ID_LSI_FUSION) ||
4853 (instance->pdev->device == PCI_DEVICE_ID_LSI_PLASMA) || 5091 (instance->pdev->device == PCI_DEVICE_ID_LSI_PLASMA) ||
4854 (instance->pdev->device == PCI_DEVICE_ID_LSI_INVADER) || 5092 (instance->pdev->device == PCI_DEVICE_ID_LSI_INVADER) ||
4855 (instance->pdev->device == PCI_DEVICE_ID_LSI_FURY)) 5093 (instance->pdev->device == PCI_DEVICE_ID_LSI_FURY)) {
4856 INIT_WORK(&instance->work_init, megasas_fusion_ocr_wq); 5094 INIT_WORK(&instance->work_init, megasas_fusion_ocr_wq);
4857 else 5095 INIT_WORK(&instance->crash_init, megasas_fusion_crash_dump_wq);
5096 } else
4858 INIT_WORK(&instance->work_init, process_fw_state_change_wq); 5097 INIT_WORK(&instance->work_init, process_fw_state_change_wq);
4859 5098
4860 /* 5099 /*
@@ -5342,6 +5581,8 @@ static void megasas_detach_one(struct pci_dev *pdev)
5342 if (instance->requestorId && !instance->skip_heartbeat_timer_del) 5581 if (instance->requestorId && !instance->skip_heartbeat_timer_del)
5343 del_timer_sync(&instance->sriov_heartbeat_timer); 5582 del_timer_sync(&instance->sriov_heartbeat_timer);
5344 5583
5584 if (instance->fw_crash_state != UNAVAILABLE)
5585 megasas_free_host_crash_buffer(instance);
5345 scsi_remove_host(instance->host); 5586 scsi_remove_host(instance->host);
5346 megasas_flush_cache(instance); 5587 megasas_flush_cache(instance);
5347 megasas_shutdown_controller(instance, MR_DCMD_CTRL_SHUTDOWN); 5588 megasas_shutdown_controller(instance, MR_DCMD_CTRL_SHUTDOWN);
@@ -5432,6 +5673,10 @@ static void megasas_detach_one(struct pci_dev *pdev)
5432 instance->hb_host_mem, 5673 instance->hb_host_mem,
5433 instance->hb_host_mem_h); 5674 instance->hb_host_mem_h);
5434 5675
5676 if (instance->crash_dump_buf)
5677 pci_free_consistent(pdev, CRASH_DMA_BUF_SIZE,
5678 instance->crash_dump_buf, instance->crash_dump_h);
5679
5435 scsi_host_put(host); 5680 scsi_host_put(host);
5436 5681
5437 pci_disable_device(pdev); 5682 pci_disable_device(pdev);
@@ -5523,6 +5768,45 @@ static unsigned int megasas_mgmt_poll(struct file *file, poll_table *wait)
5523 return mask; 5768 return mask;
5524} 5769}
5525 5770
5771/*
5772 * megasas_set_crash_dump_params_ioctl:
5773 * Send CRASH_DUMP_MODE DCMD to all controllers
5774 * @cmd: MFI command frame
5775 */
5776
5777static int megasas_set_crash_dump_params_ioctl(
5778 struct megasas_cmd *cmd)
5779{
5780 struct megasas_instance *local_instance;
5781 int i, error = 0;
5782 int crash_support;
5783
5784 crash_support = cmd->frame->dcmd.mbox.w[0];
5785
5786 for (i = 0; i < megasas_mgmt_info.max_index; i++) {
5787 local_instance = megasas_mgmt_info.instance[i];
5788 if (local_instance && local_instance->crash_dump_drv_support) {
5789 if ((local_instance->adprecovery ==
5790 MEGASAS_HBA_OPERATIONAL) &&
5791 !megasas_set_crash_dump_params(local_instance,
5792 crash_support)) {
5793 local_instance->crash_dump_app_support =
5794 crash_support;
5795 dev_info(&local_instance->pdev->dev,
5796 "Application firmware crash "
5797 "dump mode set success\n");
5798 error = 0;
5799 } else {
5800 dev_info(&local_instance->pdev->dev,
5801 "Application firmware crash "
5802 "dump mode set failed\n");
5803 error = -1;
5804 }
5805 }
5806 }
5807 return error;
5808}
5809
5526/** 5810/**
5527 * megasas_mgmt_fw_ioctl - Issues management ioctls to FW 5811 * megasas_mgmt_fw_ioctl - Issues management ioctls to FW
5528 * @instance: Adapter soft state 5812 * @instance: Adapter soft state
@@ -5569,6 +5853,12 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
5569 MFI_FRAME_SGL64 | 5853 MFI_FRAME_SGL64 |
5570 MFI_FRAME_SENSE64)); 5854 MFI_FRAME_SENSE64));
5571 5855
5856 if (cmd->frame->dcmd.opcode == MR_DRIVER_SET_APP_CRASHDUMP_MODE) {
5857 error = megasas_set_crash_dump_params_ioctl(cmd);
5858 megasas_return_cmd(instance, cmd);
5859 return error;
5860 }
5861
5572 /* 5862 /*
5573 * The management interface between applications and the fw uses 5863 * The management interface between applications and the fw uses
5574 * MFI frames. E.g, RAID configuration changes, LD property changes 5864 * MFI frames. E.g, RAID configuration changes, LD property changes
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 155b8b1a8f4b..913e9fa8fc15 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -91,6 +91,8 @@ void megasas_start_timer(struct megasas_instance *instance,
91extern struct megasas_mgmt_info megasas_mgmt_info; 91extern struct megasas_mgmt_info megasas_mgmt_info;
92extern int resetwaittime; 92extern int resetwaittime;
93 93
94
95
94/** 96/**
95 * megasas_enable_intr_fusion - Enables interrupts 97 * megasas_enable_intr_fusion - Enables interrupts
96 * @regs: MFI register set 98 * @regs: MFI register set
@@ -2055,7 +2057,7 @@ irqreturn_t megasas_isr_fusion(int irq, void *devp)
2055{ 2057{
2056 struct megasas_irq_context *irq_context = devp; 2058 struct megasas_irq_context *irq_context = devp;
2057 struct megasas_instance *instance = irq_context->instance; 2059 struct megasas_instance *instance = irq_context->instance;
2058 u32 mfiStatus, fw_state; 2060 u32 mfiStatus, fw_state, dma_state;
2059 2061
2060 if (instance->mask_interrupts) 2062 if (instance->mask_interrupts)
2061 return IRQ_NONE; 2063 return IRQ_NONE;
@@ -2077,7 +2079,16 @@ irqreturn_t megasas_isr_fusion(int irq, void *devp)
2077 /* If we didn't complete any commands, check for FW fault */ 2079 /* If we didn't complete any commands, check for FW fault */
2078 fw_state = instance->instancet->read_fw_status_reg( 2080 fw_state = instance->instancet->read_fw_status_reg(
2079 instance->reg_set) & MFI_STATE_MASK; 2081 instance->reg_set) & MFI_STATE_MASK;
2080 if (fw_state == MFI_STATE_FAULT) { 2082 dma_state = instance->instancet->read_fw_status_reg
2083 (instance->reg_set) & MFI_STATE_DMADONE;
2084 if (instance->crash_dump_drv_support &&
2085 instance->crash_dump_app_support) {
2086 /* Start collecting crash, if DMA bit is done */
2087 if ((fw_state == MFI_STATE_FAULT) && dma_state)
2088 schedule_work(&instance->crash_init);
2089 else if (fw_state == MFI_STATE_FAULT)
2090 schedule_work(&instance->work_init);
2091 } else if (fw_state == MFI_STATE_FAULT) {
2081 printk(KERN_WARNING "megaraid_sas: Iop2SysDoorbellInt" 2092 printk(KERN_WARNING "megaraid_sas: Iop2SysDoorbellInt"
2082 "for scsi%d\n", instance->host->host_no); 2093 "for scsi%d\n", instance->host->host_no);
2083 schedule_work(&instance->work_init); 2094 schedule_work(&instance->work_init);
@@ -2230,6 +2241,49 @@ megasas_read_fw_status_reg_fusion(struct megasas_register_set __iomem *regs)
2230} 2241}
2231 2242
2232/** 2243/**
2244 * megasas_alloc_host_crash_buffer - Host buffers for Crash dump collection from Firmware
2245 * @instance: Controller's soft instance
2246 * return: Number of allocated host crash buffers
2247 */
2248static void
2249megasas_alloc_host_crash_buffer(struct megasas_instance *instance)
2250{
2251 unsigned int i;
2252
2253 instance->crash_buf_pages = get_order(CRASH_DMA_BUF_SIZE);
2254 for (i = 0; i < MAX_CRASH_DUMP_SIZE; i++) {
2255 instance->crash_buf[i] = (void *)__get_free_pages(GFP_KERNEL,
2256 instance->crash_buf_pages);
2257 if (!instance->crash_buf[i]) {
2258 dev_info(&instance->pdev->dev, "Firmware crash dump "
2259 "memory allocation failed at index %d\n", i);
2260 break;
2261 }
2262 }
2263 instance->drv_buf_alloc = i;
2264}
2265
2266/**
2267 * megasas_free_host_crash_buffer - Host buffers for Crash dump collection from Firmware
2268 * @instance: Controller's soft instance
2269 */
2270void
2271megasas_free_host_crash_buffer(struct megasas_instance *instance)
2272{
2273 unsigned int i
2274;
2275 for (i = 0; i < instance->drv_buf_alloc; i++) {
2276 if (instance->crash_buf[i])
2277 free_pages((ulong)instance->crash_buf[i],
2278 instance->crash_buf_pages);
2279 }
2280 instance->drv_buf_index = 0;
2281 instance->drv_buf_alloc = 0;
2282 instance->fw_crash_state = UNAVAILABLE;
2283 instance->fw_crash_buffer_size = 0;
2284}
2285
2286/**
2233 * megasas_adp_reset_fusion - For controller reset 2287 * megasas_adp_reset_fusion - For controller reset
2234 * @regs: MFI register set 2288 * @regs: MFI register set
2235 */ 2289 */
@@ -2372,6 +2426,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int iotimeout)
2372 struct megasas_cmd *cmd_mfi; 2426 struct megasas_cmd *cmd_mfi;
2373 union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc; 2427 union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc;
2374 u32 host_diag, abs_state, status_reg, reset_adapter; 2428 u32 host_diag, abs_state, status_reg, reset_adapter;
2429 u32 io_timeout_in_crash_mode = 0;
2375 2430
2376 instance = (struct megasas_instance *)shost->hostdata; 2431 instance = (struct megasas_instance *)shost->hostdata;
2377 fusion = instance->ctrl_context; 2432 fusion = instance->ctrl_context;
@@ -2385,6 +2440,42 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int iotimeout)
2385 mutex_unlock(&instance->reset_mutex); 2440 mutex_unlock(&instance->reset_mutex);
2386 return FAILED; 2441 return FAILED;
2387 } 2442 }
2443 status_reg = instance->instancet->read_fw_status_reg(instance->reg_set);
2444 abs_state = status_reg & MFI_STATE_MASK;
2445
2446 /* IO timeout detected, forcibly put FW in FAULT state */
2447 if (abs_state != MFI_STATE_FAULT && instance->crash_dump_buf &&
2448 instance->crash_dump_app_support && iotimeout) {
2449 dev_info(&instance->pdev->dev, "IO timeout is detected, "
2450 "forcibly FAULT Firmware\n");
2451 instance->adprecovery = MEGASAS_ADPRESET_SM_INFAULT;
2452 status_reg = readl(&instance->reg_set->doorbell);
2453 writel(status_reg | MFI_STATE_FORCE_OCR,
2454 &instance->reg_set->doorbell);
2455 readl(&instance->reg_set->doorbell);
2456 mutex_unlock(&instance->reset_mutex);
2457 do {
2458 ssleep(3);
2459 io_timeout_in_crash_mode++;
2460 dev_dbg(&instance->pdev->dev, "waiting for [%d] "
2461 "seconds for crash dump collection and OCR "
2462 "to be done\n", (io_timeout_in_crash_mode * 3));
2463 } while ((instance->adprecovery != MEGASAS_HBA_OPERATIONAL) &&
2464 (io_timeout_in_crash_mode < 80));
2465
2466 if (instance->adprecovery == MEGASAS_HBA_OPERATIONAL) {
2467 dev_info(&instance->pdev->dev, "OCR done for IO "
2468 "timeout case\n");
2469 retval = SUCCESS;
2470 } else {
2471 dev_info(&instance->pdev->dev, "Controller is not "
2472 "operational after 240 seconds wait for IO "
2473 "timeout case in FW crash dump mode\n do "
2474 "OCR/kill adapter\n");
2475 retval = megasas_reset_fusion(shost, 0);
2476 }
2477 return retval;
2478 }
2388 2479
2389 if (instance->requestorId && !instance->skip_heartbeat_timer_del) 2480 if (instance->requestorId && !instance->skip_heartbeat_timer_del)
2390 del_timer_sync(&instance->sriov_heartbeat_timer); 2481 del_timer_sync(&instance->sriov_heartbeat_timer);
@@ -2651,6 +2742,15 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int iotimeout)
2651 printk(KERN_WARNING "megaraid_sas: Reset " 2742 printk(KERN_WARNING "megaraid_sas: Reset "
2652 "successful for scsi%d.\n", 2743 "successful for scsi%d.\n",
2653 instance->host->host_no); 2744 instance->host->host_no);
2745
2746 if (instance->crash_dump_drv_support) {
2747 if (instance->crash_dump_app_support)
2748 megasas_set_crash_dump_params(instance,
2749 MR_CRASH_BUF_TURN_ON);
2750 else
2751 megasas_set_crash_dump_params(instance,
2752 MR_CRASH_BUF_TURN_OFF);
2753 }
2654 retval = SUCCESS; 2754 retval = SUCCESS;
2655 goto out; 2755 goto out;
2656 } 2756 }
@@ -2679,6 +2779,74 @@ out:
2679 return retval; 2779 return retval;
2680} 2780}
2681 2781
2782/* Fusion Crash dump collection work queue */
2783void megasas_fusion_crash_dump_wq(struct work_struct *work)
2784{
2785 struct megasas_instance *instance =
2786 container_of(work, struct megasas_instance, crash_init);
2787 u32 status_reg;
2788 u8 partial_copy = 0;
2789
2790
2791 status_reg = instance->instancet->read_fw_status_reg(instance->reg_set);
2792
2793 /*
2794 * Allocate host crash buffers to copy data from 1 MB DMA crash buffer
2795 * to host crash buffers
2796 */
2797 if (instance->drv_buf_index == 0) {
2798 /* Buffer is already allocated for old Crash dump.
2799 * Do OCR and do not wait for crash dump collection
2800 */
2801 if (instance->drv_buf_alloc) {
2802 dev_info(&instance->pdev->dev, "earlier crash dump is "
2803 "not yet copied by application, ignoring this "
2804 "crash dump and initiating OCR\n");
2805 status_reg |= MFI_STATE_CRASH_DUMP_DONE;
2806 writel(status_reg,
2807 &instance->reg_set->outbound_scratch_pad);
2808 readl(&instance->reg_set->outbound_scratch_pad);
2809 return;
2810 }
2811 megasas_alloc_host_crash_buffer(instance);
2812 dev_info(&instance->pdev->dev, "Number of host crash buffers "
2813 "allocated: %d\n", instance->drv_buf_alloc);
2814 }
2815
2816 /*
2817 * Driver has allocated max buffers, which can be allocated
2818 * and FW has more crash dump data, then driver will
2819 * ignore the data.
2820 */
2821 if (instance->drv_buf_index >= (instance->drv_buf_alloc)) {
2822 dev_info(&instance->pdev->dev, "Driver is done copying "
2823 "the buffer: %d\n", instance->drv_buf_alloc);
2824 status_reg |= MFI_STATE_CRASH_DUMP_DONE;
2825 partial_copy = 1;
2826 } else {
2827 memcpy(instance->crash_buf[instance->drv_buf_index],
2828 instance->crash_dump_buf, CRASH_DMA_BUF_SIZE);
2829 instance->drv_buf_index++;
2830 status_reg &= ~MFI_STATE_DMADONE;
2831 }
2832
2833 if (status_reg & MFI_STATE_CRASH_DUMP_DONE) {
2834 dev_info(&instance->pdev->dev, "Crash Dump is available,number "
2835 "of copied buffers: %d\n", instance->drv_buf_index);
2836 instance->fw_crash_buffer_size = instance->drv_buf_index;
2837 instance->fw_crash_state = AVAILABLE;
2838 instance->drv_buf_index = 0;
2839 writel(status_reg, &instance->reg_set->outbound_scratch_pad);
2840 readl(&instance->reg_set->outbound_scratch_pad);
2841 if (!partial_copy)
2842 megasas_reset_fusion(instance->host, 0);
2843 } else {
2844 writel(status_reg, &instance->reg_set->outbound_scratch_pad);
2845 readl(&instance->reg_set->outbound_scratch_pad);
2846 }
2847}
2848
2849
2682/* Fusion OCR work queue */ 2850/* Fusion OCR work queue */
2683void megasas_fusion_ocr_wq(struct work_struct *work) 2851void megasas_fusion_ocr_wq(struct work_struct *work)
2684{ 2852{