/* * Linux driver for VMware's para-virtualized SCSI HBA. * * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; version 2 of the License and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or * NON INFRINGEMENT. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Maintained by: Alok N Kataria * */ #include #include #include #include #include #include #include #include #include #include #include "vmw_pvscsi.h" #define PVSCSI_LINUX_DRIVER_DESC "VMware PVSCSI driver" MODULE_DESCRIPTION(PVSCSI_LINUX_DRIVER_DESC); MODULE_AUTHOR("VMware, Inc."); MODULE_LICENSE("GPL"); MODULE_VERSION(PVSCSI_DRIVER_VERSION_STRING); #define PVSCSI_DEFAULT_NUM_PAGES_PER_RING 8 #define PVSCSI_DEFAULT_NUM_PAGES_MSG_RING 1 #define PVSCSI_DEFAULT_QUEUE_DEPTH 64 #define SGL_SIZE PAGE_SIZE struct pvscsi_sg_list { struct PVSCSISGElement sge[PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT]; }; struct pvscsi_ctx { /* * The index of the context in cmd_map serves as the context ID for a * 1-to-1 mapping completions back to requests. 
*/ struct scsi_cmnd *cmd; struct pvscsi_sg_list *sgl; struct list_head list; dma_addr_t dataPA; dma_addr_t sensePA; dma_addr_t sglPA; }; struct pvscsi_adapter { char *mmioBase; unsigned int irq; u8 rev; bool use_msi; bool use_msix; bool use_msg; spinlock_t hw_lock; struct workqueue_struct *workqueue; struct work_struct work; struct PVSCSIRingReqDesc *req_ring; unsigned req_pages; unsigned req_depth; dma_addr_t reqRingPA; struct PVSCSIRingCmpDesc *cmp_ring; unsigned cmp_pages; dma_addr_t cmpRingPA; struct PVSCSIRingMsgDesc *msg_ring; unsigned msg_pages; dma_addr_t msgRingPA; struct PVSCSIRingsState *rings_state; dma_addr_t ringStatePA; struct pci_dev *dev; struct Scsi_Host *host; struct list_head cmd_pool; struct pvscsi_ctx *cmd_map; }; /* Command line parameters */ static int pvscsi_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_PER_RING; static int pvscsi_msg_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_MSG_RING; static int pvscsi_cmd_per_lun = PVSCSI_DEFAULT_QUEUE_DEPTH; static bool pvscsi_disable_msi; static bool pvscsi_disable_msix; static bool pvscsi_use_msg = true; #define PVSCSI_RW (S_IRUSR | S_IWUSR) module_param_named(ring_pages, pvscsi_ring_pages, int, PVSCSI_RW); MODULE_PARM_DESC(ring_pages, "Number of pages per req/cmp ring - (default=" __stringify(PVSCSI_DEFAULT_NUM_PAGES_PER_RING) ")"); module_param_named(msg_ring_pages, pvscsi_msg_ring_pages, int, PVSCSI_RW); MODULE_PARM_DESC(msg_ring_pages, "Number of pages for the msg ring - (default=" __stringify(PVSCSI_DEFAULT_NUM_PAGES_MSG_RING) ")"); module_param_named(cmd_per_lun, pvscsi_cmd_per_lun, int, PVSCSI_RW); MODULE_PARM_DESC(cmd_per_lun, "Maximum commands per lun - (default=" __stringify(PVSCSI_MAX_REQ_QUEUE_DEPTH) ")"); module_param_named(disable_msi, pvscsi_disable_msi, bool, PVSCSI_RW); MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)"); module_param_named(disable_msix, pvscsi_disable_msix, bool, PVSCSI_RW); MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)"); 
module_param_named(use_msg, pvscsi_use_msg, bool, PVSCSI_RW); MODULE_PARM_DESC(use_msg, "Use msg ring when available - (default=1)"); static const struct pci_device_id pvscsi_pci_tbl[] = { { PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_PVSCSI) }, { 0 } }; MODULE_DEVICE_TABLE(pci, pvscsi_pci_tbl); static struct device * pvscsi_dev(const struct pvscsi_adapter *adapter) { return &(adapter->dev->dev); } static struct pvscsi_ctx * pvscsi_find_context(const struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd) { struct pvscsi_ctx *ctx, *end; end = &adapter->cmd_map[adapter->req_depth]; for (ctx = adapter->cmd_map; ctx < end; ctx++) if (ctx->cmd == cmd) return ctx; return NULL; } static struct pvscsi_ctx * pvscsi_acquire_context(struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd) { struct pvscsi_ctx *ctx; if (list_empty(&adapter->cmd_pool)) return NULL; ctx = list_first_entry(&adapter->cmd_pool, struct pvscsi_ctx, list); ctx->cmd = cmd; list_del(&ctx->list); return ctx; } static void pvscsi_release_context(struct pvscsi_adapter *adapter, struct pvscsi_ctx *ctx) { ctx->cmd = NULL; list_add(&ctx->list, &adapter->cmd_pool); } /* * Map a pvscsi_ctx struct to a context ID field value; we map to a simple * non-zero integer. ctx always points to an entry in cmd_map array, hence * the return value is always >=1. 
*/ static u64 pvscsi_map_context(const struct pvscsi_adapter *adapter, const struct pvscsi_ctx *ctx) { return ctx - adapter->cmd_map + 1; } static struct pvscsi_ctx * pvscsi_get_context(const struct pvscsi_adapter *adapter, u64 context) { return &adapter->cmd_map[context - 1]; } static void pvscsi_reg_write(const struct pvscsi_adapter *adapter, u32 offset, u32 val) { writel(val, adapter->mmioBase + offset); } static u32 pvscsi_reg_read(const struct pvscsi_adapter *adapter, u32 offset) { return readl(adapter->mmioBase + offset); } static u32 pvscsi_read_intr_status(const struct pvscsi_adapter *adapter) { return pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_INTR_STATUS); } static void pvscsi_write_intr_status(const struct pvscsi_adapter *adapter, u32 val) { pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_STATUS, val); } static void pvscsi_unmask_intr(const struct pvscsi_adapter *adapter) { u32 intr_bits; intr_bits = PVSCSI_INTR_CMPL_MASK; if (adapter->use_msg) intr_bits |= PVSCSI_INTR_MSG_MASK; pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, intr_bits); } static void pvscsi_mask_intr(const struct pvscsi_adapter *adapter) { pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, 0); } static void pvscsi_write_cmd_desc(const struct pvscsi_adapter *adapter, u32 cmd, const void *desc, size_t len) { const u32 *ptr = desc; size_t i; len /= sizeof(*ptr); pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND, cmd); for (i = 0; i < len; i++) pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND_DATA, ptr[i]); } static void pvscsi_abort_cmd(const struct pvscsi_adapter *adapter, const struct pvscsi_ctx *ctx) { struct PVSCSICmdDescAbortCmd cmd = { 0 }; cmd.target = ctx->cmd->device->id; cmd.context = pvscsi_map_context(adapter, ctx); pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd)); } static void pvscsi_kick_rw_io(const struct pvscsi_adapter *adapter) { pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_RW_IO, 0); } static void 
pvscsi_process_request_ring(const struct pvscsi_adapter *adapter) { pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0); } static int scsi_is_rw(unsigned char op) { return op == READ_6 || op == WRITE_6 || op == READ_10 || op == WRITE_10 || op == READ_12 || op == WRITE_12 || op == READ_16 || op == WRITE_16; } static void pvscsi_kick_io(const struct pvscsi_adapter *adapter, unsigned char op) { if (scsi_is_rw(op)) pvscsi_kick_rw_io(adapter); else pvscsi_process_request_ring(adapter); } static void ll_adapter_reset(const struct pvscsi_adapter *adapter) { dev_dbg(pvscsi_dev(adapter), "Adapter Reset on %p\n", adapter); pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ADAPTER_RESET, NULL, 0); } static void ll_bus_reset(const struct pvscsi_adapter *adapter) { dev_dbg(pvscsi_dev(adapter), "Reseting bus on %p\n", adapter); pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_BUS, NULL, 0); } static void ll_device_reset(const struct pvscsi_adapter *adapter, u32 target) { struct PVSCSICmdDescResetDevice cmd = { 0 }; dev_dbg(pvscsi_dev(adapter), "Reseting device: target=%u\n", target); cmd.target = target; pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_DEVICE, &cmd, sizeof(cmd)); } static void pvscsi_create_sg(struct pvscsi_ctx *ctx, struct scatterlist *sg, unsigned count) { unsigned i; struct PVSCSISGElement *sge; BUG_ON(count > PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT); sge = &ctx->sgl->sge[0]; for (i = 0; i < count; i++, sg++) { sge[i].addr = sg_dma_address(sg); sge[i].length = sg_dma_len(sg); sge[i].flags = 0; } } /* * Map all data buffers for a command into PCI space and * setup the scatter/gather list if needed. 
*/
/*
 * Fills e->dataLen / e->dataAddr for @cmd:
 *  - zero-length transfer: dataAddr stays 0 and nothing is mapped;
 *  - more than one mapped segment: the segments are copied into ctx->sgl,
 *    that buffer is mapped and passed with PVSCSI_FLAG_CMD_WITH_SG_LIST;
 *  - otherwise the first scatterlist entry's DMA address is used directly;
 *  - no scatterlist at all: the buffer itself is mapped into ctx->dataPA.
 *
 * NOTE(review): a negative (error) return from scsi_dma_map() is not
 * distinguished from "one segment" here, and the pci_map_single() results
 * are not checked -- confirm whether failure handling is needed.
 */
static void pvscsi_map_buffers(struct pvscsi_adapter *adapter,
			       struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd,
			       struct PVSCSIRingReqDesc *e)
{
	unsigned count;
	unsigned bufflen = scsi_bufflen(cmd);
	struct scatterlist *sg;

	e->dataLen = bufflen;
	e->dataAddr = 0;
	if (bufflen == 0)
		return;

	sg = scsi_sglist(cmd);
	count = scsi_sg_count(cmd);
	if (count != 0) {
		int segs = scsi_dma_map(cmd);
		if (segs > 1) {
			pvscsi_create_sg(ctx, sg, segs);

			e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
			ctx->sglPA = pci_map_single(adapter->dev, ctx->sgl,
						    SGL_SIZE, PCI_DMA_TODEVICE);
			e->dataAddr = ctx->sglPA;
		} else
			e->dataAddr = sg_dma_address(sg);
	} else {
		/*
		 * In case there is no S/G list, scsi_sglist points
		 * directly to the buffer.
		 */
		ctx->dataPA = pci_map_single(adapter->dev, sg, bufflen,
					     cmd->sc_data_direction);
		e->dataAddr = ctx->dataPA;
	}
}

/*
 * Undo the mappings made for the command bound to @ctx: the data mapping
 * (scatterlist plus, if it was used, the S/G buffer -- sglPA is reset to 0 --
 * or the single-buffer mapping) and, when the command has a sense buffer,
 * the sense mapping recorded in ctx->sensePA.
 */
static void pvscsi_unmap_buffers(const struct pvscsi_adapter *adapter,
				 struct pvscsi_ctx *ctx)
{
	struct scsi_cmnd *cmd;
	unsigned bufflen;

	cmd = ctx->cmd;
	bufflen = scsi_bufflen(cmd);

	if (bufflen != 0) {
		unsigned count = scsi_sg_count(cmd);

		if (count != 0) {
			scsi_dma_unmap(cmd);
			/* sglPA is only non-zero when the S/G buffer was mapped */
			if (ctx->sglPA) {
				pci_unmap_single(adapter->dev, ctx->sglPA,
						 SGL_SIZE, PCI_DMA_TODEVICE);
				ctx->sglPA = 0;
			}
		} else
			pci_unmap_single(adapter->dev, ctx->dataPA, bufflen,
					 cmd->sc_data_direction);
	}
	if (cmd->sense_buffer)
		pci_unmap_single(adapter->dev, ctx->sensePA,
				 SCSI_SENSE_BUFFERSIZE, PCI_DMA_FROMDEVICE);
}

/*
 * Allocate the DMA-coherent rings-state page and the request and completion
 * rings, plus the message ring when adapter->use_msg is set. Ring sizes come
 * from the module parameters, capped by the PVSCSI_MAX_NUM_PAGES_* limits;
 * req_depth is derived from the request ring size.
 *
 * Returns 0 on success or -ENOMEM. Nothing is freed here on failure; the
 * pointers already set are left in the adapter.
 */
static int __devinit pvscsi_allocate_rings(struct pvscsi_adapter *adapter)
{
	adapter->rings_state = pci_alloc_consistent(adapter->dev, PAGE_SIZE,
						    &adapter->ringStatePA);
	if (!adapter->rings_state)
		return -ENOMEM;

	adapter->req_pages = min(PVSCSI_MAX_NUM_PAGES_REQ_RING,
				 pvscsi_ring_pages);
	adapter->req_depth = adapter->req_pages
					* PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
	adapter->req_ring = pci_alloc_consistent(adapter->dev,
						 adapter->req_pages * PAGE_SIZE,
						 &adapter->reqRingPA);
	if (!adapter->req_ring)
		return -ENOMEM;

	adapter->cmp_pages = min(PVSCSI_MAX_NUM_PAGES_CMP_RING,
				 pvscsi_ring_pages);
	adapter->cmp_ring = pci_alloc_consistent(adapter->dev,
						 adapter->cmp_pages * PAGE_SIZE,
						 &adapter->cmpRingPA);
	if (!adapter->cmp_ring)
		return -ENOMEM;

	/* the ring addresses are later handed over as page numbers */
	BUG_ON(!IS_ALIGNED(adapter->ringStatePA, PAGE_SIZE));
	BUG_ON(!IS_ALIGNED(adapter->reqRingPA, PAGE_SIZE));
	BUG_ON(!IS_ALIGNED(adapter->cmpRingPA, PAGE_SIZE));

	if (!adapter->use_msg)
		return 0;

	adapter->msg_pages = min(PVSCSI_MAX_NUM_PAGES_MSG_RING,
				 pvscsi_msg_ring_pages);
	adapter->msg_ring = pci_alloc_consistent(adapter->dev,
						 adapter->msg_pages * PAGE_SIZE,
						 &adapter->msgRingPA);
	if (!adapter->msg_ring)
		return -ENOMEM;
	BUG_ON(!IS_ALIGNED(adapter->msgRingPA, PAGE_SIZE));

	return 0;
}

/*
 * Build the SETUP_RINGS descriptor: page number of the rings-state page and
 * the page numbers of every request and completion ring page.
 */
static void pvscsi_setup_all_rings(const struct pvscsi_adapter *adapter)
{
	struct PVSCSICmdDescSetupRings cmd = { 0 };
	dma_addr_t base;
	unsigned i;

	cmd.ringsStatePPN   = adapter->ringStatePA >> PAGE_SHIFT;
	cmd.reqRingNumPages = adapter->req_pages;
	cmd.cmpRingNumPages = adapter->cmp_pages;

	base = adapter->reqRingPA;
	for (i = 0; i < adapter->req_pages; i++) {
		cmd.reqRingPPNs[i] = base >> PAGE_SHIFT;
		base += PAGE_SIZE;
	}

	base = adapter->cmpRingPA;
	for (i = 0; i < adapter->cmp_pages; i++) {
		cmd.cmpRingPPNs[i] = base >> PAGE_SHIFT;
		base += PAGE_SIZE;
	}

	/*
	 * NOTE(review): THE FILE IS CORRUPTED FROM HERE. The memset() call
	 * below is cut off mid-identifier and is followed by the text of an
	 * unrelated Python perf script. The rest of this function and every
	 * definition up to the last few statements of what appears to be
	 * pvscsi_host_reset() (referenced by pvscsi_template below) are
	 * missing. The damaged text is preserved verbatim; it must be
	 * restored from the original source.
	 */
	memset(adapter->rings_# perf script event handlers, generated by perf script -g python # (c) 2010, Tom Zanussi <tzanussi@gmail.com> # Licensed under the terms of the GNU GPL License version 2 # # This script tests basic functionality such as flag and symbol # strings, common_xxx() calls back into perf, begin, end, unhandled # events, etc. Basically, if this script runs successfully and # displays expected results, Python scripting support should be ok.
import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from Core import * from perf_trace_context import * unhandled = autodict() def trace_begin(): print "trace_begin" pass def trace_end(): print_unhandled() def irq__softirq_entry(event_name, context, common_cpu, common_secs, common_nsecs, common_pid, common_comm, vec): print_header(event_name, common_cpu, common_secs, common_nsecs, common_pid, common_comm) print_uncommon(context) print "vec=%s\n" % \ (symbol_str("irq__softirq_entry", "vec", vec)), def kmem__kmalloc(event_name, context, common_cpu, common_secs, common_nsecs, common_pid, common_comm, call_site, ptr, bytes_req, bytes_alloc, gfp_flags): print_header(event_name, common_cpu, common_secs, common_nsecs, common_pid, common_comm) print_uncommon(context) print "call_site=%u, ptr=%u, bytes_req=%u, " \ "bytes_alloc=%u, gfp_flags=%s\n" % \ (call_site, ptr, bytes_req, bytes_alloc, flag_str("kmem__kmalloc", "gfp_flags", gfp_flags)), def trace_unhandled(event_name, context, event_fields_dict): try: unhandled[event_name] += 1 except TypeError: unhandled[event_name] = 1 def print_header(event_name, cpu, secs, nsecs, pid, comm): print "%-20s %5u %05u.%09u %8u %-20s " % \ (event_name, cpu, secs, nsecs, pid, comm), # print trace fields not included in handler args def print_uncommon(context): print "common_preempt_count=%d, common_flags=%s, common_lock_depth=%d, " \ % (common_pc(context), trace_flag_str(common_flags(context)), \ common_lock_depth(context)) def print_unhandled(): keys = unhandled.keys() if not keys: return print "\nunhandled events:\n\n", print "%-40s %10s\n" % ("event", "count"), print "%-40s %10s\n" % ("----------------------------------------", \ "-----------"), for event_name in keys: print "%-40s %10d\n" % (event_name, unhandled[event_name]) cess_completion_ring(adapter); pvscsi_reset_all(adapter); adapter->use_msg = use_msg; pvscsi_setup_all_rings(adapter);
	pvscsi_unmask_intr(adapter);

	spin_unlock_irqrestore(&adapter->hw_lock, flags);

	return SUCCESS;
}

/*
 * Bus reset handler (.eh_bus_reset_handler). Under hw_lock: kick the request
 * ring, issue the bus reset command, then process the completion ring.
 * Always returns SUCCESS.
 */
static int pvscsi_bus_reset(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct pvscsi_adapter *adapter = shost_priv(host);
	unsigned long flags;

	scmd_printk(KERN_INFO, cmd, "SCSI Bus reset\n");

	/*
	 * We don't want to queue new requests for this bus after
	 * flushing all pending requests to emulation, since new
	 * requests could then sneak in during this bus reset phase,
	 * so take the lock now.
	 */
	spin_lock_irqsave(&adapter->hw_lock, flags);

	pvscsi_process_request_ring(adapter);
	ll_bus_reset(adapter);
	pvscsi_process_completion_ring(adapter);

	spin_unlock_irqrestore(&adapter->hw_lock, flags);

	return SUCCESS;
}

/*
 * Device reset handler (.eh_device_reset_handler). Same sequence as the bus
 * reset, but the reset command targets cmd->device->id only.
 * Always returns SUCCESS.
 */
static int pvscsi_device_reset(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct pvscsi_adapter *adapter = shost_priv(host);
	unsigned long flags;

	scmd_printk(KERN_INFO, cmd, "SCSI device reset on scsi%u:%u\n",
		    host->host_no, cmd->device->id);

	/*
	 * We don't want to queue new requests for this device after flushing
	 * all pending requests to emulation, since new requests could then
	 * sneak in during this device reset phase, so take the lock now.
	 */
	spin_lock_irqsave(&adapter->hw_lock, flags);

	pvscsi_process_request_ring(adapter);
	ll_device_reset(adapter, cmd->device->id);
	pvscsi_process_completion_ring(adapter);

	spin_unlock_irqrestore(&adapter->hw_lock, flags);

	return SUCCESS;
}

/* Forward declaration: pvscsi_info() reads cmd_per_lun from the template. */
static struct scsi_host_template pvscsi_template;

/*
 * Host description string (.info). The text is formatted into a function
 * static buffer, so the returned pointer is shared between calls and its
 * contents are overwritten by the next call.
 */
static const char *pvscsi_info(struct Scsi_Host *host)
{
	struct pvscsi_adapter *adapter = shost_priv(host);
	static char buf[256];

	sprintf(buf, "VMware PVSCSI storage adapter rev %d, req/cmp/msg rings: "
		"%u/%u/%u pages, cmd_per_lun=%u", adapter->rev,
		adapter->req_pages, adapter->cmp_pages, adapter->msg_pages,
		pvscsi_template.cmd_per_lun);

	return buf;
}

/*
 * SCSI host template. can_queue and cmd_per_lun are not set here; they are
 * filled in by pvscsi_probe() from the module parameters.
 */
static struct scsi_host_template pvscsi_template = {
	.module				= THIS_MODULE,
	.name				= "VMware PVSCSI Host Adapter",
	.proc_name			= "vmw_pvscsi",
	.info				= pvscsi_info,
	.queuecommand			= pvscsi_queue,
	.this_id			= -1,
	.sg_tablesize			= PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT,
	.dma_boundary			= UINT_MAX,
	.max_sectors			= 0xffff,
	.use_clustering			= ENABLE_CLUSTERING,
	.eh_abort_handler		= pvscsi_abort,
	.eh_device_reset_handler	= pvscsi_device_reset,
	.eh_bus_reset_handler		= pvscsi_bus_reset,
	.eh_host_reset_handler		= pvscsi_host_reset,
};

/*
 * Handle one message ring entry. DEV_ADDED adds the device unless a lookup
 * shows it already exists; DEV_REMOVED removes it if the lookup finds it.
 * Other message types are only logged. A host reference is held around the
 * lookup and add/remove; if it cannot be taken the message is dropped.
 */
static void pvscsi_process_msg(const struct pvscsi_adapter *adapter,
			       const struct PVSCSIRingMsgDesc *e)
{
	struct PVSCSIRingsState *s = adapter->rings_state;
	struct Scsi_Host *host = adapter->host;
	struct scsi_device *sdev;

	printk(KERN_INFO "vmw_pvscsi: msg type: 0x%x - MSG RING: %u/%u (%u) \n",
	       e->type, s->msgProdIdx, s->msgConsIdx, s->msgNumEntriesLog2);

	/* build-time reminder to extend this function when a type is added */
	BUILD_BUG_ON(PVSCSI_MSG_LAST != 2);

	if (e->type == PVSCSI_MSG_DEV_ADDED) {
		struct PVSCSIMsgDescDevStatusChanged *desc;
		desc = (struct PVSCSIMsgDescDevStatusChanged *)e;

		printk(KERN_INFO
		       "vmw_pvscsi: msg: device added at scsi%u:%u:%u\n",
		       desc->bus, desc->target, desc->lun[1]);

		if (!scsi_host_get(host))
			return;

		sdev = scsi_device_lookup(host, desc->bus, desc->target,
					  desc->lun[1]);
		if (sdev) {
			printk(KERN_INFO "vmw_pvscsi: device already exists\n");
			scsi_device_put(sdev);
		} else
			scsi_add_device(adapter->host, desc->bus,
					desc->target, desc->lun[1]);

		scsi_host_put(host);
	} else if (e->type == PVSCSI_MSG_DEV_REMOVED) {
		struct PVSCSIMsgDescDevStatusChanged *desc;
		desc = (struct PVSCSIMsgDescDevStatusChanged *)e;

		printk(KERN_INFO
		       "vmw_pvscsi: msg: device removed at scsi%u:%u:%u\n",
		       desc->bus, desc->target, desc->lun[1]);

		if (!scsi_host_get(host))
			return;

		sdev = scsi_device_lookup(host, desc->bus, desc->target,
					  desc->lun[1]);
		if (sdev) {
			scsi_remove_device(sdev);
			scsi_device_put(sdev);
		} else
			printk(KERN_INFO
			       "vmw_pvscsi: failed to lookup scsi%u:%u:%u\n",
			       desc->bus, desc->target, desc->lun[1]);

		scsi_host_put(host);
	}
}

/* Non-zero when the msg ring producer index differs from the consumer index. */
static int pvscsi_msg_pending(const struct pvscsi_adapter *adapter)
{
	struct PVSCSIRingsState *s = adapter->rings_state;

	return s->msgProdIdx != s->msgConsIdx;
}

/*
 * Consume every pending message: process the entry at the consumer index
 * (masked by the ring size), then advance the consumer index. The barrier()
 * calls keep the compiler from moving the entry access across the index
 * reads/writes.
 */
static void pvscsi_process_msg_ring(const struct pvscsi_adapter *adapter)
{
	struct PVSCSIRingsState *s = adapter->rings_state;
	struct PVSCSIRingMsgDesc *ring = adapter->msg_ring;
	u32 msg_entries = s->msgNumEntriesLog2;

	while (pvscsi_msg_pending(adapter)) {
		struct PVSCSIRingMsgDesc *e = ring + (s->msgConsIdx &
						      MASK(msg_entries));

		barrier();
		pvscsi_process_msg(adapter, e);
		barrier();
		s->msgConsIdx++;
	}
}

/* Work item body: drain the msg ring of the adapter embedding @data. */
static void pvscsi_msg_workqueue_handler(struct work_struct *data)
{
	struct pvscsi_adapter *adapter;

	adapter = container_of(data, struct pvscsi_adapter, work);

	pvscsi_process_msg_ring(adapter);
}

/*
 * Decide whether the msg ring can be used and create its workqueue.
 *
 * Returns 0 when the use_msg parameter is off, when the COMMAND_STATUS
 * register reads back -1 after PVSCSI_CMD_SETUP_MSG_RING is written to the
 * COMMAND register, or when the workqueue cannot be created; returns 1 once
 * the workqueue exists and the work item is initialised.
 */
static int pvscsi_setup_msg_workqueue(struct pvscsi_adapter *adapter)
{
	char name[32];

	if (!pvscsi_use_msg)
		return 0;

	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
			 PVSCSI_CMD_SETUP_MSG_RING);

	if (pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS) == -1)
		return 0;

	snprintf(name, sizeof(name),
		 "vmw_pvscsi_wq_%u", adapter->host->host_no);

	adapter->workqueue = create_singlethread_workqueue(name);
	if (!adapter->workqueue) {
		printk(KERN_ERR "vmw_pvscsi: failed to create work queue\n");
		return 0;
	}
	INIT_WORK(&adapter->work, pvscsi_msg_workqueue_handler);

	return 1;
}
static irqreturn_t pvscsi_isr(int irq, void *devp) { struct pvscsi_adapter *adapter = devp; int handled; if (adapter->use_msi || adapter->use_msix) handled = true; else { u32 val = pvscsi_read_intr_status(adapter); handled = (val & PVSCSI_INTR_ALL_SUPPORTED) != 0; if (handled) pvscsi_write_intr_status(devp, val); } if (handled) { unsigned long flags; spin_lock_irqsave(&adapter->hw_lock, flags); pvscsi_process_completion_ring(adapter); if (adapter->use_msg && pvscsi_msg_pending(adapter)) queue_work(adapter->workqueue, &adapter->work); spin_unlock_irqrestore(&adapter->hw_lock, flags); } return IRQ_RETVAL(handled); } static void pvscsi_free_sgls(const struct pvscsi_adapter *adapter) { struct pvscsi_ctx *ctx = adapter->cmd_map; unsigned i; for (i = 0; i < adapter->req_depth; ++i, ++ctx) free_pages((unsigned long)ctx->sgl, get_order(SGL_SIZE)); } static int pvscsi_setup_msix(const struct pvscsi_adapter *adapter, unsigned int *irq) { struct msix_entry entry = { 0, PVSCSI_VECTOR_COMPLETION }; int ret; ret = pci_enable_msix(adapter->dev, &entry, 1); if (ret) return ret; *irq = entry.vector; return 0; } static void pvscsi_shutdown_intr(struct pvscsi_adapter *adapter) { if (adapter->irq) { free_irq(adapter->irq, adapter); adapter->irq = 0; } if (adapter->use_msi) { pci_disable_msi(adapter->dev); adapter->use_msi = 0; } else if (adapter->use_msix) { pci_disable_msix(adapter->dev); adapter->use_msix = 0; } } static void pvscsi_release_resources(struct pvscsi_adapter *adapter) { pvscsi_shutdown_intr(adapter); if (adapter->workqueue) destroy_workqueue(adapter->workqueue); if (adapter->mmioBase) pci_iounmap(adapter->dev, adapter->mmioBase); pci_release_regions(adapter->dev); if (adapter->cmd_map) { pvscsi_free_sgls(adapter); kfree(adapter->cmd_map); } if (adapter->rings_state) pci_free_consistent(adapter->dev, PAGE_SIZE, adapter->rings_state, adapter->ringStatePA); if (adapter->req_ring) pci_free_consistent(adapter->dev, adapter->req_pages * PAGE_SIZE, adapter->req_ring, 
adapter->reqRingPA); if (adapter->cmp_ring) pci_free_consistent(adapter->dev, adapter->cmp_pages * PAGE_SIZE, adapter->cmp_ring, adapter->cmpRingPA); if (adapter->msg_ring) pci_free_consistent(adapter->dev, adapter->msg_pages * PAGE_SIZE, adapter->msg_ring, adapter->msgRingPA); } /* * Allocate scatter gather lists. * * These are statically allocated. Trying to be clever was not worth it. * * Dynamic allocation can fail, and we can't go deeep into the memory * allocator, since we're a SCSI driver, and trying too hard to allocate * memory might generate disk I/O. We also don't want to fail disk I/O * in that case because we can't get an allocation - the I/O could be * trying to swap out data to free memory. Since that is pathological, * just use a statically allocated scatter list. * */ static int __devinit pvscsi_allocate_sg(struct pvscsi_adapter *adapter) { struct pvscsi_ctx *ctx; int i; ctx = adapter->cmd_map; BUILD_BUG_ON(sizeof(struct pvscsi_sg_list) > SGL_SIZE); for (i = 0; i < adapter->req_depth; ++i, ++ctx) { ctx->sgl = (void *)__get_free_pages(GFP_KERNEL, get_order(SGL_SIZE)); ctx->sglPA = 0; BUG_ON(!IS_ALIGNED(((unsigned long)ctx->sgl), PAGE_SIZE)); if (!ctx->sgl) { for (; i >= 0; --i, --ctx) { free_pages((unsigned long)ctx->sgl, get_order(SGL_SIZE)); ctx->sgl = NULL; } return -ENOMEM; } } return 0; } static int __devinit pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct pvscsi_adapter *adapter; struct Scsi_Host *host; unsigned int i; unsigned long flags = 0; int error; error = -ENODEV; if (pci_enable_device(pdev)) return error; if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0 && pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { printk(KERN_INFO "vmw_pvscsi: using 64bit dma\n"); } else if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) == 0 && pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) == 0) { printk(KERN_INFO "vmw_pvscsi: using 32bit dma\n"); } else { printk(KERN_ERR "vmw_pvscsi: failed to set DMA mask\n"); goto 
out_disable_device; } pvscsi_template.can_queue = min(PVSCSI_MAX_NUM_PAGES_REQ_RING, pvscsi_ring_pages) * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE; pvscsi_template.cmd_per_lun = min(pvscsi_template.can_queue, pvscsi_cmd_per_lun); host = scsi_host_alloc(&pvscsi_template, sizeof(struct pvscsi_adapter)); if (!host) { printk(KERN_ERR "vmw_pvscsi: failed to allocate host\n"); goto out_disable_device; } adapter = shost_priv(host); memset(adapter, 0, sizeof(*adapter)); adapter->dev = pdev; adapter->host = host; spin_lock_init(&adapter->hw_lock); host->max_channel = 0; host->max_id = 16; host->max_lun = 1; host->max_cmd_len = 16; adapter->rev = pdev->revision; if (pci_request_regions(pdev, "vmw_pvscsi")) { printk(KERN_ERR "vmw_pvscsi: pci memory selection failed\n"); goto out_free_host; } for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { if ((pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE_IO)) continue; if (pci_resource_len(pdev, i) < PVSCSI_MEM_SPACE_SIZE) continue; break; } if (i == DEVICE_COUNT_RESOURCE) { printk(KERN_ERR "vmw_pvscsi: adapter has no suitable MMIO region\n"); goto out_release_resources; } adapter->mmioBase = pci_iomap(pdev, i, PVSCSI_MEM_SPACE_SIZE); if (!adapter->mmioBase) { printk(KERN_ERR "vmw_pvscsi: can't iomap for BAR %d memsize %lu\n", i, PVSCSI_MEM_SPACE_SIZE); goto out_release_resources; } pci_set_master(pdev); pci_set_drvdata(pdev, host); ll_adapter_reset(adapter); adapter->use_msg = pvscsi_setup_msg_workqueue(adapter); error = pvscsi_allocate_rings(adapter); if (error) { printk(KERN_ERR "vmw_pvscsi: unable to allocate ring memory\n"); goto out_release_resources; } /* * From this point on we should reset the adapter if anything goes * wrong. 
*/ pvscsi_setup_all_rings(adapter); adapter->cmd_map = kcalloc(adapter->req_depth, sizeof(struct pvscsi_ctx), GFP_KERNEL); if (!adapter->cmd_map) { printk(KERN_ERR "vmw_pvscsi: failed to allocate memory.\n"); error = -ENOMEM; goto out_reset_adapter; } INIT_LIST_HEAD(&adapter->cmd_pool); for (i = 0; i < adapter->req_depth; i++) { struct pvscsi_ctx *ctx = adapter->cmd_map + i; list_add(&ctx->list, &adapter->cmd_pool); } error = pvscsi_allocate_sg(adapter); if (error) { printk(KERN_ERR "vmw_pvscsi: unable to allocate s/g table\n"); goto out_reset_adapter; } if (!pvscsi_disable_msix && pvscsi_setup_msix(adapter, &adapter->irq) == 0) { printk(KERN_INFO "vmw_pvscsi: using MSI-X\n"); adapter->use_msix = 1; } else if (!pvscsi_disable_msi && pci_enable_msi(pdev) == 0) { printk(KERN_INFO "vmw_pvscsi: using MSI\n"); adapter->use_msi = 1; adapter->irq = pdev->irq; } else { printk(KERN_INFO "vmw_pvscsi: using INTx\n"); adapter->irq = pdev->irq; flags = IRQF_SHARED; } error = request_irq(adapter->irq, pvscsi_isr, flags, "vmw_pvscsi", adapter); if (error) { printk(KERN_ERR "vmw_pvscsi: unable to request IRQ: %d\n", error); adapter->irq = 0; goto out_reset_adapter; } error = scsi_add_host(host, &pdev->dev); if (error) { printk(KERN_ERR "vmw_pvscsi: scsi_add_host failed: %d\n", error); goto out_reset_adapter; } dev_info(&pdev->dev, "VMware PVSCSI rev %d host #%u\n", adapter->rev, host->host_no); pvscsi_unmask_intr(adapter); scsi_scan_host(host); return 0; out_reset_adapter: ll_adapter_reset(adapter); out_release_resources: pvscsi_release_resources(adapter); out_free_host: scsi_host_put(host); out_disable_device: pci_set_drvdata(pdev, NULL); pci_disable_device(pdev); return error; } static void __pvscsi_shutdown(struct pvscsi_adapter *adapter) { pvscsi_mask_intr(adapter); if (adapter->workqueue) flush_workqueue(adapter->workqueue); pvscsi_shutdown_intr(adapter); pvscsi_process_request_ring(adapter); pvscsi_process_completion_ring(adapter); ll_adapter_reset(adapter); } static void 
pvscsi_shutdown(struct pci_dev *dev) { struct Scsi_Host *host = pci_get_drvdata(dev); struct pvscsi_adapter *adapter = shost_priv(host); __pvscsi_shutdown(adapter); } static void pvscsi_remove(struct pci_dev *pdev) { struct Scsi_Host *host = pci_get_drvdata(pdev); struct pvscsi_adapter *adapter = shost_priv(host); scsi_remove_host(host); __pvscsi_shutdown(adapter); pvscsi_release_resources(adapter); scsi_host_put(host); pci_set_drvdata(pdev, NULL); pci_disable_device(pdev); } static struct pci_driver pvscsi_pci_driver = { .name = "vmw_pvscsi", .id_table = pvscsi_pci_tbl, .probe = pvscsi_probe, .remove = __devexit_p(pvscsi_remove), .shutdown = pvscsi_shutdown, }; static int __init pvscsi_init(void) { pr_info("%s - version %s\n", PVSCSI_LINUX_DRIVER_DESC, PVSCSI_DRIVER_VERSION_STRING); return pci_register_driver(&pvscsi_pci_driver); } static void __exit pvscsi_exit(void) { pci_unregister_driver(&pvscsi_pci_driver); } module_init(pvscsi_init); module_exit(pvscsi_exit);