author     Matt Gates <matthew.gates@hp.com>           2014-02-18 14:55:17 -0500
committer  James Bottomley <JBottomley@Parallels.com>  2014-03-15 13:19:02 -0400
commit     e1f7de0cdd68d246d7008241cd9e443a54f880a8 (patch)
tree       060525e7cdc5f2f86f9fcf4bb5ee3a22861c9e8f
parent     e1d9cbfa09cdd39c4821777a6980dd643e5493ad (diff)
[SCSI] hpsa: add support for 'fastpath' i/o

For certain I/Os to certain devices (unmasked physical disks) we can bypass
the RAID stack firmware and issue the I/O directly to the device, which is
faster.

Signed-off-by: Matt Gates <matthew.gates@hp.com>
Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
-rw-r--r--  drivers/scsi/hpsa.c      | 296
-rw-r--r--  drivers/scsi/hpsa.h      |  51
-rw-r--r--  drivers/scsi/hpsa_cmd.h  |  60
3 files changed, 388 insertions, 19 deletions
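In outline, the patch takes the fast path only when the controller has negotiated I/O accelerator mode 1, the target is an exposed physical disk (it has an ioaccel handle), the request is a READ(10) or WRITE(10), and the scatter/gather list fits in a single accelerator frame; everything else falls back to the normal CISS path. Below is a stand-alone C sketch of that eligibility test, not part of the patch: the struct types are simplified stand-ins, while the constant and field names follow the patch and the standard SCSI opcode values.

#include <stdbool.h>
#include <stdint.h>

#define READ_10                0x28     /* SCSI opcodes, as in <scsi/scsi.h> */
#define WRITE_10               0x2a
#define IOACCEL1_MAXSGENTRIES  24       /* ioaccel commands cannot chain S/G lists */
#define CFGTBL_Trans_io_accel1 0x00000080l

/* Simplified stand-ins for the driver's device, controller and command state. */
struct fp_device { uint32_t ioaccel_handle; };   /* nonzero only for exposed physical disks */
struct fp_ctlr   { unsigned long transMethod; }; /* transport mode negotiated with firmware */
struct fp_cmd    { uint8_t opcode; int sg_count; };

/* Mirrors the test the patch adds in hpsa_scsi_queue_command_lck() before
 * falling back to the normal CISS submission path.
 */
static bool use_fastpath(const struct fp_ctlr *h, const struct fp_device *dev,
                         const struct fp_cmd *cmd)
{
        return (h->transMethod & CFGTBL_Trans_io_accel1) &&
               dev->ioaccel_handle &&
               (cmd->opcode == READ_10 || cmd->opcode == WRITE_10) &&
               cmd->sg_count <= IOACCEL1_MAXSGENTRIES;
}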
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index f867e46c3991..4047c37aa4f8 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -204,7 +204,7 @@ static void check_ioctl_unit_attention(struct ctlr_info *h,
         struct CommandList *c);
 /* performant mode helper functions */
 static void calc_bucket_map(int *bucket, int num_buckets,
-        int nsgs, int *bucket_map);
+        int nsgs, int min_blocks, int *bucket_map);
 static void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h);
 static inline u32 next_command(struct ctlr_info *h, u8 q);
 static int hpsa_find_cfg_addrs(struct pci_dev *pdev, void __iomem *vaddr,
@@ -570,6 +570,9 @@ static inline u32 next_command(struct ctlr_info *h, u8 q)
         struct reply_pool *rq = &h->reply_queue[q];
         unsigned long flags;

+        if (h->transMethod & CFGTBL_Trans_io_accel1)
+                return h->access.command_completed(h, q);
+
         if (unlikely(!(h->transMethod & CFGTBL_Trans_Performant)))
                 return h->access.command_completed(h, q);

@@ -1203,7 +1206,8 @@ static void complete_scsi_command(struct CommandList *cp)
         h = cp->h;

         scsi_dma_unmap(cmd); /* undo the DMA mappings */
-        if (cp->Header.SGTotal > h->max_cmd_sg_entries)
+        if ((cp->cmd_type == CMD_SCSI) &&
+                (cp->Header.SGTotal > h->max_cmd_sg_entries))
                 hpsa_unmap_sg_chain_block(h, cp);

         cmd->result = (DID_OK << 16); /* host byte */
@@ -1227,6 +1231,19 @@ static void complete_scsi_command(struct CommandList *cp)
                 return;
         }

+        /* For I/O accelerator commands, copy over some fields to the normal
+         * CISS header used below for error handling.
+         */
+        if (cp->cmd_type == CMD_IOACCEL1) {
+                struct io_accel1_cmd *c = &h->ioaccel_cmd_pool[cp->cmdindex];
+                cp->Header.SGList = cp->Header.SGTotal = scsi_sg_count(cmd);
+                cp->Request.CDBLen = c->io_flags & IOACCEL1_IOFLAGS_CDBLEN_MASK;
+                cp->Header.Tag.lower = c->Tag.lower;
+                cp->Header.Tag.upper = c->Tag.upper;
+                memcpy(cp->Header.LUN.LunAddrBytes, c->CISS_LUN, 8);
+                memcpy(cp->Request.CDB, c->CDB, cp->Request.CDBLen);
+        }
+
         /* an error has occurred */
         switch (ei->CommandStatus) {

@@ -2070,6 +2087,9 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
         case TYPE_DISK:
                 if (i < nphysicals)
                         break;
+                memcpy(&this_device->ioaccel_handle,
+                        &lunaddrbytes[20],
+                        sizeof(this_device->ioaccel_handle));
                 ncurrent++;
                 break;
         case TYPE_TAPE:
@@ -2164,6 +2184,104 @@ sglist_finished:
         return 0;
 }

+/*
+ * Queue a command to the I/O accelerator path.
+ * This method does not currently support S/G chaining.
+ */
+static int hpsa_scsi_ioaccel_queue_command(struct ctlr_info *h,
+        struct CommandList *c)
+{
+        struct scsi_cmnd *cmd = c->scsi_cmd;
+        struct hpsa_scsi_dev_t *dev = cmd->device->hostdata;
+        struct io_accel1_cmd *cp = &h->ioaccel_cmd_pool[c->cmdindex];
+        unsigned int len;
+        unsigned int total_len = 0;
+        struct scatterlist *sg;
+        u64 addr64;
+        int use_sg, i;
+        struct SGDescriptor *curr_sg;
+        u32 control = IOACCEL1_CONTROL_SIMPLEQUEUE;
+
+        BUG_ON(cmd->cmd_len > IOACCEL1_IOFLAGS_CDBLEN_MAX);
+
+        c->cmd_type = CMD_IOACCEL1;
+
+        /* Adjust the DMA address to point to the accelerated command buffer */
+        c->busaddr = (u32) h->ioaccel_cmd_pool_dhandle +
+                                (c->cmdindex * sizeof(*cp));
+        BUG_ON(c->busaddr & 0x0000007F);
+
+        use_sg = scsi_dma_map(cmd);
+        if (use_sg < 0)
+                return use_sg;
+
+        if (use_sg) {
+                curr_sg = cp->SG;
+                scsi_for_each_sg(cmd, sg, use_sg, i) {
+                        addr64 = (u64) sg_dma_address(sg);
+                        len = sg_dma_len(sg);
+                        total_len += len;
+                        curr_sg->Addr.lower = (u32) (addr64 & 0x0FFFFFFFFULL);
+                        curr_sg->Addr.upper =
+                                (u32) ((addr64 >> 32) & 0x0FFFFFFFFULL);
+                        curr_sg->Len = len;
+
+                        if (i == (scsi_sg_count(cmd) - 1))
+                                curr_sg->Ext = HPSA_SG_LAST;
+                        else
+                                curr_sg->Ext = 0;  /* we are not chaining */
+                        curr_sg++;
+                }
+
+                switch (cmd->sc_data_direction) {
+                case DMA_TO_DEVICE:
+                        control |= IOACCEL1_CONTROL_DATA_OUT;
+                        break;
+                case DMA_FROM_DEVICE:
+                        control |= IOACCEL1_CONTROL_DATA_IN;
+                        break;
+                case DMA_NONE:
+                        control |= IOACCEL1_CONTROL_NODATAXFER;
+                        break;
+                default:
+                        dev_err(&h->pdev->dev, "unknown data direction: %d\n",
+                                cmd->sc_data_direction);
+                        BUG();
+                        break;
+                }
+        } else {
+                control |= IOACCEL1_CONTROL_NODATAXFER;
+        }
+
+        /* Fill out the command structure to submit */
+        cp->dev_handle = dev->ioaccel_handle;
+        cp->transfer_len = total_len;
+        cp->io_flags = IOACCEL1_IOFLAGS_IO_REQ |
+                        (cmd->cmd_len & IOACCEL1_IOFLAGS_CDBLEN_MASK);
+        cp->control = control;
+        memcpy(cp->CDB, cmd->cmnd, cmd->cmd_len);
+        memcpy(cp->CISS_LUN, dev->scsi3addr, 8);
+
+        /* Tell the controller to post the reply to the queue for this
+         * processor.  This seems to give the best I/O throughput.
+         */
+        cp->ReplyQueue = smp_processor_id() % h->nreply_queues;
+
+        /* Set the bits in the address sent down to include:
+         *  - performant mode bit (bit 0)
+         *  - pull count (bits 1-3)
+         *  - command type (bits 4-6)
+         */
+        c->busaddr |= 1 | (h->ioaccel1_blockFetchTable[use_sg] << 1) |
+                                IOACCEL1_BUSADDR_CMDTYPE;
+
+        /* execute command (bypassing cmd queue if possible) */
+        if (unlikely(h->access.fifo_full(h)))
+                enqueue_cmd_and_start_io(h, c);
+        else
+                h->access.submit_command(h, c);
+        return 0;
+}

 static int hpsa_scsi_queue_command_lck(struct scsi_cmnd *cmd,
         void (*done)(struct scsi_cmnd *))
@@ -2207,6 +2325,14 @@ static int hpsa_scsi_queue_command_lck(struct scsi_cmnd *cmd,

         c->cmd_type = CMD_SCSI;
         c->scsi_cmd = cmd;
+
+        /* Call alternate submit routine for I/O accelerated commands */
+        if ((likely(h->transMethod & CFGTBL_Trans_io_accel1)) &&
+                (dev->ioaccel_handle) &&
+                ((cmd->cmnd[0] == READ_10) || (cmd->cmnd[0] == WRITE_10)) &&
+                (scsi_sg_count(cmd) <= IOACCEL1_MAXSGENTRIES))
+                return hpsa_scsi_ioaccel_queue_command(h, c);
+
         c->Header.ReplyQueue = 0;  /* unused in simple mode */
         memcpy(&c->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8);
         c->Header.Tag.lower = (c->cmdindex << DIRECT_LOOKUP_SHIFT);
@@ -2780,6 +2906,7 @@ static struct CommandList *cmd_special_alloc(struct ctlr_info *h)
                 return NULL;
         memset(c, 0, sizeof(*c));

+        c->cmd_type = CMD_SCSI;
         c->cmdindex = -1;

         c->err_info = pci_alloc_consistent(h->pdev, sizeof(*c->err_info),
@@ -3565,7 +3692,7 @@ static inline void finish_cmd(struct CommandList *c)
         spin_unlock_irqrestore(&h->lock, flags);

         dial_up_lockup_detection_on_fw_flash_complete(c->h, c);
-        if (likely(c->cmd_type == CMD_SCSI))
+        if (likely(c->cmd_type == CMD_IOACCEL1 || c->cmd_type == CMD_SCSI))
                 complete_scsi_command(c);
         else if (c->cmd_type == CMD_IOCTL_PEND)
                 complete(c->waiting);
@@ -4588,6 +4715,10 @@ static void hpsa_free_cmd_pool(struct ctlr_info *h)
                 h->nr_cmds * sizeof(struct ErrorInfo),
                 h->errinfo_pool,
                 h->errinfo_pool_dhandle);
+        if (h->ioaccel_cmd_pool)
+                pci_free_consistent(h->pdev,
+                        h->nr_cmds * sizeof(struct io_accel1_cmd),
+                        h->ioaccel_cmd_pool, h->ioaccel_cmd_pool_dhandle);
 }

 static int hpsa_request_irq(struct ctlr_info *h,
@@ -4687,6 +4818,7 @@ static void hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h)
         hpsa_free_irqs_and_disable_msix(h);
         hpsa_free_sg_chain_blocks(h);
         hpsa_free_cmd_pool(h);
+        kfree(h->ioaccel1_blockFetchTable);
         kfree(h->blockFetchTable);
         pci_free_consistent(h->pdev, h->reply_pool_size,
                 h->reply_pool, h->reply_pool_dhandle);
@@ -5040,6 +5172,7 @@ static void hpsa_remove_one(struct pci_dev *pdev)
                 h->reply_pool, h->reply_pool_dhandle);
         kfree(h->cmd_pool_bits);
         kfree(h->blockFetchTable);
+        kfree(h->ioaccel1_blockFetchTable);
         kfree(h->hba_inquiry_data);
         pci_disable_device(pdev);
         pci_release_regions(pdev);
@@ -5080,20 +5213,17 @@ static struct pci_driver hpsa_pci_driver = {
  * bits of the command address.
  */
 static void calc_bucket_map(int bucket[], int num_buckets,
-        int nsgs, int *bucket_map)
+        int nsgs, int min_blocks, int *bucket_map)
 {
         int i, j, b, size;

-        /* even a command with 0 SGs requires 4 blocks */
-#define MINIMUM_TRANSFER_BLOCKS 4
-#define NUM_BUCKETS 8
         /* Note, bucket_map must have nsgs+1 entries. */
         for (i = 0; i <= nsgs; i++) {
                 /* Compute size of a command with i SG entries */
-                size = i + MINIMUM_TRANSFER_BLOCKS;
+                size = i + min_blocks;
                 b = num_buckets; /* Assume the biggest bucket */
                 /* Find the bucket that is just big enough */
-                for (j = 0; j < 8; j++) {
+                for (j = 0; j < num_buckets; j++) {
                         if (bucket[j] >= size) {
                                 b = j;
                                 break;
@@ -5104,10 +5234,16 @@ static void calc_bucket_map(int bucket[], int num_buckets,
         }
 }

-static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 use_short_tags)
+static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support)
 {
         int i;
         unsigned long register_value;
+        unsigned long transMethod = CFGTBL_Trans_Performant |
+                        (trans_support & CFGTBL_Trans_use_short_tags) |
+                        CFGTBL_Trans_enable_directed_msix |
+                        (trans_support & CFGTBL_Trans_io_accel1);
+
+        struct access_method access = SA5_performant_access;

         /* This is a bit complicated. There are 8 registers on
          * the controller which we write to to tell it 8 different
@@ -5139,7 +5275,7 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 use_short_tags)

         bft[7] = SG_ENTRIES_IN_CMD + 4;
         calc_bucket_map(bft, ARRAY_SIZE(bft),
-                        SG_ENTRIES_IN_CMD, h->blockFetchTable);
+                        SG_ENTRIES_IN_CMD, 4, h->blockFetchTable);
         for (i = 0; i < 8; i++)
                 writel(bft[i], &h->transtable->BlockFetch[i]);

@@ -5156,9 +5292,15 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 use_short_tags)
                         &h->transtable->RepQAddr[i].lower);
         }

-        writel(CFGTBL_Trans_Performant | use_short_tags |
-                CFGTBL_Trans_enable_directed_msix,
-                &(h->cfgtable->HostWrite.TransportRequest));
+        writel(transMethod, &(h->cfgtable->HostWrite.TransportRequest));
+        /*
+         * enable outbound interrupt coalescing in accelerator mode;
+         */
+        if (trans_support & CFGTBL_Trans_io_accel1) {
+                access = SA5_ioaccel_mode1_access;
+                writel(10, &h->cfgtable->HostWrite.CoalIntDelay);
+                writel(4, &h->cfgtable->HostWrite.CoalIntCount);
+        }
         writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
         hpsa_wait_for_mode_change_ack(h);
         register_value = readl(&(h->cfgtable->TransportActive));
@@ -5168,18 +5310,102 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 use_short_tags)
                 return;
         }
         /* Change the access methods to the performant access methods */
-        h->access = SA5_performant_access;
-        h->transMethod = CFGTBL_Trans_Performant;
+        h->access = access;
+        h->transMethod = transMethod;
+
+        if (!(trans_support & CFGTBL_Trans_io_accel1))
+                return;
+
+        /* Set up I/O accelerator mode */
+        for (i = 0; i < h->nreply_queues; i++) {
+                writel(i, h->vaddr + IOACCEL_MODE1_REPLY_QUEUE_INDEX);
+                h->reply_queue[i].current_entry =
+                        readl(h->vaddr + IOACCEL_MODE1_PRODUCER_INDEX);
+        }
+        bft[7] = IOACCEL1_MAXSGENTRIES + 8;
+        calc_bucket_map(bft, ARRAY_SIZE(bft), IOACCEL1_MAXSGENTRIES, 8,
+                        h->ioaccel1_blockFetchTable);
+
+        /* initialize all reply queue entries to unused */
+        memset(h->reply_pool, (u8) IOACCEL_MODE1_REPLY_UNUSED,
+                        h->reply_pool_size);
+
+        /* set all the constant fields in the accelerator command
+         * frames once at init time to save CPU cycles later.
+         */
+        for (i = 0; i < h->nr_cmds; i++) {
+                struct io_accel1_cmd *cp = &h->ioaccel_cmd_pool[i];
+
+                cp->function = IOACCEL1_FUNCTION_SCSIIO;
+                cp->err_info = (u32) (h->errinfo_pool_dhandle +
+                                (i * sizeof(struct ErrorInfo)));
+                cp->err_info_len = sizeof(struct ErrorInfo);
+                cp->sgl_offset = IOACCEL1_SGLOFFSET;
+                cp->host_context_flags = IOACCEL1_HCFLAGS_CISS_FORMAT;
+                cp->timeout_sec = 0;
+                cp->ReplyQueue = 0;
+                cp->Tag.lower = (i << DIRECT_LOOKUP_SHIFT) | DIRECT_LOOKUP_BIT;
+                cp->Tag.upper = 0;
+                cp->host_addr.lower = (u32) (h->ioaccel_cmd_pool_dhandle +
+                                (i * sizeof(struct io_accel1_cmd)));
+                cp->host_addr.upper = 0;
+        }
+}
+
+static int hpsa_alloc_ioaccel_cmd_and_bft(struct ctlr_info *h)
+{
+        /* Command structures must be aligned on a 128-byte boundary
+         * because the 7 lower bits of the address are used by the
+         * hardware.
+         */
+#define IOACCEL1_COMMANDLIST_ALIGNMENT 128
+        BUILD_BUG_ON(sizeof(struct io_accel1_cmd) %
+                        IOACCEL1_COMMANDLIST_ALIGNMENT);
+        h->ioaccel_cmd_pool =
+                pci_alloc_consistent(h->pdev,
+                        h->nr_cmds * sizeof(*h->ioaccel_cmd_pool),
+                        &(h->ioaccel_cmd_pool_dhandle));
+
+        h->ioaccel1_blockFetchTable =
+                kmalloc(((IOACCEL1_MAXSGENTRIES + 1) *
+                        sizeof(u32)), GFP_KERNEL);
+
+        if ((h->ioaccel_cmd_pool == NULL) ||
+                (h->ioaccel1_blockFetchTable == NULL))
+                goto clean_up;
+
+        memset(h->ioaccel_cmd_pool, 0,
+                h->nr_cmds * sizeof(*h->ioaccel_cmd_pool));
+        return 0;
+
+clean_up:
+        if (h->ioaccel_cmd_pool)
+                pci_free_consistent(h->pdev,
+                        h->nr_cmds * sizeof(*h->ioaccel_cmd_pool),
+                        h->ioaccel_cmd_pool, h->ioaccel_cmd_pool_dhandle);
+        kfree(h->ioaccel1_blockFetchTable);
+        return 1;
 }

 static void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h)
 {
         u32 trans_support;
+        unsigned long transMethod = CFGTBL_Trans_Performant |
+                                CFGTBL_Trans_use_short_tags;
         int i;

         if (hpsa_simple_mode)
                 return;

+        /* Check for I/O accelerator mode support */
+        if (trans_support & CFGTBL_Trans_io_accel1) {
+                transMethod |= CFGTBL_Trans_io_accel1 |
+                                CFGTBL_Trans_enable_directed_msix;
+                if (hpsa_alloc_ioaccel_cmd_and_bft(h))
+                        goto clean_up;
+        }
+
+        /* TODO, check that this next line h->nreply_queues is correct */
         trans_support = readl(&(h->cfgtable->TransportSupport));
         if (!(trans_support & PERFORMANT_MODE))
                 return;
@@ -5206,9 +5432,7 @@ static void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h)
                 || (h->blockFetchTable == NULL))
                 goto clean_up;

-        hpsa_enter_performant_mode(h,
-                trans_support & CFGTBL_Trans_use_short_tags);
-
+        hpsa_enter_performant_mode(h, trans_support);
         return;

 clean_up:
@@ -5232,5 +5456,39 @@ static void __exit hpsa_cleanup(void)
         pci_unregister_driver(&hpsa_pci_driver);
 }

+static void __attribute__((unused)) verify_offsets(void)
+{
+#define VERIFY_OFFSET(member, offset) \
+        BUILD_BUG_ON(offsetof(struct io_accel1_cmd, member) != offset)
+
+        VERIFY_OFFSET(dev_handle, 0x00);
+        VERIFY_OFFSET(reserved1, 0x02);
+        VERIFY_OFFSET(function, 0x03);
+        VERIFY_OFFSET(reserved2, 0x04);
+        VERIFY_OFFSET(err_info, 0x0C);
+        VERIFY_OFFSET(reserved3, 0x10);
+        VERIFY_OFFSET(err_info_len, 0x12);
+        VERIFY_OFFSET(reserved4, 0x13);
+        VERIFY_OFFSET(sgl_offset, 0x14);
+        VERIFY_OFFSET(reserved5, 0x15);
+        VERIFY_OFFSET(transfer_len, 0x1C);
+        VERIFY_OFFSET(reserved6, 0x20);
+        VERIFY_OFFSET(io_flags, 0x24);
+        VERIFY_OFFSET(reserved7, 0x26);
+        VERIFY_OFFSET(LUN, 0x34);
+        VERIFY_OFFSET(control, 0x3C);
+        VERIFY_OFFSET(CDB, 0x40);
+        VERIFY_OFFSET(reserved8, 0x50);
+        VERIFY_OFFSET(host_context_flags, 0x60);
+        VERIFY_OFFSET(timeout_sec, 0x62);
+        VERIFY_OFFSET(ReplyQueue, 0x64);
+        VERIFY_OFFSET(reserved9, 0x65);
+        VERIFY_OFFSET(Tag, 0x68);
+        VERIFY_OFFSET(host_addr, 0x70);
+        VERIFY_OFFSET(CISS_LUN, 0x78);
+        VERIFY_OFFSET(SG, 0x78 + 8);
+#undef VERIFY_OFFSET
+}
+
 module_init(hpsa_init);
 module_exit(hpsa_cleanup);
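The "pull count" folded into bits 1-3 of the submitted address comes from a block fetch table built by calc_bucket_map(): for each possible S/G count it records which of the eight controller fetch sizes is just big enough for the command. Below is a stand-alone C sketch of that mapping using the ioaccel parameters from this patch (24 S/G entries, min_blocks of 8, last bucket IOACCEL1_MAXSGENTRIES + 8); the remaining bucket sizes in bft[] are illustrative only, not taken from the driver or hardware.

#include <stdio.h>

#define IOACCEL1_MAXSGENTRIES 24

/* Same algorithm as calc_bucket_map() in the patch: for a command with
 * i S/G entries, find the first (ascending) fetch size that covers it.
 */
static void calc_bucket_map(const int bucket[], int num_buckets,
                            int nsgs, int min_blocks, int *bucket_map)
{
        int i, j, b, size;

        for (i = 0; i <= nsgs; i++) {
                size = i + min_blocks;  /* blocks needed for i S/G entries */
                b = num_buckets;        /* assume the biggest bucket */
                for (j = 0; j < num_buckets; j++) {
                        if (bucket[j] >= size) {
                                b = j;
                                break;
                        }
                }
                bucket_map[i] = b;
        }
}

int main(void)
{
        /* Example fetch sizes; in the driver only the last entry is known to
         * be set to IOACCEL1_MAXSGENTRIES + 8 before this call.
         */
        int bft[8] = { 5, 6, 8, 10, 12, 20, 28, IOACCEL1_MAXSGENTRIES + 8 };
        int map[IOACCEL1_MAXSGENTRIES + 1];
        int i;

        calc_bucket_map(bft, 8, IOACCEL1_MAXSGENTRIES, 8, map);
        for (i = 0; i <= IOACCEL1_MAXSGENTRIES; i++)
                printf("%2d S/G entries -> bucket %d (%d blocks)\n",
                       i, map[i], bft[map[i]]);
        return 0;
}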
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index 01c328349c83..c7865f30ffd1 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -46,6 +46,7 @@ struct hpsa_scsi_dev_t {
         unsigned char vendor[8];        /* bytes 8-15 of inquiry data */
         unsigned char model[16];        /* bytes 16-31 of inquiry data */
         unsigned char raid_level;       /* from inquiry page 0xC1 */
+        u32 ioaccel_handle;
 };

 struct reply_pool {
@@ -95,6 +96,8 @@ struct ctlr_info {
         /* pointers to command and error info pool */
         struct CommandList      *cmd_pool;
         dma_addr_t              cmd_pool_dhandle;
+        struct io_accel1_cmd    *ioaccel_cmd_pool;
+        dma_addr_t              ioaccel_cmd_pool_dhandle;
         struct ErrorInfo        *errinfo_pool;
         dma_addr_t              errinfo_pool_dhandle;
         unsigned long           *cmd_pool_bits;
@@ -128,6 +131,7 @@ struct ctlr_info {
         u8 nreply_queues;
         dma_addr_t reply_pool_dhandle;
         u32 *blockFetchTable;
+        u32 *ioaccel1_blockFetchTable;
         unsigned char *hba_inquiry_data;
         u64 last_intr_timestamp;
         u32 last_heartbeat;
@@ -387,6 +391,45 @@ static bool SA5_performant_intr_pending(struct ctlr_info *h)
         return register_value & SA5_OUTDB_STATUS_PERF_BIT;
 }

+#define SA5_IOACCEL_MODE1_INTR_STATUS_CMP_BIT 0x100
+
+static bool SA5_ioaccel_mode1_intr_pending(struct ctlr_info *h)
+{
+        unsigned long register_value = readl(h->vaddr + SA5_INTR_STATUS);
+
+        return (register_value & SA5_IOACCEL_MODE1_INTR_STATUS_CMP_BIT) ?
+                true : false;
+}
+
+#define IOACCEL_MODE1_REPLY_QUEUE_INDEX  0x1A0
+#define IOACCEL_MODE1_PRODUCER_INDEX     0x1B8
+#define IOACCEL_MODE1_CONSUMER_INDEX     0x1BC
+#define IOACCEL_MODE1_REPLY_UNUSED       0xFFFFFFFFFFFFFFFFULL
+
+static unsigned long SA5_ioaccel_mode1_completed(struct ctlr_info *h,
+                                                        u8 q)
+{
+        u64 register_value;
+        struct reply_pool *rq = &h->reply_queue[q];
+        unsigned long flags;
+
+        BUG_ON(q >= h->nreply_queues);
+
+        register_value = rq->head[rq->current_entry];
+        if (register_value != IOACCEL_MODE1_REPLY_UNUSED) {
+                rq->head[rq->current_entry] = IOACCEL_MODE1_REPLY_UNUSED;
+                if (++rq->current_entry == rq->size)
+                        rq->current_entry = 0;
+                spin_lock_irqsave(&h->lock, flags);
+                h->commands_outstanding--;
+                spin_unlock_irqrestore(&h->lock, flags);
+        } else {
+                writel((q << 24) | rq->current_entry,
+                        h->vaddr + IOACCEL_MODE1_CONSUMER_INDEX);
+        }
+        return (unsigned long) register_value;
+}
+
 static struct access_method SA5_access = {
         SA5_submit_command,
         SA5_intr_mask,
@@ -395,6 +438,14 @@ static struct access_method SA5_access = {
         SA5_completed,
 };

+static struct access_method SA5_ioaccel_mode1_access = {
+        SA5_submit_command,
+        SA5_performant_intr_mask,
+        SA5_fifo_full,
+        SA5_ioaccel_mode1_intr_pending,
+        SA5_ioaccel_mode1_completed,
+};
+
 static struct access_method SA5_performant_access = {
         SA5_submit_command,
         SA5_performant_intr_mask,
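In accelerator mode the reply queues are consumed differently from performant mode: every slot is pre-filled with an all-ones "unused" pattern, and a completion is recognized when the controller overwrites a slot with the command's tag. Below is a minimal user-space model of that consume step, assuming a ring laid out like struct reply_pool; the MMIO consumer-index write-back and the commands_outstanding accounting done under the lock in SA5_ioaccel_mode1_completed() are only noted in comments.

#include <stdbool.h>
#include <stdint.h>

#define IOACCEL_MODE1_REPLY_UNUSED 0xFFFFFFFFFFFFFFFFULL

/* Simplified model of one reply queue: the controller writes completion
 * tags into 'head'; the driver consumes them in order.
 */
struct reply_ring {
        uint64_t *head;              /* ring of completion tags, shared with hw */
        unsigned int size;           /* number of entries */
        unsigned int current_entry;  /* next slot to look at */
};

/* Return the next tag if one is pending, mark the slot unused and wrap;
 * return false when the slot still holds the "unused" pattern (queue empty).
 * The real driver additionally tells the hardware how far it has consumed
 * by writing (q << 24) | current_entry to the consumer-index register.
 */
static bool reply_ring_pop(struct reply_ring *rq, uint64_t *tag)
{
        uint64_t v = rq->head[rq->current_entry];

        if (v == IOACCEL_MODE1_REPLY_UNUSED)
                return false;

        rq->head[rq->current_entry] = IOACCEL_MODE1_REPLY_UNUSED;
        if (++rq->current_entry == rq->size)
                rq->current_entry = 0;
        *tag = v;
        return true;
}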
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index 22cf799a2a1d..e682d2e6b387 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -129,6 +129,7 @@

 #define CFGTBL_Trans_Simple     0x00000002l
 #define CFGTBL_Trans_Performant 0x00000004l
+#define CFGTBL_Trans_io_accel1  0x00000080l
 #define CFGTBL_Trans_use_short_tags 0x20000000l
 #define CFGTBL_Trans_enable_directed_msix (1 << 30)

@@ -285,6 +286,7 @@ struct ErrorInfo {
 /* Command types */
 #define CMD_IOCTL_PEND  0x01
 #define CMD_SCSI        0x03
+#define CMD_IOACCEL1    0x04

 #define DIRECT_LOOKUP_SHIFT 5
 #define DIRECT_LOOKUP_BIT 0x10
@@ -335,6 +337,63 @@ struct CommandList {
         u8 pad[COMMANDLIST_PAD];
 };

+/* Max S/G elements in I/O accelerator command */
+#define IOACCEL1_MAXSGENTRIES           24
+
+/*
+ * Structure for I/O accelerator (mode 1) commands.
+ * Note that this structure must be 128-byte aligned in size.
+ */
+struct io_accel1_cmd {
+        u16 dev_handle;                 /* 0x00 - 0x01 */
+        u8  reserved1;                  /* 0x02 */
+        u8  function;                   /* 0x03 */
+        u8  reserved2[8];               /* 0x04 - 0x0B */
+        u32 err_info;                   /* 0x0C - 0x0F */
+        u8  reserved3[2];               /* 0x10 - 0x11 */
+        u8  err_info_len;               /* 0x12 */
+        u8  reserved4;                  /* 0x13 */
+        u8  sgl_offset;                 /* 0x14 */
+        u8  reserved5[7];               /* 0x15 - 0x1B */
+        u32 transfer_len;               /* 0x1C - 0x1F */
+        u8  reserved6[4];               /* 0x20 - 0x23 */
+        u16 io_flags;                   /* 0x24 - 0x25 */
+        u8  reserved7[14];              /* 0x26 - 0x33 */
+        u8  LUN[8];                     /* 0x34 - 0x3B */
+        u32 control;                    /* 0x3C - 0x3F */
+        u8  CDB[16];                    /* 0x40 - 0x4F */
+        u8  reserved8[16];              /* 0x50 - 0x5F */
+        u16 host_context_flags;         /* 0x60 - 0x61 */
+        u16 timeout_sec;                /* 0x62 - 0x63 */
+        u8  ReplyQueue;                 /* 0x64 */
+        u8  reserved9[3];               /* 0x65 - 0x67 */
+        struct vals32 Tag;              /* 0x68 - 0x6F */
+        struct vals32 host_addr;        /* 0x70 - 0x77 */
+        u8  CISS_LUN[8];                /* 0x78 - 0x7F */
+        struct SGDescriptor SG[IOACCEL1_MAXSGENTRIES];
+};
+
+#define IOACCEL1_FUNCTION_SCSIIO        0x00
+#define IOACCEL1_SGLOFFSET              32
+
+#define IOACCEL1_IOFLAGS_IO_REQ         0x4000
+#define IOACCEL1_IOFLAGS_CDBLEN_MASK    0x001F
+#define IOACCEL1_IOFLAGS_CDBLEN_MAX     16
+
+#define IOACCEL1_CONTROL_NODATAXFER     0x00000000
+#define IOACCEL1_CONTROL_DATA_OUT       0x01000000
+#define IOACCEL1_CONTROL_DATA_IN        0x02000000
+#define IOACCEL1_CONTROL_TASKPRIO_MASK  0x00007800
+#define IOACCEL1_CONTROL_TASKPRIO_SHIFT 11
+#define IOACCEL1_CONTROL_SIMPLEQUEUE    0x00000000
+#define IOACCEL1_CONTROL_HEADOFQUEUE    0x00000100
+#define IOACCEL1_CONTROL_ORDEREDQUEUE   0x00000200
+#define IOACCEL1_CONTROL_ACA            0x00000400
+
+#define IOACCEL1_HCFLAGS_CISS_FORMAT    0x0013
+
+#define IOACCEL1_BUSADDR_CMDTYPE        0x00000060
+
 /* Configuration Table Structure */
 struct HostWrite {
         u32 TransportRequest;
@@ -346,6 +405,7 @@ struct HostWrite {
 #define SIMPLE_MODE     0x02
 #define PERFORMANT_MODE 0x04
 #define MEMQ_MODE       0x08
+#define IOACCEL_MODE_1  0x80

 struct CfgTable {
         u8 Signature[4];
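The 128-byte alignment of the accelerator command frames exists because the driver reuses the low bits of each frame's bus address when posting the command: bit 0 carries the performant-mode flag, bits 1-3 the pull count from ioaccel1_blockFetchTable, and bits 4-6 the command type, so the address proper must have those bits clear. A small stand-alone sketch of that encoding follows; ioaccel1_submit_value() is a hypothetical helper, and the bit layout restates the comment in hpsa_scsi_ioaccel_queue_command() above.

#include <assert.h>
#include <stdint.h>

#define IOACCEL1_BUSADDR_CMDTYPE 0x00000060  /* command type in bits 4-6 */

/* Compose the value posted to the inbound register for a fastpath command:
 *   bit 0      - performant mode bit
 *   bits 1-3   - pull count (ioaccel1_blockFetchTable[sg_count])
 *   bits 4-6   - command type
 *   bits 7..31 - 128-byte-aligned bus address of the io_accel1_cmd frame
 */
static uint32_t ioaccel1_submit_value(uint32_t frame_busaddr, uint32_t pull_count)
{
        assert((frame_busaddr & 0x7F) == 0);   /* same invariant as the BUG_ON in the patch */
        return frame_busaddr | 1 | (pull_count << 1) | IOACCEL1_BUSADDR_CMDTYPE;
}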