author     Matt Gates <matthew.gates@hp.com>           2012-05-01 12:43:06 -0400
committer  James Bottomley <JBottomley@Parallels.com>  2012-05-10 04:16:25 -0400
commit     254f796b9f22b1944c64caabc356a56caaa2facd
tree       4c5ee6950b15456b0d11c3c36587fa659baf6632
parent     1d94f94d89848762306b4a8bd5e658c11828ab12
[SCSI] hpsa: use multiple reply queues
Smart Arrays can support multiple reply queues onto which command
completions may be deposited.  It can help performance quite a bit to
arrange for command completions to be processed on the same CPU from
which they were submitted, to increase the likelihood of cache hits.

Signed-off-by: Matt Gates <matthew.gates@hp.com>
Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
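In outline, the mechanism has three cooperating parts: the submit path stamps
each command with a reply-queue number derived from the submitting CPU, the
controller deposits the completion tag into that queue's ring buffer, and the
MSI-X vector registered for that ring fires on (or near) the submitting CPU.
A minimal sketch of the submit-side selection; pick_reply_queue() is a
hypothetical helper, and nreply_queues mirrors the field this patch adds:

    #include <stdint.h>

    /* Sketch: map the submitting CPU onto one of the reply queues.
     * With nr_cpus <= nreply_queues this is an identity mapping and
     * every completion is serviced where its command was issued;
     * beyond that, CPUs share queues round-robin. */
    static inline uint8_t pick_reply_queue(unsigned int cpu,
                                           uint8_t nreply_queues)
    {
            return cpu % nreply_queues;
    }

This is the same modulo the patch applies in set_performant_mode() below.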
-rw-r--r--  drivers/scsi/hpsa.c     | 181
-rw-r--r--  drivers/scsi/hpsa.h     |  40
-rw-r--r--  drivers/scsi/hpsa_cmd.h |   5
3 files changed, 153 insertions(+), 73 deletions(-)
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index bf5ed873a33e..e4b27c449ec1 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -172,7 +172,7 @@ static void check_ioctl_unit_attention(struct ctlr_info *h,
 static void calc_bucket_map(int *bucket, int num_buckets,
        int nsgs, int *bucket_map);
 static __devinit void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h);
-static inline u32 next_command(struct ctlr_info *h);
+static inline u32 next_command(struct ctlr_info *h, u8 q);
 static int __devinit hpsa_find_cfg_addrs(struct pci_dev *pdev,
        void __iomem *vaddr, u32 *cfg_base_addr, u64 *cfg_base_addr_index,
        u64 *cfg_offset);
@@ -529,24 +529,25 @@ static inline void addQ(struct list_head *list, struct CommandList *c)
        list_add_tail(&c->list, list);
 }
 
-static inline u32 next_command(struct ctlr_info *h)
+static inline u32 next_command(struct ctlr_info *h, u8 q)
 {
        u32 a;
+       struct reply_pool *rq = &h->reply_queue[q];
 
        if (unlikely(!(h->transMethod & CFGTBL_Trans_Performant)))
-               return h->access.command_completed(h);
+               return h->access.command_completed(h, q);
 
-       if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
-               a = *(h->reply_pool_head); /* Next cmd in ring buffer */
-               (h->reply_pool_head)++;
+       if ((rq->head[rq->current_entry] & 1) == rq->wraparound) {
+               a = rq->head[rq->current_entry];
+               rq->current_entry++;
                h->commands_outstanding--;
        } else {
                a = FIFO_EMPTY;
        }
        /* Check for wraparound */
-       if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
-               h->reply_pool_head = h->reply_pool;
-               h->reply_pool_wraparound ^= 1;
+       if (rq->current_entry == h->max_commands) {
+               rq->current_entry = 0;
+               rq->wraparound ^= 1;
        }
        return a;
 }
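Worth noting how the ring above detects new entries: the low bit of each
64-bit tag carries a phase, and the consumer compares it against its expected
wraparound value, flipping that expectation each time it wraps, so stale tags
from the previous pass never match. A standalone sketch of the same scheme,
under simplified, hypothetical types:

    #include <stdint.h>

    struct reply_ring {
            uint64_t *head;         /* ring of 'size' tags */
            uint32_t current_entry;
            uint32_t size;
            uint8_t  wraparound;    /* expected phase bit; starts at 1 */
    };

    #define RING_EMPTY (~0ULL)

    /* Pop one completion tag, or RING_EMPTY if none is ready. */
    static uint64_t ring_pop(struct reply_ring *rq)
    {
            uint64_t tag = RING_EMPTY;

            if ((rq->head[rq->current_entry] & 1) == rq->wraparound) {
                    tag = rq->head[rq->current_entry];
                    rq->current_entry++;
            }
            if (rq->current_entry == rq->size) {
                    rq->current_entry = 0;
                    rq->wraparound ^= 1;    /* flip expected phase */
            }
            return tag;
    }

The producer (here, the controller hardware) writes tags whose low bit equals
the current pass's phase, which is why the spec requires initializing
wraparound to 1 against a zeroed buffer: nothing matches until the hardware
has actually written an entry.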
@@ -557,8 +558,12 @@ static inline u32 next_command(struct ctlr_info *h)
  */
 static void set_performant_mode(struct ctlr_info *h, struct CommandList *c)
 {
-       if (likely(h->transMethod & CFGTBL_Trans_Performant))
+       if (likely(h->transMethod & CFGTBL_Trans_Performant)) {
                c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
+               if (likely(h->msix_vector))
+                       c->Header.ReplyQueue =
+                               smp_processor_id() % h->nreply_queues;
+       }
 }
 
 static void enqueue_cmd_and_start_io(struct ctlr_info *h,
@@ -3323,9 +3328,9 @@ static void start_io(struct ctlr_info *h)
        }
 }
 
-static inline unsigned long get_next_completion(struct ctlr_info *h)
+static inline unsigned long get_next_completion(struct ctlr_info *h, u8 q)
 {
-       return h->access.command_completed(h);
+       return h->access.command_completed(h, q);
 }
 
 static inline bool interrupt_pending(struct ctlr_info *h)
@@ -3428,9 +3433,20 @@ static int ignore_bogus_interrupt(struct ctlr_info *h)
        return 1;
 }
 
-static irqreturn_t hpsa_intx_discard_completions(int irq, void *dev_id)
+/*
+ * Convert &h->q[x] (passed to interrupt handlers) back to h.
+ * Relies on (h->q[x] == x) being true for x such that
+ * 0 <= x < MAX_REPLY_QUEUES.
+ */
+static struct ctlr_info *queue_to_hba(u8 *queue)
 {
-       struct ctlr_info *h = dev_id;
+       return container_of((queue - *queue), struct ctlr_info, q[0]);
+}
+
+static irqreturn_t hpsa_intx_discard_completions(int irq, void *queue)
+{
+       struct ctlr_info *h = queue_to_hba(queue);
+       u8 q = *(u8 *) queue;
        unsigned long flags;
        u32 raw_tag;
 
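A digression on queue_to_hba() above, since the pointer arithmetic is easy to
misread: request_irq() carries only a single void * cookie per vector, but
the handler needs both the controller and the queue index. The patch stores x
at h->q[x] and passes &h->q[x]; the handler reads x back through the pointer,
subtracts it to land on &h->q[0], and recovers h with container_of(). A
runnable userspace model of the same trick, with a hypothetical struct hba
standing in for struct ctlr_info:

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct hba {
            int id;
            unsigned char q[8];     /* q[x] == x, by construction */
    };

    static struct hba *queue_to_hba(unsigned char *queue)
    {
            /* queue points at q[x] and *queue == x, so queue - *queue
             * is &q[0]; container_of() then steps back to the hba. */
            return container_of(queue - *queue, struct hba, q[0]);
    }

    int main(void)
    {
            struct hba h = { .id = 42 };
            for (int i = 0; i < 8; i++)
                    h.q[i] = (unsigned char)i;
            unsigned char *cookie = &h.q[5]; /* what request_irq passes */
            printf("queue=%u id=%d\n", (unsigned)*cookie,
                   queue_to_hba(cookie)->id);
            return 0;       /* prints: queue=5 id=42 */
    }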
@@ -3442,71 +3458,75 @@ static irqreturn_t hpsa_intx_discard_completions(int irq, void *dev_id)
        spin_lock_irqsave(&h->lock, flags);
        h->last_intr_timestamp = get_jiffies_64();
        while (interrupt_pending(h)) {
-               raw_tag = get_next_completion(h);
+               raw_tag = get_next_completion(h, q);
                while (raw_tag != FIFO_EMPTY)
-                       raw_tag = next_command(h);
+                       raw_tag = next_command(h, q);
        }
        spin_unlock_irqrestore(&h->lock, flags);
        return IRQ_HANDLED;
 }
 
-static irqreturn_t hpsa_msix_discard_completions(int irq, void *dev_id)
+static irqreturn_t hpsa_msix_discard_completions(int irq, void *queue)
 {
-       struct ctlr_info *h = dev_id;
+       struct ctlr_info *h = queue_to_hba(queue);
        unsigned long flags;
        u32 raw_tag;
+       u8 q = *(u8 *) queue;
 
        if (ignore_bogus_interrupt(h))
                return IRQ_NONE;
 
        spin_lock_irqsave(&h->lock, flags);
+
        h->last_intr_timestamp = get_jiffies_64();
-       raw_tag = get_next_completion(h);
+       raw_tag = get_next_completion(h, q);
        while (raw_tag != FIFO_EMPTY)
-               raw_tag = next_command(h);
+               raw_tag = next_command(h, q);
        spin_unlock_irqrestore(&h->lock, flags);
        return IRQ_HANDLED;
 }
 
-static irqreturn_t do_hpsa_intr_intx(int irq, void *dev_id)
+static irqreturn_t do_hpsa_intr_intx(int irq, void *queue)
 {
-       struct ctlr_info *h = dev_id;
+       struct ctlr_info *h = queue_to_hba((u8 *) queue);
        unsigned long flags;
        u32 raw_tag;
+       u8 q = *(u8 *) queue;
 
        if (interrupt_not_for_us(h))
                return IRQ_NONE;
        spin_lock_irqsave(&h->lock, flags);
        h->last_intr_timestamp = get_jiffies_64();
        while (interrupt_pending(h)) {
-               raw_tag = get_next_completion(h);
+               raw_tag = get_next_completion(h, q);
                while (raw_tag != FIFO_EMPTY) {
                        if (likely(hpsa_tag_contains_index(raw_tag)))
                                process_indexed_cmd(h, raw_tag);
                        else
                                process_nonindexed_cmd(h, raw_tag);
-                       raw_tag = next_command(h);
+                       raw_tag = next_command(h, q);
                }
        }
        spin_unlock_irqrestore(&h->lock, flags);
        return IRQ_HANDLED;
 }
 
-static irqreturn_t do_hpsa_intr_msi(int irq, void *dev_id)
+static irqreturn_t do_hpsa_intr_msi(int irq, void *queue)
 {
-       struct ctlr_info *h = dev_id;
+       struct ctlr_info *h = queue_to_hba(queue);
        unsigned long flags;
        u32 raw_tag;
+       u8 q = *(u8 *) queue;
 
        spin_lock_irqsave(&h->lock, flags);
        h->last_intr_timestamp = get_jiffies_64();
-       raw_tag = get_next_completion(h);
+       raw_tag = get_next_completion(h, q);
        while (raw_tag != FIFO_EMPTY) {
                if (likely(hpsa_tag_contains_index(raw_tag)))
                        process_indexed_cmd(h, raw_tag);
                else
                        process_nonindexed_cmd(h, raw_tag);
-               raw_tag = next_command(h);
+               raw_tag = next_command(h, q);
        }
        spin_unlock_irqrestore(&h->lock, flags);
        return IRQ_HANDLED;
@@ -3942,10 +3962,13 @@ static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
 static void __devinit hpsa_interrupt_mode(struct ctlr_info *h)
 {
 #ifdef CONFIG_PCI_MSI
-       int err;
-       struct msix_entry hpsa_msix_entries[4] = { {0, 0}, {0, 1},
-               {0, 2}, {0, 3}
-       };
+       int err, i;
+       struct msix_entry hpsa_msix_entries[MAX_REPLY_QUEUES];
+
+       for (i = 0; i < MAX_REPLY_QUEUES; i++) {
+               hpsa_msix_entries[i].vector = 0;
+               hpsa_msix_entries[i].entry = i;
+       }
 
        /* Some boards advertise MSI but don't really support it */
        if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) ||
@@ -3953,12 +3976,11 @@ static void __devinit hpsa_interrupt_mode(struct ctlr_info *h)
                goto default_int_mode;
        if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
                dev_info(&h->pdev->dev, "MSIX\n");
-               err = pci_enable_msix(h->pdev, hpsa_msix_entries, 4);
+               err = pci_enable_msix(h->pdev, hpsa_msix_entries,
+                                     MAX_REPLY_QUEUES);
                if (!err) {
-                       h->intr[0] = hpsa_msix_entries[0].vector;
-                       h->intr[1] = hpsa_msix_entries[1].vector;
-                       h->intr[2] = hpsa_msix_entries[2].vector;
-                       h->intr[3] = hpsa_msix_entries[3].vector;
+                       for (i = 0; i < MAX_REPLY_QUEUES; i++)
+                               h->intr[i] = hpsa_msix_entries[i].vector;
                        h->msix_vector = 1;
                        return;
                }
@@ -4372,14 +4394,33 @@ static int hpsa_request_irq(struct ctlr_info *h,
        irqreturn_t (*msixhandler)(int, void *),
        irqreturn_t (*intxhandler)(int, void *))
 {
-       int rc;
+       int rc, i;
 
-       if (h->msix_vector || h->msi_vector)
-               rc = request_irq(h->intr[h->intr_mode], msixhandler,
-                               0, h->devname, h);
-       else
-               rc = request_irq(h->intr[h->intr_mode], intxhandler,
-                               IRQF_SHARED, h->devname, h);
+       /*
+        * initialize h->q[x] = x so that interrupt handlers know which
+        * queue to process.
+        */
+       for (i = 0; i < MAX_REPLY_QUEUES; i++)
+               h->q[i] = (u8) i;
+
+       if (h->intr_mode == PERF_MODE_INT && h->msix_vector) {
+               /* If performant mode and MSI-X, use multiple reply queues */
+               for (i = 0; i < MAX_REPLY_QUEUES; i++)
+                       rc = request_irq(h->intr[i], msixhandler,
+                                       0, h->devname,
+                                       &h->q[i]);
+       } else {
+               /* Use single reply pool */
+               if (h->msix_vector || h->msi_vector) {
+                       rc = request_irq(h->intr[h->intr_mode],
+                               msixhandler, 0, h->devname,
+                               &h->q[h->intr_mode]);
+               } else {
+                       rc = request_irq(h->intr[h->intr_mode],
+                               intxhandler, IRQF_SHARED, h->devname,
+                               &h->q[h->intr_mode]);
+               }
+       }
        if (rc) {
                dev_err(&h->pdev->dev, "unable to get irq %d for %s\n",
                        h->intr[h->intr_mode], h->devname);
@@ -4412,9 +4453,24 @@ static int __devinit hpsa_kdump_soft_reset(struct ctlr_info *h)
        return 0;
 }
 
+static void free_irqs(struct ctlr_info *h)
+{
+       int i;
+
+       if (!h->msix_vector || h->intr_mode != PERF_MODE_INT) {
+               /* Single reply queue, only one irq to free */
+               i = h->intr_mode;
+               free_irq(h->intr[i], &h->q[i]);
+               return;
+       }
+
+       for (i = 0; i < MAX_REPLY_QUEUES; i++)
+               free_irq(h->intr[i], &h->q[i]);
+}
+
 static void hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h)
 {
-       free_irq(h->intr[h->intr_mode], h);
+       free_irqs(h);
 #ifdef CONFIG_PCI_MSI
        if (h->msix_vector)
                pci_disable_msix(h->pdev);
@@ -4682,7 +4738,7 @@ reinit_after_soft_reset:
        spin_lock_irqsave(&h->lock, flags);
        h->access.set_intr_mask(h, HPSA_INTR_OFF);
        spin_unlock_irqrestore(&h->lock, flags);
-       free_irq(h->intr[h->intr_mode], h);
+       free_irqs(h);
        rc = hpsa_request_irq(h, hpsa_msix_discard_completions,
                                hpsa_intx_discard_completions);
        if (rc) {
@@ -4732,7 +4788,7 @@ reinit_after_soft_reset:
 clean4:
        hpsa_free_sg_chain_blocks(h);
        hpsa_free_cmd_pool(h);
-       free_irq(h->intr[h->intr_mode], h);
+       free_irqs(h);
 clean2:
 clean1:
        kfree(h);
@@ -4775,7 +4831,7 @@ static void hpsa_shutdown(struct pci_dev *pdev)
         */
        hpsa_flush_cache(h);
        h->access.set_intr_mask(h, HPSA_INTR_OFF);
-       free_irq(h->intr[h->intr_mode], h);
+       free_irqs(h);
 #ifdef CONFIG_PCI_MSI
        if (h->msix_vector)
                pci_disable_msix(h->pdev);
@@ -4915,11 +4971,8 @@ static __devinit void hpsa_enter_performant_mode(struct ctlr_info *h,
         * 10 = 6 s/g entry or 24k
         */
 
-       h->reply_pool_wraparound = 1; /* spec: init to 1 */
-
        /* Controller spec: zero out this buffer. */
        memset(h->reply_pool, 0, h->reply_pool_size);
-       h->reply_pool_head = h->reply_pool;
 
        bft[7] = SG_ENTRIES_IN_CMD + 4;
        calc_bucket_map(bft, ARRAY_SIZE(bft),
@@ -4929,12 +4982,19 @@ static __devinit void hpsa_enter_performant_mode(struct ctlr_info *h,
 
        /* size of controller ring buffer */
        writel(h->max_commands, &h->transtable->RepQSize);
-       writel(1, &h->transtable->RepQCount);
+       writel(h->nreply_queues, &h->transtable->RepQCount);
        writel(0, &h->transtable->RepQCtrAddrLow32);
        writel(0, &h->transtable->RepQCtrAddrHigh32);
-       writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32);
-       writel(0, &h->transtable->RepQAddr0High32);
-       writel(CFGTBL_Trans_Performant | use_short_tags,
+
+       for (i = 0; i < h->nreply_queues; i++) {
+               writel(0, &h->transtable->RepQAddr[i].upper);
+               writel(h->reply_pool_dhandle +
+                       (h->max_commands * sizeof(u64) * i),
+                       &h->transtable->RepQAddr[i].lower);
+       }
+
+       writel(CFGTBL_Trans_Performant | use_short_tags |
+               CFGTBL_Trans_enable_directed_msix,
                &(h->cfgtable->HostWrite.TransportRequest));
        writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
        hpsa_wait_for_mode_change_ack(h);
@@ -4952,6 +5012,7 @@ static __devinit void hpsa_enter_performant_mode(struct ctlr_info *h,
 static __devinit void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h)
 {
        u32 trans_support;
+       int i;
 
        if (hpsa_simple_mode)
                return;
@@ -4960,12 +5021,20 @@ static __devinit void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h)
        if (!(trans_support & PERFORMANT_MODE))
                return;
 
+       h->nreply_queues = h->msix_vector ? MAX_REPLY_QUEUES : 1;
        hpsa_get_max_perf_mode_cmds(h);
        /* Performant mode ring buffer and supporting data structures */
-       h->reply_pool_size = h->max_commands * sizeof(u64);
+       h->reply_pool_size = h->max_commands * sizeof(u64) * h->nreply_queues;
        h->reply_pool = pci_alloc_consistent(h->pdev, h->reply_pool_size,
                                &(h->reply_pool_dhandle));
 
+       for (i = 0; i < h->nreply_queues; i++) {
+               h->reply_queue[i].head = &h->reply_pool[h->max_commands * i];
+               h->reply_queue[i].size = h->max_commands;
+               h->reply_queue[i].wraparound = 1;  /* spec: init to 1 */
+               h->reply_queue[i].current_entry = 0;
+       }
+
        /* Need a block fetch table for performant mode */
        h->blockFetchTable = kmalloc(((SG_ENTRIES_IN_CMD + 1) *
                                sizeof(u32)), GFP_KERNEL);
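Before the header changes, one allocation detail worth spelling out: the
driver still makes a single coherent DMA allocation, but now sized for all
queues, and carves it into nreply_queues consecutive rings of max_commands
u64 tags each. Queue i's CPU-side view starts at reply_pool + max_commands * i,
and the bus address programmed into RepQAddr[i] is reply_pool_dhandle +
max_commands * sizeof(u64) * i. A hedged sketch of the carving, with plain
calloc() standing in for pci_alloc_consistent():

    #include <stdint.h>
    #include <stdlib.h>

    struct reply_ring { uint64_t *head; uint32_t size; };

    int main(void)
    {
            const unsigned int max_commands = 32, nqueues = 8;
            struct reply_ring rq[8];
            /* One contiguous, zeroed pool shared by all queues. */
            uint64_t *pool = calloc((size_t)max_commands * nqueues,
                                    sizeof(*pool));
            if (!pool)
                    return 1;
            for (unsigned int i = 0; i < nqueues; i++) {
                    rq[i].head = &pool[(size_t)max_commands * i];
                    rq[i].size = max_commands;
                    /* the device side would be programmed with
                     * dhandle + max_commands * sizeof(uint64_t) * i */
            }
            free(pool);
            return 0;
    }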
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index d8aa95c43f4d..486a7c099246 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -34,7 +34,7 @@ struct access_method {
        void (*set_intr_mask)(struct ctlr_info *h, unsigned long val);
        unsigned long (*fifo_full)(struct ctlr_info *h);
        bool (*intr_pending)(struct ctlr_info *h);
-       unsigned long (*command_completed)(struct ctlr_info *h);
+       unsigned long (*command_completed)(struct ctlr_info *h, u8 q);
 };
 
 struct hpsa_scsi_dev_t {
@@ -48,6 +48,13 @@ struct hpsa_scsi_dev_t {
        unsigned char raid_level;       /* from inquiry page 0xC1 */
 };
 
+struct reply_pool {
+       u64 *head;
+       size_t size;
+       u8 wraparound;
+       u32 current_entry;
+};
+
 struct ctlr_info {
        int ctlr;
        char devname[8];
@@ -68,7 +75,7 @@ struct ctlr_info {
 #      define DOORBELL_INT     1
 #      define SIMPLE_MODE_INT  2
 #      define MEMQ_MODE_INT    3
-       unsigned int intr[4];
+       unsigned int intr[MAX_REPLY_QUEUES];
        unsigned int msix_vector;
        unsigned int msi_vector;
        int intr_mode; /* either PERF_MODE_INT or SIMPLE_MODE_INT */
@@ -111,13 +118,13 @@ struct ctlr_info {
        unsigned long transMethod;
 
        /*
-        * Performant mode completion buffer
+        * Performant mode completion buffers
         */
        u64 *reply_pool;
-       dma_addr_t reply_pool_dhandle;
-       u64 *reply_pool_head;
        size_t reply_pool_size;
-       unsigned char reply_pool_wraparound;
+       struct reply_pool reply_queue[MAX_REPLY_QUEUES];
+       u8 nreply_queues;
+       dma_addr_t reply_pool_dhandle;
        u32 *blockFetchTable;
        unsigned char *hba_inquiry_data;
        u64 last_intr_timestamp;
@@ -125,6 +132,8 @@ struct ctlr_info {
        u64 last_heartbeat_timestamp;
        u32 lockup_detected;
        struct list_head lockup_list;
+       /* Address of h->q[x] is passed to intr handler to know which queue */
+       u8 q[MAX_REPLY_QUEUES];
        u32 TMFSupportFlags; /* cache what task mgmt funcs are supported. */
 #define HPSATMF_BITS_SUPPORTED  (1 << 0)
 #define HPSATMF_PHYS_LUN_RESET  (1 << 1)
@@ -275,8 +284,9 @@ static void SA5_performant_intr_mask(struct ctlr_info *h, unsigned long val)
        }
 }
 
-static unsigned long SA5_performant_completed(struct ctlr_info *h)
+static unsigned long SA5_performant_completed(struct ctlr_info *h, u8 q)
 {
+       struct reply_pool *rq = &h->reply_queue[q];
        unsigned long register_value = FIFO_EMPTY;
 
        /* msi auto clears the interrupt pending bit. */
@@ -292,19 +302,18 @@ static unsigned long SA5_performant_completed(struct ctlr_info *h)
                register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
        }
 
-       if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
-               register_value = *(h->reply_pool_head);
-               (h->reply_pool_head)++;
+       if ((rq->head[rq->current_entry] & 1) == rq->wraparound) {
+               register_value = rq->head[rq->current_entry];
+               rq->current_entry++;
                h->commands_outstanding--;
        } else {
                register_value = FIFO_EMPTY;
        }
        /* Check for wraparound */
-       if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
-               h->reply_pool_head = h->reply_pool;
-               h->reply_pool_wraparound ^= 1;
+       if (rq->current_entry == h->max_commands) {
+               rq->current_entry = 0;
+               rq->wraparound ^= 1;
        }
-
        return register_value;
 }
 
@@ -324,7 +333,8 @@ static unsigned long SA5_fifo_full(struct ctlr_info *h)
  * returns value read from hardware.
  * returns FIFO_EMPTY if there is nothing to read
  */
-static unsigned long SA5_completed(struct ctlr_info *h)
+static unsigned long SA5_completed(struct ctlr_info *h,
+       __attribute__((unused)) u8 q)
 {
        unsigned long register_value
                = readl(h->vaddr + SA5_REPLY_PORT_OFFSET);
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index 14b56c93cefa..43f163164b24 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -129,6 +129,7 @@
 #define CFGTBL_Trans_Simple     0x00000002l
 #define CFGTBL_Trans_Performant 0x00000004l
 #define CFGTBL_Trans_use_short_tags 0x20000000l
+#define CFGTBL_Trans_enable_directed_msix (1 << 30)
 
 #define CFGTBL_BusType_Ultra2   0x00000001l
 #define CFGTBL_BusType_Ultra3   0x00000002l
@@ -380,8 +381,8 @@ struct TransTable_struct {
        u32             RepQCount;
        u32             RepQCtrAddrLow32;
        u32             RepQCtrAddrHigh32;
-       u32             RepQAddr0Low32;
-       u32             RepQAddr0High32;
+#define MAX_REPLY_QUEUES 8
+       struct vals32   RepQAddr[MAX_REPLY_QUEUES];
 };
 
 struct hpsa_pci_info {