author     Matt Gates <matthew.gates@hp.com>          2012-05-01 12:43:06 -0400
committer  James Bottomley <JBottomley@Parallels.com> 2012-05-10 04:16:25 -0400
commit     254f796b9f22b1944c64caabc356a56caaa2facd
tree       4c5ee6950b15456b0d11c3c36587fa659baf6632
parent     1d94f94d89848762306b4a8bd5e658c11828ab12
[SCSI] hpsa: use multiple reply queues
Smart Arrays can support multiple reply queues onto which command
completions may be deposited. Arranging for completions to be processed
on the same CPU from which the commands were submitted increases the
likelihood of cache hits, which can help performance quite a bit.
Signed-off-by: Matt Gates <matthew.gates@hp.com>
Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
-rw-r--r--  drivers/scsi/hpsa.c      | 181
-rw-r--r--  drivers/scsi/hpsa.h      |  40
-rw-r--r--  drivers/scsi/hpsa_cmd.h  |   5
3 files changed, 153 insertions, 73 deletions
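
Before the diff itself, a minimal sketch of the submission-side idea the
patch implements: set_performant_mode() (below) tags each command with a
reply queue computed from the submitting CPU, so the completion comes back
on a vector affine to that CPU. This is illustrative user-space C, not
driver code; fake_cmd, fake_submit() and NREPLY_QUEUES are hypothetical
stand-ins for the patch's smp_processor_id()-based mapping.

#include <stdio.h>

#define NREPLY_QUEUES 8	/* mirrors MAX_REPLY_QUEUES in the patch */

/* Hypothetical stand-in for the per-command header field the patch sets. */
struct fake_cmd {
	unsigned char reply_queue;
};

/*
 * Mirrors the mapping in set_performant_mode():
 *   c->Header.ReplyQueue = smp_processor_id() % h->nreply_queues;
 * with the CPU id passed in explicitly instead of smp_processor_id().
 */
static void fake_submit(struct fake_cmd *c, int cpu, int nreply_queues)
{
	c->reply_queue = cpu % nreply_queues;
}

int main(void)
{
	struct fake_cmd c;
	int cpu;

	for (cpu = 0; cpu < 12; cpu++) {
		fake_submit(&c, cpu, NREPLY_QUEUES);
		printf("cpu %2d -> reply queue %d\n", cpu, c.reply_queue);
	}
	return 0;
}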
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index bf5ed873a33e..e4b27c449ec1 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -172,7 +172,7 @@ static void check_ioctl_unit_attention(struct ctlr_info *h,
 static void calc_bucket_map(int *bucket, int num_buckets,
 	int nsgs, int *bucket_map);
 static __devinit void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h);
-static inline u32 next_command(struct ctlr_info *h);
+static inline u32 next_command(struct ctlr_info *h, u8 q);
 static int __devinit hpsa_find_cfg_addrs(struct pci_dev *pdev,
 	void __iomem *vaddr, u32 *cfg_base_addr, u64 *cfg_base_addr_index,
 	u64 *cfg_offset);
@@ -529,24 +529,25 @@ static inline void addQ(struct list_head *list, struct CommandList *c)
 	list_add_tail(&c->list, list);
 }
 
-static inline u32 next_command(struct ctlr_info *h)
+static inline u32 next_command(struct ctlr_info *h, u8 q)
 {
 	u32 a;
+	struct reply_pool *rq = &h->reply_queue[q];
 
 	if (unlikely(!(h->transMethod & CFGTBL_Trans_Performant)))
-		return h->access.command_completed(h);
+		return h->access.command_completed(h, q);
 
-	if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
-		a = *(h->reply_pool_head); /* Next cmd in ring buffer */
-		(h->reply_pool_head)++;
+	if ((rq->head[rq->current_entry] & 1) == rq->wraparound) {
+		a = rq->head[rq->current_entry];
+		rq->current_entry++;
 		h->commands_outstanding--;
 	} else {
 		a = FIFO_EMPTY;
 	}
 	/* Check for wraparound */
-	if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
-		h->reply_pool_head = h->reply_pool;
-		h->reply_pool_wraparound ^= 1;
+	if (rq->current_entry == h->max_commands) {
+		rq->current_entry = 0;
+		rq->wraparound ^= 1;
 	}
 	return a;
 }
@@ -557,8 +558,12 @@ static inline u32 next_command(struct ctlr_info *h)
  */
 static void set_performant_mode(struct ctlr_info *h, struct CommandList *c)
 {
-	if (likely(h->transMethod & CFGTBL_Trans_Performant))
+	if (likely(h->transMethod & CFGTBL_Trans_Performant)) {
 		c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
+		if (likely(h->msix_vector))
+			c->Header.ReplyQueue =
+				smp_processor_id() % h->nreply_queues;
+	}
 }
 
 static void enqueue_cmd_and_start_io(struct ctlr_info *h,
@@ -3323,9 +3328,9 @@ static void start_io(struct ctlr_info *h)
 	}
 }
 
-static inline unsigned long get_next_completion(struct ctlr_info *h)
+static inline unsigned long get_next_completion(struct ctlr_info *h, u8 q)
 {
-	return h->access.command_completed(h);
+	return h->access.command_completed(h, q);
 }
 
 static inline bool interrupt_pending(struct ctlr_info *h)
@@ -3428,9 +3433,20 @@ static int ignore_bogus_interrupt(struct ctlr_info *h)
 	return 1;
 }
 
-static irqreturn_t hpsa_intx_discard_completions(int irq, void *dev_id)
+/*
+ * Convert &h->q[x] (passed to interrupt handlers) back to h.
+ * Relies on (h->q[x] == x) being true for x such that
+ * 0 <= x < MAX_REPLY_QUEUES.
+ */
+static struct ctlr_info *queue_to_hba(u8 *queue)
 {
-	struct ctlr_info *h = dev_id;
+	return container_of((queue - *queue), struct ctlr_info, q[0]);
+}
+
+static irqreturn_t hpsa_intx_discard_completions(int irq, void *queue)
+{
+	struct ctlr_info *h = queue_to_hba(queue);
+	u8 q = *(u8 *) queue;
 	unsigned long flags;
 	u32 raw_tag;
 
@@ -3442,71 +3458,75 @@ static irqreturn_t hpsa_intx_discard_completions(int irq, void *dev_id)
 	spin_lock_irqsave(&h->lock, flags);
 	h->last_intr_timestamp = get_jiffies_64();
 	while (interrupt_pending(h)) {
-		raw_tag = get_next_completion(h);
+		raw_tag = get_next_completion(h, q);
 		while (raw_tag != FIFO_EMPTY)
-			raw_tag = next_command(h);
+			raw_tag = next_command(h, q);
 	}
 	spin_unlock_irqrestore(&h->lock, flags);
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t hpsa_msix_discard_completions(int irq, void *dev_id)
+static irqreturn_t hpsa_msix_discard_completions(int irq, void *queue)
 {
-	struct ctlr_info *h = dev_id;
+	struct ctlr_info *h = queue_to_hba(queue);
 	unsigned long flags;
 	u32 raw_tag;
+	u8 q = *(u8 *) queue;
 
 	if (ignore_bogus_interrupt(h))
 		return IRQ_NONE;
 
 	spin_lock_irqsave(&h->lock, flags);
+
 	h->last_intr_timestamp = get_jiffies_64();
-	raw_tag = get_next_completion(h);
+	raw_tag = get_next_completion(h, q);
 	while (raw_tag != FIFO_EMPTY)
-		raw_tag = next_command(h);
+		raw_tag = next_command(h, q);
 	spin_unlock_irqrestore(&h->lock, flags);
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t do_hpsa_intr_intx(int irq, void *dev_id)
+static irqreturn_t do_hpsa_intr_intx(int irq, void *queue)
 {
-	struct ctlr_info *h = dev_id;
+	struct ctlr_info *h = queue_to_hba((u8 *) queue);
 	unsigned long flags;
 	u32 raw_tag;
+	u8 q = *(u8 *) queue;
 
 	if (interrupt_not_for_us(h))
 		return IRQ_NONE;
 	spin_lock_irqsave(&h->lock, flags);
 	h->last_intr_timestamp = get_jiffies_64();
 	while (interrupt_pending(h)) {
-		raw_tag = get_next_completion(h);
+		raw_tag = get_next_completion(h, q);
 		while (raw_tag != FIFO_EMPTY) {
 			if (likely(hpsa_tag_contains_index(raw_tag)))
 				process_indexed_cmd(h, raw_tag);
 			else
 				process_nonindexed_cmd(h, raw_tag);
-			raw_tag = next_command(h);
+			raw_tag = next_command(h, q);
 		}
 	}
 	spin_unlock_irqrestore(&h->lock, flags);
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t do_hpsa_intr_msi(int irq, void *dev_id)
+static irqreturn_t do_hpsa_intr_msi(int irq, void *queue)
 {
-	struct ctlr_info *h = dev_id;
+	struct ctlr_info *h = queue_to_hba(queue);
 	unsigned long flags;
 	u32 raw_tag;
+	u8 q = *(u8 *) queue;
 
 	spin_lock_irqsave(&h->lock, flags);
 	h->last_intr_timestamp = get_jiffies_64();
-	raw_tag = get_next_completion(h);
+	raw_tag = get_next_completion(h, q);
 	while (raw_tag != FIFO_EMPTY) {
 		if (likely(hpsa_tag_contains_index(raw_tag)))
 			process_indexed_cmd(h, raw_tag);
 		else
 			process_nonindexed_cmd(h, raw_tag);
-		raw_tag = next_command(h);
+		raw_tag = next_command(h, q);
 	}
 	spin_unlock_irqrestore(&h->lock, flags);
 	return IRQ_HANDLED;
@@ -3942,10 +3962,13 @@ static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
 static void __devinit hpsa_interrupt_mode(struct ctlr_info *h)
 {
 #ifdef CONFIG_PCI_MSI
-	int err;
-	struct msix_entry hpsa_msix_entries[4] = { {0, 0}, {0, 1},
-		{0, 2}, {0, 3}
-	};
+	int err, i;
+	struct msix_entry hpsa_msix_entries[MAX_REPLY_QUEUES];
+
+	for (i = 0; i < MAX_REPLY_QUEUES; i++) {
+		hpsa_msix_entries[i].vector = 0;
+		hpsa_msix_entries[i].entry = i;
+	}
 
 	/* Some boards advertise MSI but don't really support it */
 	if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) ||
@@ -3953,12 +3976,11 @@ static void __devinit hpsa_interrupt_mode(struct ctlr_info *h)
 		goto default_int_mode;
 	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
 		dev_info(&h->pdev->dev, "MSIX\n");
-		err = pci_enable_msix(h->pdev, hpsa_msix_entries, 4);
+		err = pci_enable_msix(h->pdev, hpsa_msix_entries,
+			MAX_REPLY_QUEUES);
 		if (!err) {
-			h->intr[0] = hpsa_msix_entries[0].vector;
-			h->intr[1] = hpsa_msix_entries[1].vector;
-			h->intr[2] = hpsa_msix_entries[2].vector;
-			h->intr[3] = hpsa_msix_entries[3].vector;
+			for (i = 0; i < MAX_REPLY_QUEUES; i++)
+				h->intr[i] = hpsa_msix_entries[i].vector;
 			h->msix_vector = 1;
 			return;
 		}
@@ -4372,14 +4394,33 @@ static int hpsa_request_irq(struct ctlr_info *h,
 	irqreturn_t (*msixhandler)(int, void *),
 	irqreturn_t (*intxhandler)(int, void *))
 {
-	int rc;
+	int rc, i;
 
-	if (h->msix_vector || h->msi_vector)
-		rc = request_irq(h->intr[h->intr_mode], msixhandler,
-				0, h->devname, h);
-	else
-		rc = request_irq(h->intr[h->intr_mode], intxhandler,
-				IRQF_SHARED, h->devname, h);
+	/*
+	 * initialize h->q[x] = x so that interrupt handlers know which
+	 * queue to process.
+	 */
+	for (i = 0; i < MAX_REPLY_QUEUES; i++)
+		h->q[i] = (u8) i;
+
+	if (h->intr_mode == PERF_MODE_INT && h->msix_vector) {
+		/* If performant mode and MSI-X, use multiple reply queues */
+		for (i = 0; i < MAX_REPLY_QUEUES; i++)
+			rc = request_irq(h->intr[i], msixhandler,
+					0, h->devname,
+					&h->q[i]);
+	} else {
+		/* Use single reply pool */
+		if (h->msix_vector || h->msi_vector) {
+			rc = request_irq(h->intr[h->intr_mode],
+				msixhandler, 0, h->devname,
+				&h->q[h->intr_mode]);
+		} else {
+			rc = request_irq(h->intr[h->intr_mode],
+				intxhandler, IRQF_SHARED, h->devname,
+				&h->q[h->intr_mode]);
+		}
+	}
 	if (rc) {
 		dev_err(&h->pdev->dev, "unable to get irq %d for %s\n",
 		       h->intr[h->intr_mode], h->devname);
@@ -4412,9 +4453,24 @@ static int __devinit hpsa_kdump_soft_reset(struct ctlr_info *h)
 	return 0;
 }
 
+static void free_irqs(struct ctlr_info *h)
+{
+	int i;
+
+	if (!h->msix_vector || h->intr_mode != PERF_MODE_INT) {
+		/* Single reply queue, only one irq to free */
+		i = h->intr_mode;
+		free_irq(h->intr[i], &h->q[i]);
+		return;
+	}
+
+	for (i = 0; i < MAX_REPLY_QUEUES; i++)
+		free_irq(h->intr[i], &h->q[i]);
+}
+
 static void hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h)
 {
-	free_irq(h->intr[h->intr_mode], h);
+	free_irqs(h);
 #ifdef CONFIG_PCI_MSI
 	if (h->msix_vector)
 		pci_disable_msix(h->pdev);
@@ -4682,7 +4738,7 @@ reinit_after_soft_reset:
 		spin_lock_irqsave(&h->lock, flags);
 		h->access.set_intr_mask(h, HPSA_INTR_OFF);
 		spin_unlock_irqrestore(&h->lock, flags);
-		free_irq(h->intr[h->intr_mode], h);
+		free_irqs(h);
 		rc = hpsa_request_irq(h, hpsa_msix_discard_completions,
 					hpsa_intx_discard_completions);
 		if (rc) {
@@ -4732,7 +4788,7 @@ reinit_after_soft_reset:
 clean4:
 	hpsa_free_sg_chain_blocks(h);
 	hpsa_free_cmd_pool(h);
-	free_irq(h->intr[h->intr_mode], h);
+	free_irqs(h);
 clean2:
 clean1:
 	kfree(h);
@@ -4775,7 +4831,7 @@ static void hpsa_shutdown(struct pci_dev *pdev)
 	 */
 	hpsa_flush_cache(h);
 	h->access.set_intr_mask(h, HPSA_INTR_OFF);
-	free_irq(h->intr[h->intr_mode], h);
+	free_irqs(h);
 #ifdef CONFIG_PCI_MSI
 	if (h->msix_vector)
 		pci_disable_msix(h->pdev);
@@ -4915,11 +4971,8 @@ static __devinit void hpsa_enter_performant_mode(struct ctlr_info *h,
 	 * 10 = 6 s/g entry or 24k
 	 */
 
-	h->reply_pool_wraparound = 1; /* spec: init to 1 */
-
 	/* Controller spec: zero out this buffer. */
 	memset(h->reply_pool, 0, h->reply_pool_size);
-	h->reply_pool_head = h->reply_pool;
 
 	bft[7] = SG_ENTRIES_IN_CMD + 4;
 	calc_bucket_map(bft, ARRAY_SIZE(bft),
@@ -4929,12 +4982,19 @@ static __devinit void hpsa_enter_performant_mode(struct ctlr_info *h,
 
 	/* size of controller ring buffer */
 	writel(h->max_commands, &h->transtable->RepQSize);
-	writel(1, &h->transtable->RepQCount);
+	writel(h->nreply_queues, &h->transtable->RepQCount);
 	writel(0, &h->transtable->RepQCtrAddrLow32);
 	writel(0, &h->transtable->RepQCtrAddrHigh32);
-	writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32);
-	writel(0, &h->transtable->RepQAddr0High32);
-	writel(CFGTBL_Trans_Performant | use_short_tags,
+
+	for (i = 0; i < h->nreply_queues; i++) {
+		writel(0, &h->transtable->RepQAddr[i].upper);
+		writel(h->reply_pool_dhandle +
+			(h->max_commands * sizeof(u64) * i),
+			&h->transtable->RepQAddr[i].lower);
+	}
+
+	writel(CFGTBL_Trans_Performant | use_short_tags |
+		CFGTBL_Trans_enable_directed_msix,
 		&(h->cfgtable->HostWrite.TransportRequest));
 	writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
 	hpsa_wait_for_mode_change_ack(h);
@@ -4952,6 +5012,7 @@ static __devinit void hpsa_enter_performant_mode(struct ctlr_info *h,
 static __devinit void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h)
 {
 	u32 trans_support;
+	int i;
 
 	if (hpsa_simple_mode)
 		return;
@@ -4960,12 +5021,20 @@ static __devinit void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h)
 	if (!(trans_support & PERFORMANT_MODE))
 		return;
 
+	h->nreply_queues = h->msix_vector ? MAX_REPLY_QUEUES : 1;
 	hpsa_get_max_perf_mode_cmds(h);
 	/* Performant mode ring buffer and supporting data structures */
-	h->reply_pool_size = h->max_commands * sizeof(u64);
+	h->reply_pool_size = h->max_commands * sizeof(u64) * h->nreply_queues;
 	h->reply_pool = pci_alloc_consistent(h->pdev, h->reply_pool_size,
 		&(h->reply_pool_dhandle));
 
+	for (i = 0; i < h->nreply_queues; i++) {
+		h->reply_queue[i].head = &h->reply_pool[h->max_commands * i];
+		h->reply_queue[i].size = h->max_commands;
+		h->reply_queue[i].wraparound = 1;	/* spec: init to 1 */
+		h->reply_queue[i].current_entry = 0;
+	}
+
	/* Need a block fetch table for performant mode */
 	h->blockFetchTable = kmalloc(((SG_ENTRIES_IN_CMD + 1) *
 		sizeof(u32)), GFP_KERNEL);
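
An aside before the header changes: next_command() above distinguishes
fresh entries from stale ones with a phase bit rather than by zeroing
consumed slots. The controller flips the low bit it writes each time it
wraps a ring, and the consumer flips its expected wraparound value each
time it wraps, so a leftover entry from the previous pass never matches.
A self-contained simulation of that convention, assuming a producer that
never overruns the ring (names are hypothetical, not driver code):

#include <stdint.h>
#include <stdio.h>

#define QDEPTH 4
#define FIFO_EMPTY 0xffffffffUL

static uint64_t ring[QDEPTH];		/* completion entries; bit 0 = phase */
static unsigned head, wraparound = 1;	/* consumer state; spec: init to 1 */
static unsigned prod_idx, prod_phase = 1;	/* producer (controller) state */

/* Producer: deposit a tag with the current phase in bit 0. */
static void produce(uint64_t tag)
{
	ring[prod_idx] = (tag << 1) | prod_phase;
	if (++prod_idx == QDEPTH) {
		prod_idx = 0;
		prod_phase ^= 1;
	}
}

/* Consumer: same shape as next_command() in the patch. */
static uint64_t consume(void)
{
	uint64_t a = FIFO_EMPTY;

	if ((ring[head] & 1) == wraparound) {
		a = ring[head];
		if (++head == QDEPTH) {
			head = 0;
			wraparound ^= 1;
		}
	}
	return a;
}

int main(void)
{
	uint64_t tag = 0, t;

	for (; tag < 3; tag++)		/* first batch, phase 1 */
		produce(tag);
	while ((t = consume()) != FIFO_EMPTY)
		printf("completed tag %llu\n", (unsigned long long)(t >> 1));

	for (; tag < 6; tag++)		/* second batch wraps; phase flips */
		produce(tag);
	while ((t = consume()) != FIFO_EMPTY)
		printf("completed tag %llu\n", (unsigned long long)(t >> 1));
	return 0;
}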
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index d8aa95c43f4d..486a7c099246 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -34,7 +34,7 @@ struct access_method {
 	void (*set_intr_mask)(struct ctlr_info *h, unsigned long val);
 	unsigned long (*fifo_full)(struct ctlr_info *h);
 	bool (*intr_pending)(struct ctlr_info *h);
-	unsigned long (*command_completed)(struct ctlr_info *h);
+	unsigned long (*command_completed)(struct ctlr_info *h, u8 q);
 };
 
 struct hpsa_scsi_dev_t {
@@ -48,6 +48,13 @@ struct hpsa_scsi_dev_t {
 	unsigned char raid_level; /* from inquiry page 0xC1 */
 };
 
+struct reply_pool {
+	u64 *head;
+	size_t size;
+	u8 wraparound;
+	u32 current_entry;
+};
+
 struct ctlr_info {
 	int ctlr;
 	char devname[8];
@@ -68,7 +75,7 @@ struct ctlr_info {
 #	define DOORBELL_INT	1
 #	define SIMPLE_MODE_INT	2
 #	define MEMQ_MODE_INT	3
-	unsigned int intr[4];
+	unsigned int intr[MAX_REPLY_QUEUES];
 	unsigned int msix_vector;
 	unsigned int msi_vector;
 	int intr_mode; /* either PERF_MODE_INT or SIMPLE_MODE_INT */
@@ -111,13 +118,13 @@ struct ctlr_info {
 	unsigned long transMethod;
 
 	/*
-	 * Performant mode completion buffer
+	 * Performant mode completion buffers
 	 */
 	u64 *reply_pool;
-	dma_addr_t reply_pool_dhandle;
-	u64 *reply_pool_head;
 	size_t reply_pool_size;
-	unsigned char reply_pool_wraparound;
+	struct reply_pool reply_queue[MAX_REPLY_QUEUES];
+	u8 nreply_queues;
+	dma_addr_t reply_pool_dhandle;
 	u32 *blockFetchTable;
 	unsigned char *hba_inquiry_data;
 	u64 last_intr_timestamp;
@@ -125,6 +132,8 @@ struct ctlr_info {
 	u64 last_heartbeat_timestamp;
 	u32 lockup_detected;
 	struct list_head lockup_list;
+	/* Address of h->q[x] is passed to intr handler to know which queue */
+	u8 q[MAX_REPLY_QUEUES];
 	u32 TMFSupportFlags; /* cache what task mgmt funcs are supported. */
 #define HPSATMF_BITS_SUPPORTED	(1 << 0)
 #define HPSATMF_PHYS_LUN_RESET	(1 << 1)
@@ -275,8 +284,9 @@ static void SA5_performant_intr_mask(struct ctlr_info *h, unsigned long val)
 	}
 }
 
-static unsigned long SA5_performant_completed(struct ctlr_info *h)
+static unsigned long SA5_performant_completed(struct ctlr_info *h, u8 q)
 {
+	struct reply_pool *rq = &h->reply_queue[q];
 	unsigned long register_value = FIFO_EMPTY;
 
 	/* msi auto clears the interrupt pending bit. */
@@ -292,19 +302,18 @@ static unsigned long SA5_performant_completed(struct ctlr_info *h)
 		register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
 	}
 
-	if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
-		register_value = *(h->reply_pool_head);
-		(h->reply_pool_head)++;
+	if ((rq->head[rq->current_entry] & 1) == rq->wraparound) {
+		register_value = rq->head[rq->current_entry];
+		rq->current_entry++;
 		h->commands_outstanding--;
 	} else {
 		register_value = FIFO_EMPTY;
 	}
 	/* Check for wraparound */
-	if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
-		h->reply_pool_head = h->reply_pool;
-		h->reply_pool_wraparound ^= 1;
+	if (rq->current_entry == h->max_commands) {
+		rq->current_entry = 0;
+		rq->wraparound ^= 1;
 	}
-
 	return register_value;
 }
 
@@ -324,7 +333,8 @@ static unsigned long SA5_fifo_full(struct ctlr_info *h)
  * returns value read from hardware.
  * returns FIFO_EMPTY if there is nothing to read
  */
-static unsigned long SA5_completed(struct ctlr_info *h)
+static unsigned long SA5_completed(struct ctlr_info *h,
+	__attribute__((unused)) u8 q)
 {
 	unsigned long register_value
 		= readl(h->vaddr + SA5_REPLY_PORT_OFFSET);
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index 14b56c93cefa..43f163164b24 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -129,6 +129,7 @@
 #define CFGTBL_Trans_Simple     0x00000002l
 #define CFGTBL_Trans_Performant 0x00000004l
 #define CFGTBL_Trans_use_short_tags 0x20000000l
+#define CFGTBL_Trans_enable_directed_msix (1 << 30)
 
 #define CFGTBL_BusType_Ultra2   0x00000001l
 #define CFGTBL_BusType_Ultra3   0x00000002l
@@ -380,8 +381,8 @@ struct TransTable_struct {
 	u32		RepQCount;
 	u32		RepQCtrAddrLow32;
 	u32		RepQCtrAddrHigh32;
-	u32		RepQAddr0Low32;
-	u32		RepQAddr0High32;
+#define MAX_REPLY_QUEUES 8
+	struct vals32	RepQAddr[MAX_REPLY_QUEUES];
 };
 
 struct hpsa_pci_info {
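
A closing note on queue_to_hba() from the hpsa.c hunk above:
hpsa_request_irq() stores i in h->q[i] and registers &h->q[i] as the irq
cookie, so a handler can subtract the stored index from the cookie pointer
to land on &h->q[0], then recover the enclosing ctlr_info with
container_of(). A user-space sketch of the same pointer arithmetic; the
struct and function names here are hypothetical:

#include <stddef.h>
#include <stdio.h>

#define NQUEUES 8

/* User-space equivalent of the kernel's container_of() macro. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct fake_ctlr {			/* stand-in for struct ctlr_info */
	int id;
	unsigned char q[NQUEUES];
};

static struct fake_ctlr *queue_to_ctlr(unsigned char *queue)
{
	/* queue == &q[x] and *queue == x, so queue - *queue == &q[0] */
	return container_of(queue - *queue, struct fake_ctlr, q[0]);
}

int main(void)
{
	struct fake_ctlr h = { .id = 42 };
	int i;

	for (i = 0; i < NQUEUES; i++)
		h.q[i] = (unsigned char) i;

	/* Pretend &h.q[5] was registered as the cookie for vector 5. */
	printf("recovered controller id %d for queue %d\n",
	       queue_to_ctlr(&h.q[5])->id, h.q[5]);
	return 0;
}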