aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Stefan Roscher <ossrosch@linux.vnet.ibm.com> 2008-04-29 16:46:53 -0400
committer: Roland Dreier <rolandd@cisco.com> 2008-04-29 16:46:53 -0400
commit: d227fa7288adebe5ba37fa8e4a589c977d4e4a34 (patch)
tree: 8e0f080fc78f0342b5fd2531615ea525333d8d25
parent: f56bcd8013566d4ad4759ae5fc85a6660e4655c7 (diff)
IB/ehca: Allocate event queue size depending on max number of CQs and QPs
If a lot of QPs fall into Error state at once and the EQ of the respective HCA is too small, it might overrun, causing the eHCA driver to stop processing completion events and calling the application's completion handlers, effectively causing traffic to stop. Fix this by limiting available QPs and CQs to a customizable max count, and determining EQ size based on these counts and a worst-case assumption.

Signed-off-by: Stefan Roscher <stefan.roscher@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes.h  5
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_cq.c  11
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_main.c  36
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_qp.c  26
4 files changed, 74 insertions, 4 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 3d6d9461c31d..00bab60f6de4 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -66,6 +66,7 @@ struct ehca_av;
66#include "ehca_irq.h" 66#include "ehca_irq.h"
67 67
68#define EHCA_EQE_CACHE_SIZE 20 68#define EHCA_EQE_CACHE_SIZE 20
69#define EHCA_MAX_NUM_QUEUES 0xffff
69 70
70struct ehca_eqe_cache_entry { 71struct ehca_eqe_cache_entry {
71 struct ehca_eqe *eqe; 72 struct ehca_eqe *eqe;
@@ -127,6 +128,8 @@ struct ehca_shca {
127 /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ 128 /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
128 u32 hca_cap_mr_pgsize; 129 u32 hca_cap_mr_pgsize;
129 int max_mtu; 130 int max_mtu;
131 atomic_t num_cqs;
132 atomic_t num_qps;
130}; 133};
131 134
132struct ehca_pd { 135struct ehca_pd {
@@ -344,6 +347,8 @@ extern int ehca_use_hp_mr;
344extern int ehca_scaling_code; 347extern int ehca_scaling_code;
345extern int ehca_lock_hcalls; 348extern int ehca_lock_hcalls;
346extern int ehca_nr_ports; 349extern int ehca_nr_ports;
350extern int ehca_max_cq;
351extern int ehca_max_qp;
347 352
348struct ipzu_queue_resp { 353struct ipzu_queue_resp {
349 u32 qe_size; /* queue entry size */ 354 u32 qe_size; /* queue entry size */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index ec0cfcf3073f..5540b276a33c 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -132,10 +132,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
132 if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) 132 if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
133 return ERR_PTR(-EINVAL); 133 return ERR_PTR(-EINVAL);
134 134
135 if (!atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)) {
136 ehca_err(device, "Unable to create CQ, max number of %i "
137 "CQs reached.", ehca_max_cq);
138 ehca_err(device, "To increase the maximum number of CQs "
139 "use the number_of_cqs module parameter.\n");
140 return ERR_PTR(-ENOSPC);
141 }
142
135 my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); 143 my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL);
136 if (!my_cq) { 144 if (!my_cq) {
137 ehca_err(device, "Out of memory for ehca_cq struct device=%p", 145 ehca_err(device, "Out of memory for ehca_cq struct device=%p",
138 device); 146 device);
147 atomic_dec(&shca->num_cqs);
139 return ERR_PTR(-ENOMEM); 148 return ERR_PTR(-ENOMEM);
140 } 149 }
141 150
@@ -305,6 +314,7 @@ create_cq_exit2:
305create_cq_exit1: 314create_cq_exit1:
306 kmem_cache_free(cq_cache, my_cq); 315 kmem_cache_free(cq_cache, my_cq);
307 316
317 atomic_dec(&shca->num_cqs);
308 return cq; 318 return cq;
309} 319}
310 320
@@ -359,6 +369,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
359 ipz_queue_dtor(NULL, &my_cq->ipz_queue); 369 ipz_queue_dtor(NULL, &my_cq->ipz_queue);
360 kmem_cache_free(cq_cache, my_cq); 370 kmem_cache_free(cq_cache, my_cq);
361 371
372 atomic_dec(&shca->num_cqs);
362 return 0; 373 return 0;
363} 374}
364 375
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 65048976198c..482103eb6eac 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -68,6 +68,8 @@ int ehca_port_act_time = 30;
68int ehca_static_rate = -1; 68int ehca_static_rate = -1;
69int ehca_scaling_code = 0; 69int ehca_scaling_code = 0;
70int ehca_lock_hcalls = -1; 70int ehca_lock_hcalls = -1;
71int ehca_max_cq = -1;
72int ehca_max_qp = -1;
71 73
72module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO); 74module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO);
73module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); 75module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
@@ -79,6 +81,8 @@ module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO);
79module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); 81module_param_named(static_rate, ehca_static_rate, int, S_IRUGO);
80module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); 82module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO);
81module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO); 83module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO);
84module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO);
85module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO);
82 86
83MODULE_PARM_DESC(open_aqp1, 87MODULE_PARM_DESC(open_aqp1,
84 "Open AQP1 on startup (default: no)"); 88 "Open AQP1 on startup (default: no)");
@@ -104,6 +108,12 @@ MODULE_PARM_DESC(scaling_code,
104MODULE_PARM_DESC(lock_hcalls, 108MODULE_PARM_DESC(lock_hcalls,
105 "Serialize all hCalls made by the driver " 109 "Serialize all hCalls made by the driver "
106 "(default: autodetect)"); 110 "(default: autodetect)");
111MODULE_PARM_DESC(number_of_cqs,
112 "Max number of CQs which can be allocated "
113 "(default: autodetect)");
114MODULE_PARM_DESC(number_of_qps,
115 "Max number of QPs which can be allocated "
116 "(default: autodetect)");
107 117
108DEFINE_RWLOCK(ehca_qp_idr_lock); 118DEFINE_RWLOCK(ehca_qp_idr_lock);
109DEFINE_RWLOCK(ehca_cq_idr_lock); 119DEFINE_RWLOCK(ehca_cq_idr_lock);
@@ -355,6 +365,25 @@ static int ehca_sense_attributes(struct ehca_shca *shca)
355 if (rblock->memory_page_size_supported & pgsize_map[i]) 365 if (rblock->memory_page_size_supported & pgsize_map[i])
356 shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; 366 shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
357 367
368 /* Set maximum number of CQs and QPs to calculate EQ size */
369 if (ehca_max_qp == -1)
370 ehca_max_qp = min_t(int, rblock->max_qp, EHCA_MAX_NUM_QUEUES);
371 else if (ehca_max_qp < 1 || ehca_max_qp > rblock->max_qp) {
372 ehca_gen_err("Requested number of QPs is out of range (1 - %i) "
373 "specified by HW", rblock->max_qp);
374 ret = -EINVAL;
375 goto sense_attributes1;
376 }
377
378 if (ehca_max_cq == -1)
379 ehca_max_cq = min_t(int, rblock->max_cq, EHCA_MAX_NUM_QUEUES);
380 else if (ehca_max_cq < 1 || ehca_max_cq > rblock->max_cq) {
381 ehca_gen_err("Requested number of CQs is out of range (1 - %i) "
382 "specified by HW", rblock->max_cq);
383 ret = -EINVAL;
384 goto sense_attributes1;
385 }
386
358 /* query max MTU from first port -- it's the same for all ports */ 387 /* query max MTU from first port -- it's the same for all ports */
359 port = (struct hipz_query_port *)rblock; 388 port = (struct hipz_query_port *)rblock;
360 h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); 389 h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
@@ -684,7 +713,7 @@ static int __devinit ehca_probe(struct of_device *dev,
684 struct ehca_shca *shca; 713 struct ehca_shca *shca;
685 const u64 *handle; 714 const u64 *handle;
686 struct ib_pd *ibpd; 715 struct ib_pd *ibpd;
687 int ret, i; 716 int ret, i, eq_size;
688 717
689 handle = of_get_property(dev->node, "ibm,hca-handle", NULL); 718 handle = of_get_property(dev->node, "ibm,hca-handle", NULL);
690 if (!handle) { 719 if (!handle) {
@@ -705,6 +734,8 @@ static int __devinit ehca_probe(struct of_device *dev,
705 return -ENOMEM; 734 return -ENOMEM;
706 } 735 }
707 mutex_init(&shca->modify_mutex); 736 mutex_init(&shca->modify_mutex);
737 atomic_set(&shca->num_cqs, 0);
738 atomic_set(&shca->num_qps, 0);
708 for (i = 0; i < ARRAY_SIZE(shca->sport); i++) 739 for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
709 spin_lock_init(&shca->sport[i].mod_sqp_lock); 740 spin_lock_init(&shca->sport[i].mod_sqp_lock);
710 741
@@ -724,8 +755,9 @@ static int __devinit ehca_probe(struct of_device *dev,
724 goto probe1; 755 goto probe1;
725 } 756 }
726 757
758 eq_size = 2 * ehca_max_cq + 4 * ehca_max_qp;
727 /* create event queues */ 759 /* create event queues */
728 ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048); 760 ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
729 if (ret) { 761 if (ret) {
730 ehca_err(&shca->ib_device, "Cannot create EQ."); 762 ehca_err(&shca->ib_device, "Cannot create EQ.");
731 goto probe1; 763 goto probe1;
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 57bef1152cc2..18fba92fa7ae 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -421,8 +421,18 @@ static struct ehca_qp *internal_create_qp(
421 u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; 421 u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
422 unsigned long flags; 422 unsigned long flags;
423 423
424 if (init_attr->create_flags) 424 if (!atomic_add_unless(&shca->num_qps, 1, ehca_max_qp)) {
425 ehca_err(pd->device, "Unable to create QP, max number of %i "
426 "QPs reached.", ehca_max_qp);
427 ehca_err(pd->device, "To increase the maximum number of QPs "
428 "use the number_of_qps module parameter.\n");
429 return ERR_PTR(-ENOSPC);
430 }
431
432 if (init_attr->create_flags) {
433 atomic_dec(&shca->num_qps);
425 return ERR_PTR(-EINVAL); 434 return ERR_PTR(-EINVAL);
435 }
426 436
427 memset(&parms, 0, sizeof(parms)); 437 memset(&parms, 0, sizeof(parms));
428 qp_type = init_attr->qp_type; 438 qp_type = init_attr->qp_type;
@@ -431,6 +441,7 @@ static struct ehca_qp *internal_create_qp(
431 init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { 441 init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
432 ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", 442 ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
433 init_attr->sq_sig_type); 443 init_attr->sq_sig_type);
444 atomic_dec(&shca->num_qps);
434 return ERR_PTR(-EINVAL); 445 return ERR_PTR(-EINVAL);
435 } 446 }
436 447
@@ -455,6 +466,7 @@ static struct ehca_qp *internal_create_qp(
455 466
456 if (is_llqp && has_srq) { 467 if (is_llqp && has_srq) {
457 ehca_err(pd->device, "LLQPs can't have an SRQ"); 468 ehca_err(pd->device, "LLQPs can't have an SRQ");
469 atomic_dec(&shca->num_qps);
458 return ERR_PTR(-EINVAL); 470 return ERR_PTR(-EINVAL);
459 } 471 }
460 472
@@ -466,6 +478,7 @@ static struct ehca_qp *internal_create_qp(
466 ehca_err(pd->device, "no more than three SGEs " 478 ehca_err(pd->device, "no more than three SGEs "
467 "supported for SRQ pd=%p max_sge=%x", 479 "supported for SRQ pd=%p max_sge=%x",
468 pd, init_attr->cap.max_recv_sge); 480 pd, init_attr->cap.max_recv_sge);
481 atomic_dec(&shca->num_qps);
469 return ERR_PTR(-EINVAL); 482 return ERR_PTR(-EINVAL);
470 } 483 }
471 } 484 }
@@ -477,6 +490,7 @@ static struct ehca_qp *internal_create_qp(
477 qp_type != IB_QPT_SMI && 490 qp_type != IB_QPT_SMI &&
478 qp_type != IB_QPT_GSI) { 491 qp_type != IB_QPT_GSI) {
479 ehca_err(pd->device, "wrong QP Type=%x", qp_type); 492 ehca_err(pd->device, "wrong QP Type=%x", qp_type);
493 atomic_dec(&shca->num_qps);
480 return ERR_PTR(-EINVAL); 494 return ERR_PTR(-EINVAL);
481 } 495 }
482 496
@@ -490,6 +504,7 @@ static struct ehca_qp *internal_create_qp(
490 "or max_rq_wr=%x for RC LLQP", 504 "or max_rq_wr=%x for RC LLQP",
491 init_attr->cap.max_send_wr, 505 init_attr->cap.max_send_wr,
492 init_attr->cap.max_recv_wr); 506 init_attr->cap.max_recv_wr);
507 atomic_dec(&shca->num_qps);
493 return ERR_PTR(-EINVAL); 508 return ERR_PTR(-EINVAL);
494 } 509 }
495 break; 510 break;
@@ -497,6 +512,7 @@ static struct ehca_qp *internal_create_qp(
497 if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { 512 if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
498 ehca_err(pd->device, "UD LLQP not supported " 513 ehca_err(pd->device, "UD LLQP not supported "
499 "by this adapter"); 514 "by this adapter");
515 atomic_dec(&shca->num_qps);
500 return ERR_PTR(-ENOSYS); 516 return ERR_PTR(-ENOSYS);
501 } 517 }
502 if (!(init_attr->cap.max_send_sge <= 5 518 if (!(init_attr->cap.max_send_sge <= 5
@@ -508,20 +524,22 @@ static struct ehca_qp *internal_create_qp(
508 "or max_recv_sge=%x for UD LLQP", 524 "or max_recv_sge=%x for UD LLQP",
509 init_attr->cap.max_send_sge, 525 init_attr->cap.max_send_sge,
510 init_attr->cap.max_recv_sge); 526 init_attr->cap.max_recv_sge);
527 atomic_dec(&shca->num_qps);
511 return ERR_PTR(-EINVAL); 528 return ERR_PTR(-EINVAL);
512 } else if (init_attr->cap.max_send_wr > 255) { 529 } else if (init_attr->cap.max_send_wr > 255) {
513 ehca_err(pd->device, 530 ehca_err(pd->device,
514 "Invalid Number of " 531 "Invalid Number of "
515 "max_send_wr=%x for UD QP_TYPE=%x", 532 "max_send_wr=%x for UD QP_TYPE=%x",
516 init_attr->cap.max_send_wr, qp_type); 533 init_attr->cap.max_send_wr, qp_type);
534 atomic_dec(&shca->num_qps);
517 return ERR_PTR(-EINVAL); 535 return ERR_PTR(-EINVAL);
518 } 536 }
519 break; 537 break;
520 default: 538 default:
521 ehca_err(pd->device, "unsupported LL QP Type=%x", 539 ehca_err(pd->device, "unsupported LL QP Type=%x",
522 qp_type); 540 qp_type);
541 atomic_dec(&shca->num_qps);
523 return ERR_PTR(-EINVAL); 542 return ERR_PTR(-EINVAL);
524 break;
525 } 543 }
526 } else { 544 } else {
527 int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI 545 int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
@@ -533,6 +551,7 @@ static struct ehca_qp *internal_create_qp(
533 "send_sge=%x recv_sge=%x max_sge=%x", 551 "send_sge=%x recv_sge=%x max_sge=%x",
534 init_attr->cap.max_send_sge, 552 init_attr->cap.max_send_sge,
535 init_attr->cap.max_recv_sge, max_sge); 553 init_attr->cap.max_recv_sge, max_sge);
554 atomic_dec(&shca->num_qps);
536 return ERR_PTR(-EINVAL); 555 return ERR_PTR(-EINVAL);
537 } 556 }
538 } 557 }
@@ -543,6 +562,7 @@ static struct ehca_qp *internal_create_qp(
543 my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); 562 my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
544 if (!my_qp) { 563 if (!my_qp) {
545 ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); 564 ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
565 atomic_dec(&shca->num_qps);
546 return ERR_PTR(-ENOMEM); 566 return ERR_PTR(-ENOMEM);
547 } 567 }
548 568
@@ -823,6 +843,7 @@ create_qp_exit1:
823 843
824create_qp_exit0: 844create_qp_exit0:
825 kmem_cache_free(qp_cache, my_qp); 845 kmem_cache_free(qp_cache, my_qp);
846 atomic_dec(&shca->num_qps);
826 return ERR_PTR(ret); 847 return ERR_PTR(ret);
827} 848}
828 849
@@ -1948,6 +1969,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
1948 if (HAS_SQ(my_qp)) 1969 if (HAS_SQ(my_qp))
1949 ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); 1970 ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
1950 kmem_cache_free(qp_cache, my_qp); 1971 kmem_cache_free(qp_cache, my_qp);
1972 atomic_dec(&shca->num_qps);
1951 return 0; 1973 return 0;
1952} 1974}
1953 1975