diff options
author | Stefan Roscher <ossrosch@linux.vnet.ibm.com> | 2008-04-29 16:46:53 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2008-04-29 16:46:53 -0400 |
commit | d227fa7288adebe5ba37fa8e4a589c977d4e4a34 (patch) | |
tree | 8e0f080fc78f0342b5fd2531615ea525333d8d25 | |
parent | f56bcd8013566d4ad4759ae5fc85a6660e4655c7 (diff) |
IB/ehca: Allocate event queue size depending on max number of CQs and QPs
If a lot of QPs fall into the Error state at once and the EQ of the
respective HCA is too small, the EQ might overrun, causing the eHCA driver
to stop processing completion events and to stop calling the application's
completion handlers, effectively causing traffic to stop.
Fix this by limiting available QPs and CQs to a customizable max
count (module parameters number_of_qps and number_of_cqs), and
determining EQ size based on these counts and a worst-case
assumption of 2 EQ entries per CQ and 4 EQ entries per QP.
Signed-off-by: Stefan Roscher <stefan.roscher@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_classes.h | 5 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_cq.c | 11 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_main.c | 36 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_qp.c | 26 |
4 files changed, 74 insertions, 4 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 3d6d9461c31d..00bab60f6de4 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h | |||
@@ -66,6 +66,7 @@ struct ehca_av; | |||
66 | #include "ehca_irq.h" | 66 | #include "ehca_irq.h" |
67 | 67 | ||
68 | #define EHCA_EQE_CACHE_SIZE 20 | 68 | #define EHCA_EQE_CACHE_SIZE 20 |
69 | #define EHCA_MAX_NUM_QUEUES 0xffff | ||
69 | 70 | ||
70 | struct ehca_eqe_cache_entry { | 71 | struct ehca_eqe_cache_entry { |
71 | struct ehca_eqe *eqe; | 72 | struct ehca_eqe *eqe; |
@@ -127,6 +128,8 @@ struct ehca_shca { | |||
127 | /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ | 128 | /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ |
128 | u32 hca_cap_mr_pgsize; | 129 | u32 hca_cap_mr_pgsize; |
129 | int max_mtu; | 130 | int max_mtu; |
131 | atomic_t num_cqs; | ||
132 | atomic_t num_qps; | ||
130 | }; | 133 | }; |
131 | 134 | ||
132 | struct ehca_pd { | 135 | struct ehca_pd { |
@@ -344,6 +347,8 @@ extern int ehca_use_hp_mr; | |||
344 | extern int ehca_scaling_code; | 347 | extern int ehca_scaling_code; |
345 | extern int ehca_lock_hcalls; | 348 | extern int ehca_lock_hcalls; |
346 | extern int ehca_nr_ports; | 349 | extern int ehca_nr_ports; |
350 | extern int ehca_max_cq; | ||
351 | extern int ehca_max_qp; | ||
347 | 352 | ||
348 | struct ipzu_queue_resp { | 353 | struct ipzu_queue_resp { |
349 | u32 qe_size; /* queue entry size */ | 354 | u32 qe_size; /* queue entry size */ |
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index ec0cfcf3073f..5540b276a33c 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c | |||
@@ -132,10 +132,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, | |||
132 | if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) | 132 | if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) |
133 | return ERR_PTR(-EINVAL); | 133 | return ERR_PTR(-EINVAL); |
134 | 134 | ||
135 | if (!atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)) { | ||
136 | ehca_err(device, "Unable to create CQ, max number of %i " | ||
137 | "CQs reached.", ehca_max_cq); | ||
138 | ehca_err(device, "To increase the maximum number of CQs " | ||
139 | "use the number_of_cqs module parameter.\n"); | ||
140 | return ERR_PTR(-ENOSPC); | ||
141 | } | ||
142 | |||
135 | my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); | 143 | my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); |
136 | if (!my_cq) { | 144 | if (!my_cq) { |
137 | ehca_err(device, "Out of memory for ehca_cq struct device=%p", | 145 | ehca_err(device, "Out of memory for ehca_cq struct device=%p", |
138 | device); | 146 | device); |
147 | atomic_dec(&shca->num_cqs); | ||
139 | return ERR_PTR(-ENOMEM); | 148 | return ERR_PTR(-ENOMEM); |
140 | } | 149 | } |
141 | 150 | ||
@@ -305,6 +314,7 @@ create_cq_exit2: | |||
305 | create_cq_exit1: | 314 | create_cq_exit1: |
306 | kmem_cache_free(cq_cache, my_cq); | 315 | kmem_cache_free(cq_cache, my_cq); |
307 | 316 | ||
317 | atomic_dec(&shca->num_cqs); | ||
308 | return cq; | 318 | return cq; |
309 | } | 319 | } |
310 | 320 | ||
@@ -359,6 +369,7 @@ int ehca_destroy_cq(struct ib_cq *cq) | |||
359 | ipz_queue_dtor(NULL, &my_cq->ipz_queue); | 369 | ipz_queue_dtor(NULL, &my_cq->ipz_queue); |
360 | kmem_cache_free(cq_cache, my_cq); | 370 | kmem_cache_free(cq_cache, my_cq); |
361 | 371 | ||
372 | atomic_dec(&shca->num_cqs); | ||
362 | return 0; | 373 | return 0; |
363 | } | 374 | } |
364 | 375 | ||
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 65048976198c..482103eb6eac 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c | |||
@@ -68,6 +68,8 @@ int ehca_port_act_time = 30; | |||
68 | int ehca_static_rate = -1; | 68 | int ehca_static_rate = -1; |
69 | int ehca_scaling_code = 0; | 69 | int ehca_scaling_code = 0; |
70 | int ehca_lock_hcalls = -1; | 70 | int ehca_lock_hcalls = -1; |
71 | int ehca_max_cq = -1; | ||
72 | int ehca_max_qp = -1; | ||
71 | 73 | ||
72 | module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO); | 74 | module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO); |
73 | module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); | 75 | module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); |
@@ -79,6 +81,8 @@ module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); | |||
79 | module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); | 81 | module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); |
80 | module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); | 82 | module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); |
81 | module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO); | 83 | module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO); |
84 | module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); | ||
85 | module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); | ||
82 | 86 | ||
83 | MODULE_PARM_DESC(open_aqp1, | 87 | MODULE_PARM_DESC(open_aqp1, |
84 | "Open AQP1 on startup (default: no)"); | 88 | "Open AQP1 on startup (default: no)"); |
@@ -104,6 +108,12 @@ MODULE_PARM_DESC(scaling_code, | |||
104 | MODULE_PARM_DESC(lock_hcalls, | 108 | MODULE_PARM_DESC(lock_hcalls, |
105 | "Serialize all hCalls made by the driver " | 109 | "Serialize all hCalls made by the driver " |
106 | "(default: autodetect)"); | 110 | "(default: autodetect)"); |
111 | MODULE_PARM_DESC(number_of_cqs, | ||
112 | "Max number of CQs which can be allocated " | ||
113 | "(default: autodetect)"); | ||
114 | MODULE_PARM_DESC(number_of_qps, | ||
115 | "Max number of QPs which can be allocated " | ||
116 | "(default: autodetect)"); | ||
107 | 117 | ||
108 | DEFINE_RWLOCK(ehca_qp_idr_lock); | 118 | DEFINE_RWLOCK(ehca_qp_idr_lock); |
109 | DEFINE_RWLOCK(ehca_cq_idr_lock); | 119 | DEFINE_RWLOCK(ehca_cq_idr_lock); |
@@ -355,6 +365,25 @@ static int ehca_sense_attributes(struct ehca_shca *shca) | |||
355 | if (rblock->memory_page_size_supported & pgsize_map[i]) | 365 | if (rblock->memory_page_size_supported & pgsize_map[i]) |
356 | shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; | 366 | shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; |
357 | 367 | ||
368 | /* Set maximum number of CQs and QPs to calculate EQ size */ | ||
369 | if (ehca_max_qp == -1) | ||
370 | ehca_max_qp = min_t(int, rblock->max_qp, EHCA_MAX_NUM_QUEUES); | ||
371 | else if (ehca_max_qp < 1 || ehca_max_qp > rblock->max_qp) { | ||
372 | ehca_gen_err("Requested number of QPs is out of range (1 - %i) " | ||
373 | "specified by HW", rblock->max_qp); | ||
374 | ret = -EINVAL; | ||
375 | goto sense_attributes1; | ||
376 | } | ||
377 | |||
378 | if (ehca_max_cq == -1) | ||
379 | ehca_max_cq = min_t(int, rblock->max_cq, EHCA_MAX_NUM_QUEUES); | ||
380 | else if (ehca_max_cq < 1 || ehca_max_cq > rblock->max_cq) { | ||
381 | ehca_gen_err("Requested number of CQs is out of range (1 - %i) " | ||
382 | "specified by HW", rblock->max_cq); | ||
383 | ret = -EINVAL; | ||
384 | goto sense_attributes1; | ||
385 | } | ||
386 | |||
358 | /* query max MTU from first port -- it's the same for all ports */ | 387 | /* query max MTU from first port -- it's the same for all ports */ |
359 | port = (struct hipz_query_port *)rblock; | 388 | port = (struct hipz_query_port *)rblock; |
360 | h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); | 389 | h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); |
@@ -684,7 +713,7 @@ static int __devinit ehca_probe(struct of_device *dev, | |||
684 | struct ehca_shca *shca; | 713 | struct ehca_shca *shca; |
685 | const u64 *handle; | 714 | const u64 *handle; |
686 | struct ib_pd *ibpd; | 715 | struct ib_pd *ibpd; |
687 | int ret, i; | 716 | int ret, i, eq_size; |
688 | 717 | ||
689 | handle = of_get_property(dev->node, "ibm,hca-handle", NULL); | 718 | handle = of_get_property(dev->node, "ibm,hca-handle", NULL); |
690 | if (!handle) { | 719 | if (!handle) { |
@@ -705,6 +734,8 @@ static int __devinit ehca_probe(struct of_device *dev, | |||
705 | return -ENOMEM; | 734 | return -ENOMEM; |
706 | } | 735 | } |
707 | mutex_init(&shca->modify_mutex); | 736 | mutex_init(&shca->modify_mutex); |
737 | atomic_set(&shca->num_cqs, 0); | ||
738 | atomic_set(&shca->num_qps, 0); | ||
708 | for (i = 0; i < ARRAY_SIZE(shca->sport); i++) | 739 | for (i = 0; i < ARRAY_SIZE(shca->sport); i++) |
709 | spin_lock_init(&shca->sport[i].mod_sqp_lock); | 740 | spin_lock_init(&shca->sport[i].mod_sqp_lock); |
710 | 741 | ||
@@ -724,8 +755,9 @@ static int __devinit ehca_probe(struct of_device *dev, | |||
724 | goto probe1; | 755 | goto probe1; |
725 | } | 756 | } |
726 | 757 | ||
758 | eq_size = 2 * ehca_max_cq + 4 * ehca_max_qp; | ||
727 | /* create event queues */ | 759 | /* create event queues */ |
728 | ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048); | 760 | ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); |
729 | if (ret) { | 761 | if (ret) { |
730 | ehca_err(&shca->ib_device, "Cannot create EQ."); | 762 | ehca_err(&shca->ib_device, "Cannot create EQ."); |
731 | goto probe1; | 763 | goto probe1; |
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 57bef1152cc2..18fba92fa7ae 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c | |||
@@ -421,8 +421,18 @@ static struct ehca_qp *internal_create_qp( | |||
421 | u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; | 421 | u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; |
422 | unsigned long flags; | 422 | unsigned long flags; |
423 | 423 | ||
424 | if (init_attr->create_flags) | 424 | if (!atomic_add_unless(&shca->num_qps, 1, ehca_max_qp)) { |
425 | ehca_err(pd->device, "Unable to create QP, max number of %i " | ||
426 | "QPs reached.", ehca_max_qp); | ||
427 | ehca_err(pd->device, "To increase the maximum number of QPs " | ||
428 | "use the number_of_qps module parameter.\n"); | ||
429 | return ERR_PTR(-ENOSPC); | ||
430 | } | ||
431 | |||
432 | if (init_attr->create_flags) { | ||
433 | atomic_dec(&shca->num_qps); | ||
425 | return ERR_PTR(-EINVAL); | 434 | return ERR_PTR(-EINVAL); |
435 | } | ||
426 | 436 | ||
427 | memset(&parms, 0, sizeof(parms)); | 437 | memset(&parms, 0, sizeof(parms)); |
428 | qp_type = init_attr->qp_type; | 438 | qp_type = init_attr->qp_type; |
@@ -431,6 +441,7 @@ static struct ehca_qp *internal_create_qp( | |||
431 | init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { | 441 | init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { |
432 | ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", | 442 | ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", |
433 | init_attr->sq_sig_type); | 443 | init_attr->sq_sig_type); |
444 | atomic_dec(&shca->num_qps); | ||
434 | return ERR_PTR(-EINVAL); | 445 | return ERR_PTR(-EINVAL); |
435 | } | 446 | } |
436 | 447 | ||
@@ -455,6 +466,7 @@ static struct ehca_qp *internal_create_qp( | |||
455 | 466 | ||
456 | if (is_llqp && has_srq) { | 467 | if (is_llqp && has_srq) { |
457 | ehca_err(pd->device, "LLQPs can't have an SRQ"); | 468 | ehca_err(pd->device, "LLQPs can't have an SRQ"); |
469 | atomic_dec(&shca->num_qps); | ||
458 | return ERR_PTR(-EINVAL); | 470 | return ERR_PTR(-EINVAL); |
459 | } | 471 | } |
460 | 472 | ||
@@ -466,6 +478,7 @@ static struct ehca_qp *internal_create_qp( | |||
466 | ehca_err(pd->device, "no more than three SGEs " | 478 | ehca_err(pd->device, "no more than three SGEs " |
467 | "supported for SRQ pd=%p max_sge=%x", | 479 | "supported for SRQ pd=%p max_sge=%x", |
468 | pd, init_attr->cap.max_recv_sge); | 480 | pd, init_attr->cap.max_recv_sge); |
481 | atomic_dec(&shca->num_qps); | ||
469 | return ERR_PTR(-EINVAL); | 482 | return ERR_PTR(-EINVAL); |
470 | } | 483 | } |
471 | } | 484 | } |
@@ -477,6 +490,7 @@ static struct ehca_qp *internal_create_qp( | |||
477 | qp_type != IB_QPT_SMI && | 490 | qp_type != IB_QPT_SMI && |
478 | qp_type != IB_QPT_GSI) { | 491 | qp_type != IB_QPT_GSI) { |
479 | ehca_err(pd->device, "wrong QP Type=%x", qp_type); | 492 | ehca_err(pd->device, "wrong QP Type=%x", qp_type); |
493 | atomic_dec(&shca->num_qps); | ||
480 | return ERR_PTR(-EINVAL); | 494 | return ERR_PTR(-EINVAL); |
481 | } | 495 | } |
482 | 496 | ||
@@ -490,6 +504,7 @@ static struct ehca_qp *internal_create_qp( | |||
490 | "or max_rq_wr=%x for RC LLQP", | 504 | "or max_rq_wr=%x for RC LLQP", |
491 | init_attr->cap.max_send_wr, | 505 | init_attr->cap.max_send_wr, |
492 | init_attr->cap.max_recv_wr); | 506 | init_attr->cap.max_recv_wr); |
507 | atomic_dec(&shca->num_qps); | ||
493 | return ERR_PTR(-EINVAL); | 508 | return ERR_PTR(-EINVAL); |
494 | } | 509 | } |
495 | break; | 510 | break; |
@@ -497,6 +512,7 @@ static struct ehca_qp *internal_create_qp( | |||
497 | if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { | 512 | if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { |
498 | ehca_err(pd->device, "UD LLQP not supported " | 513 | ehca_err(pd->device, "UD LLQP not supported " |
499 | "by this adapter"); | 514 | "by this adapter"); |
515 | atomic_dec(&shca->num_qps); | ||
500 | return ERR_PTR(-ENOSYS); | 516 | return ERR_PTR(-ENOSYS); |
501 | } | 517 | } |
502 | if (!(init_attr->cap.max_send_sge <= 5 | 518 | if (!(init_attr->cap.max_send_sge <= 5 |
@@ -508,20 +524,22 @@ static struct ehca_qp *internal_create_qp( | |||
508 | "or max_recv_sge=%x for UD LLQP", | 524 | "or max_recv_sge=%x for UD LLQP", |
509 | init_attr->cap.max_send_sge, | 525 | init_attr->cap.max_send_sge, |
510 | init_attr->cap.max_recv_sge); | 526 | init_attr->cap.max_recv_sge); |
527 | atomic_dec(&shca->num_qps); | ||
511 | return ERR_PTR(-EINVAL); | 528 | return ERR_PTR(-EINVAL); |
512 | } else if (init_attr->cap.max_send_wr > 255) { | 529 | } else if (init_attr->cap.max_send_wr > 255) { |
513 | ehca_err(pd->device, | 530 | ehca_err(pd->device, |
514 | "Invalid Number of " | 531 | "Invalid Number of " |
515 | "max_send_wr=%x for UD QP_TYPE=%x", | 532 | "max_send_wr=%x for UD QP_TYPE=%x", |
516 | init_attr->cap.max_send_wr, qp_type); | 533 | init_attr->cap.max_send_wr, qp_type); |
534 | atomic_dec(&shca->num_qps); | ||
517 | return ERR_PTR(-EINVAL); | 535 | return ERR_PTR(-EINVAL); |
518 | } | 536 | } |
519 | break; | 537 | break; |
520 | default: | 538 | default: |
521 | ehca_err(pd->device, "unsupported LL QP Type=%x", | 539 | ehca_err(pd->device, "unsupported LL QP Type=%x", |
522 | qp_type); | 540 | qp_type); |
541 | atomic_dec(&shca->num_qps); | ||
523 | return ERR_PTR(-EINVAL); | 542 | return ERR_PTR(-EINVAL); |
524 | break; | ||
525 | } | 543 | } |
526 | } else { | 544 | } else { |
527 | int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI | 545 | int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI |
@@ -533,6 +551,7 @@ static struct ehca_qp *internal_create_qp( | |||
533 | "send_sge=%x recv_sge=%x max_sge=%x", | 551 | "send_sge=%x recv_sge=%x max_sge=%x", |
534 | init_attr->cap.max_send_sge, | 552 | init_attr->cap.max_send_sge, |
535 | init_attr->cap.max_recv_sge, max_sge); | 553 | init_attr->cap.max_recv_sge, max_sge); |
554 | atomic_dec(&shca->num_qps); | ||
536 | return ERR_PTR(-EINVAL); | 555 | return ERR_PTR(-EINVAL); |
537 | } | 556 | } |
538 | } | 557 | } |
@@ -543,6 +562,7 @@ static struct ehca_qp *internal_create_qp( | |||
543 | my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); | 562 | my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); |
544 | if (!my_qp) { | 563 | if (!my_qp) { |
545 | ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); | 564 | ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); |
565 | atomic_dec(&shca->num_qps); | ||
546 | return ERR_PTR(-ENOMEM); | 566 | return ERR_PTR(-ENOMEM); |
547 | } | 567 | } |
548 | 568 | ||
@@ -823,6 +843,7 @@ create_qp_exit1: | |||
823 | 843 | ||
824 | create_qp_exit0: | 844 | create_qp_exit0: |
825 | kmem_cache_free(qp_cache, my_qp); | 845 | kmem_cache_free(qp_cache, my_qp); |
846 | atomic_dec(&shca->num_qps); | ||
826 | return ERR_PTR(ret); | 847 | return ERR_PTR(ret); |
827 | } | 848 | } |
828 | 849 | ||
@@ -1948,6 +1969,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, | |||
1948 | if (HAS_SQ(my_qp)) | 1969 | if (HAS_SQ(my_qp)) |
1949 | ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); | 1970 | ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); |
1950 | kmem_cache_free(qp_cache, my_qp); | 1971 | kmem_cache_free(qp_cache, my_qp); |
1972 | atomic_dec(&shca->num_qps); | ||
1951 | return 0; | 1973 | return 0; |
1952 | } | 1974 | } |
1953 | 1975 | ||