aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatan Barak <matanb@mellanox.com>2014-12-11 03:57:57 -0500
committerDavid S. Miller <davem@davemloft.net>2014-12-11 14:47:35 -0500
commitd57febe1a47801ef8a55dbf10672850523dfaa60 (patch)
treed5a9cfb4c5c57c147e8ce61f5738337b03ea458a
parent7a89399ffad7b7c47b43afda010309b3b88538c0 (diff)
net/mlx4: Add A0 hybrid steering
A0 hybrid steering is a form of high performance flow steering. By using this mode, mlx4 cards use a fast limited table based steering, in order to enable fast steering of unicast packets to a QP. In order to implement A0 hybrid steering we allocate resources from different zones: (1) General range (2) Special MAC-assigned QPs [RSS, Raw-Ethernet] each has its own region. When we create an RSS QP or a raw Ethernet (A0 steerable and BF ready) QP, we try hard to allocate the QP from range (2). Otherwise, we try hard not to allocate from this range. However, when the system is pushed to its limits and one needs every resource, the allocator uses every region it can. Meaning, when we run out of raw-eth QPs, the allocator allocates from the general range (and the special-A0 area is no longer active). If we run out of RSS QPs, the mechanism tries to allocate from the raw-eth QP zone. If that is also exhausted, the allocator will allocate from the general range (and the A0 region is no longer active). Note that if a raw-eth QP is allocated from the general range, it attempts to allocate the range such that bits 6 and 7 (blueflame bits) in the QP number are not set. When the feature is used in SRIOV, the VF has to notify the PF what kind of QP attributes it needs. In order to do that, along with the "Eth QP blueflame" bit, we reserve a new "A0 steerable QP" bit. According to the combination of these bits, the PF tries to allocate a suitable QP. In order to maintain backward compatibility (with older PFs), the PF notifies which QP attributes it supports via the QUERY_FUNC_CAP command. Signed-off-by: Matan Barak <matanb@mellanox.com> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/qp.c277
-rw-r--r--include/linux/mlx4/device.h10
8 files changed, 300 insertions, 25 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 506d1bdad227..cf000b7ad64f 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -807,8 +807,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
807 * VLAN insertion. */ 807 * VLAN insertion. */
808 if (init_attr->qp_type == IB_QPT_RAW_PACKET) 808 if (init_attr->qp_type == IB_QPT_RAW_PACKET)
809 err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn, 809 err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn,
810 init_attr->cap.max_send_wr ? 810 (init_attr->cap.max_send_wr ?
811 MLX4_RESERVE_ETH_BF_QP : 0); 811 MLX4_RESERVE_ETH_BF_QP : 0) |
812 (init_attr->cap.max_recv_wr ?
813 MLX4_RESERVE_A0_QP : 0));
812 else 814 else
813 if (qp->flags & MLX4_IB_QP_NETIF) 815 if (qp->flags & MLX4_IB_QP_NETIF)
814 err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn); 816 err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index c67effb05b2f..568e1f41fdd4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -595,7 +595,7 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv)
595 return 0; 595 return 0;
596 } 596 }
597 597
598 err = mlx4_qp_reserve_range(dev, 1, 1, qpn, 0); 598 err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP);
599 en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); 599 en_dbg(DRV, priv, "Reserved qp %d\n", *qpn);
600 if (err) { 600 if (err) {
601 en_err(priv, "Failed to reserve qp for mac registration\n"); 601 en_err(priv, "Failed to reserve qp for mac registration\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index a850f24fabdf..a0474eb94aa3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -1131,7 +1131,8 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
1131 int err; 1131 int err;
1132 u32 qpn; 1132 u32 qpn;
1133 1133
1134 err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, 0); 1134 err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn,
1135 MLX4_RESERVE_A0_QP);
1135 if (err) { 1136 if (err) {
1136 en_err(priv, "Failed reserving drop qpn\n"); 1137 en_err(priv, "Failed reserving drop qpn\n");
1137 return err; 1138 return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 1469b5b5be64..622bffaa9d78 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -275,6 +275,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
275#define QUERY_FUNC_CAP_FLAG_VALID_MAILBOX 0x04 275#define QUERY_FUNC_CAP_FLAG_VALID_MAILBOX 0x04
276 276
277#define QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG (1UL << 31) 277#define QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG (1UL << 31)
278#define QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG (1UL << 30)
278 279
279/* when opcode modifier = 1 */ 280/* when opcode modifier = 1 */
280#define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3 281#define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3
@@ -406,7 +407,8 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
406 MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); 407 MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET);
407 MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP); 408 MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP);
408 409
409 size = QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG; 410 size = QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG |
411 QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG;
410 MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET); 412 MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET);
411 } else 413 } else
412 err = -EINVAL; 414 err = -EINVAL;
@@ -509,6 +511,8 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port,
509 MLX4_GET(size, outbox, QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET); 511 MLX4_GET(size, outbox, QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET);
510 if (size & QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG) 512 if (size & QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG)
511 func_cap->extra_flags |= MLX4_QUERY_FUNC_FLAGS_BF_RES_QP; 513 func_cap->extra_flags |= MLX4_QUERY_FUNC_FLAGS_BF_RES_QP;
514 if (size & QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG)
515 func_cap->extra_flags |= MLX4_QUERY_FUNC_FLAGS_A0_RES_QP;
512 } 516 }
513 517
514 goto out; 518 goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 6a9a941ddf58..3bfe90b95f96 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -436,6 +436,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
436 (1 << dev->caps.log_num_vlans) * 436 (1 << dev->caps.log_num_vlans) *
437 dev->caps.num_ports; 437 dev->caps.num_ports;
438 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; 438 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
439 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
440 MLX4_A0_STEERING_TABLE_SIZE;
439 441
440 dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + 442 dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
441 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + 443 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
@@ -469,7 +471,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
469 if (!mlx4_is_slave(dev)) { 471 if (!mlx4_is_slave(dev)) {
470 mlx4_enable_cqe_eqe_stride(dev); 472 mlx4_enable_cqe_eqe_stride(dev);
471 dev->caps.alloc_res_qp_mask = 473 dev->caps.alloc_res_qp_mask =
472 (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0); 474 (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
475 MLX4_RESERVE_A0_QP;
473 } else { 476 } else {
474 dev->caps.alloc_res_qp_mask = 0; 477 dev->caps.alloc_res_qp_mask = 0;
475 } 478 }
@@ -826,6 +829,9 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
826 dev->caps.bf_reg_size) 829 dev->caps.bf_reg_size)
827 dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP; 830 dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;
828 831
832 if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
833 dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;
834
829 return 0; 835 return 0;
830 836
831err_mem: 837err_mem:
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index bc1505efa436..cebd1180702b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -682,8 +682,19 @@ struct mlx4_srq_table {
682 struct mlx4_icm_table cmpt_table; 682 struct mlx4_icm_table cmpt_table;
683}; 683};
684 684
685enum mlx4_qp_table_zones {
686 MLX4_QP_TABLE_ZONE_GENERAL,
687 MLX4_QP_TABLE_ZONE_RSS,
688 MLX4_QP_TABLE_ZONE_RAW_ETH,
689 MLX4_QP_TABLE_ZONE_NUM
690};
691
692#define MLX4_A0_STEERING_TABLE_SIZE 256
693
685struct mlx4_qp_table { 694struct mlx4_qp_table {
686 struct mlx4_bitmap bitmap; 695 struct mlx4_bitmap *bitmap_gen;
696 struct mlx4_zone_allocator *zones;
697 u32 zones_uids[MLX4_QP_TABLE_ZONE_NUM];
687 u32 rdmarc_base; 698 u32 rdmarc_base;
688 int rdmarc_shift; 699 int rdmarc_shift;
689 spinlock_t lock; 700 spinlock_t lock;
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 8720428c9807..d8d040c366f4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -213,6 +213,7 @@ EXPORT_SYMBOL_GPL(mlx4_qp_modify);
213int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, 213int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
214 int *base, u8 flags) 214 int *base, u8 flags)
215{ 215{
216 u32 uid;
216 int bf_qp = !!(flags & (u8)MLX4_RESERVE_ETH_BF_QP); 217 int bf_qp = !!(flags & (u8)MLX4_RESERVE_ETH_BF_QP);
217 218
218 struct mlx4_priv *priv = mlx4_priv(dev); 219 struct mlx4_priv *priv = mlx4_priv(dev);
@@ -221,8 +222,16 @@ int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
221 if (cnt > MLX4_MAX_BF_QP_RANGE && bf_qp) 222 if (cnt > MLX4_MAX_BF_QP_RANGE && bf_qp)
222 return -ENOMEM; 223 return -ENOMEM;
223 224
224 *base = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align, 225 uid = MLX4_QP_TABLE_ZONE_GENERAL;
225 bf_qp ? MLX4_BF_QP_SKIP_MASK : 0); 226 if (flags & (u8)MLX4_RESERVE_A0_QP) {
227 if (bf_qp)
228 uid = MLX4_QP_TABLE_ZONE_RAW_ETH;
229 else
230 uid = MLX4_QP_TABLE_ZONE_RSS;
231 }
232
233 *base = mlx4_zone_alloc_entries(qp_table->zones, uid, cnt, align,
234 bf_qp ? MLX4_BF_QP_SKIP_MASK : 0, NULL);
226 if (*base == -1) 235 if (*base == -1)
227 return -ENOMEM; 236 return -ENOMEM;
228 237
@@ -263,7 +272,7 @@ void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
263 272
264 if (mlx4_is_qp_reserved(dev, (u32) base_qpn)) 273 if (mlx4_is_qp_reserved(dev, (u32) base_qpn))
265 return; 274 return;
266 mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, MLX4_USE_RR); 275 mlx4_zone_free_entries_unique(qp_table->zones, base_qpn, cnt);
267} 276}
268 277
269void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) 278void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
@@ -473,6 +482,227 @@ static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn)
473 MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); 482 MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
474} 483}
475 484
485#define MLX4_QP_TABLE_RSS_ETH_PRIORITY 2
486#define MLX4_QP_TABLE_RAW_ETH_PRIORITY 1
487#define MLX4_QP_TABLE_RAW_ETH_SIZE 256
488
489static int mlx4_create_zones(struct mlx4_dev *dev,
490 u32 reserved_bottom_general,
491 u32 reserved_top_general,
492 u32 reserved_bottom_rss,
493 u32 start_offset_rss,
494 u32 max_table_offset)
495{
496 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
497 struct mlx4_bitmap (*bitmap)[MLX4_QP_TABLE_ZONE_NUM] = NULL;
498 int bitmap_initialized = 0;
499 u32 last_offset;
500 int k;
501 int err;
502
503 qp_table->zones = mlx4_zone_allocator_create(MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP);
504
505 if (NULL == qp_table->zones)
506 return -ENOMEM;
507
508 bitmap = kmalloc(sizeof(*bitmap), GFP_KERNEL);
509
510 if (NULL == bitmap) {
511 err = -ENOMEM;
512 goto free_zone;
513 }
514
515 err = mlx4_bitmap_init(*bitmap + MLX4_QP_TABLE_ZONE_GENERAL, dev->caps.num_qps,
516 (1 << 23) - 1, reserved_bottom_general,
517 reserved_top_general);
518
519 if (err)
520 goto free_bitmap;
521
522 ++bitmap_initialized;
523
524 err = mlx4_zone_add_one(qp_table->zones, *bitmap + MLX4_QP_TABLE_ZONE_GENERAL,
525 MLX4_ZONE_FALLBACK_TO_HIGHER_PRIO |
526 MLX4_ZONE_USE_RR, 0,
527 0, qp_table->zones_uids + MLX4_QP_TABLE_ZONE_GENERAL);
528
529 if (err)
530 goto free_bitmap;
531
532 err = mlx4_bitmap_init(*bitmap + MLX4_QP_TABLE_ZONE_RSS,
533 reserved_bottom_rss,
534 reserved_bottom_rss - 1,
535 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
536 reserved_bottom_rss - start_offset_rss);
537
538 if (err)
539 goto free_bitmap;
540
541 ++bitmap_initialized;
542
543 err = mlx4_zone_add_one(qp_table->zones, *bitmap + MLX4_QP_TABLE_ZONE_RSS,
544 MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO |
545 MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO |
546 MLX4_ZONE_USE_RR, MLX4_QP_TABLE_RSS_ETH_PRIORITY,
547 0, qp_table->zones_uids + MLX4_QP_TABLE_ZONE_RSS);
548
549 if (err)
550 goto free_bitmap;
551
552 last_offset = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
553 /* We have a single zone for the A0 steering QPs area of the FW. This area
554 * needs to be split into subareas. One set of subareas is for RSS QPs
555 * (in which qp number bits 6 and/or 7 are set); the other set of subareas
556 * is for RAW_ETH QPs, which require that both bits 6 and 7 are zero.
557 * Currently, the values returned by the FW (A0 steering area starting qp number
558 * and A0 steering area size) are such that there are only two subareas -- one
559 * for RSS and one for RAW_ETH.
560 */
561 for (k = MLX4_QP_TABLE_ZONE_RSS + 1; k < sizeof(*bitmap)/sizeof((*bitmap)[0]);
562 k++) {
563 int size;
564 u32 offset = start_offset_rss;
565 u32 bf_mask;
566 u32 requested_size;
567
568 /* Assuming MLX4_BF_QP_SKIP_MASK is consecutive ones, this calculates
569 * a mask of all LSB bits set until (and not including) the first
570 * set bit of MLX4_BF_QP_SKIP_MASK. For example, if MLX4_BF_QP_SKIP_MASK
571 * is 0xc0, bf_mask will be 0x3f.
572 */
573 bf_mask = (MLX4_BF_QP_SKIP_MASK & ~(MLX4_BF_QP_SKIP_MASK - 1)) - 1;
574 requested_size = min((u32)MLX4_QP_TABLE_RAW_ETH_SIZE, bf_mask + 1);
575
576 if (((last_offset & MLX4_BF_QP_SKIP_MASK) &&
577 ((int)(max_table_offset - last_offset)) >=
578 roundup_pow_of_two(MLX4_BF_QP_SKIP_MASK)) ||
579 (!(last_offset & MLX4_BF_QP_SKIP_MASK) &&
580 !((last_offset + requested_size - 1) &
581 MLX4_BF_QP_SKIP_MASK)))
582 size = requested_size;
583 else {
584 u32 candidate_offset =
585 (last_offset | MLX4_BF_QP_SKIP_MASK | bf_mask) + 1;
586
587 if (last_offset & MLX4_BF_QP_SKIP_MASK)
588 last_offset = candidate_offset;
589
590 /* From this point, the BF bits are 0 */
591
592 if (last_offset > max_table_offset) {
593 /* need to skip */
594 size = -1;
595 } else {
596 size = min3(max_table_offset - last_offset,
597 bf_mask - (last_offset & bf_mask),
598 requested_size);
599 if (size < requested_size) {
600 int candidate_size;
601
602 candidate_size = min3(
603 max_table_offset - candidate_offset,
604 bf_mask - (last_offset & bf_mask),
605 requested_size);
606
607 /* We will not take this path if last_offset was
608 * already set above to candidate_offset
609 */
610 if (candidate_size > size) {
611 last_offset = candidate_offset;
612 size = candidate_size;
613 }
614 }
615 }
616 }
617
618 if (size > 0) {
619 /* mlx4_bitmap_alloc_range will find a contiguous range of "size"
620 * QPs in which both bits 6 and 7 are zero, because we pass it the
621 * MLX4_BF_SKIP_MASK).
622 */
623 offset = mlx4_bitmap_alloc_range(
624 *bitmap + MLX4_QP_TABLE_ZONE_RSS,
625 size, 1,
626 MLX4_BF_QP_SKIP_MASK);
627
628 if (offset == (u32)-1) {
629 err = -ENOMEM;
630 break;
631 }
632
633 last_offset = offset + size;
634
635 err = mlx4_bitmap_init(*bitmap + k, roundup_pow_of_two(size),
636 roundup_pow_of_two(size) - 1, 0,
637 roundup_pow_of_two(size) - size);
638 } else {
639 /* Add an empty bitmap, we'll allocate from different zones (since
640 * at least one is reserved)
641 */
642 err = mlx4_bitmap_init(*bitmap + k, 1,
643 MLX4_QP_TABLE_RAW_ETH_SIZE - 1, 0,
644 0);
645 mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0);
646 }
647
648 if (err)
649 break;
650
651 ++bitmap_initialized;
652
653 err = mlx4_zone_add_one(qp_table->zones, *bitmap + k,
654 MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO |
655 MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO |
656 MLX4_ZONE_USE_RR, MLX4_QP_TABLE_RAW_ETH_PRIORITY,
657 offset, qp_table->zones_uids + k);
658
659 if (err)
660 break;
661 }
662
663 if (err)
664 goto free_bitmap;
665
666 qp_table->bitmap_gen = *bitmap;
667
668 return err;
669
670free_bitmap:
671 for (k = 0; k < bitmap_initialized; k++)
672 mlx4_bitmap_cleanup(*bitmap + k);
673 kfree(bitmap);
674free_zone:
675 mlx4_zone_allocator_destroy(qp_table->zones);
676 return err;
677}
678
679static void mlx4_cleanup_qp_zones(struct mlx4_dev *dev)
680{
681 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
682
683 if (qp_table->zones) {
684 int i;
685
686 for (i = 0;
687 i < sizeof(qp_table->zones_uids)/sizeof(qp_table->zones_uids[0]);
688 i++) {
689 struct mlx4_bitmap *bitmap =
690 mlx4_zone_get_bitmap(qp_table->zones,
691 qp_table->zones_uids[i]);
692
693 mlx4_zone_remove_one(qp_table->zones, qp_table->zones_uids[i]);
694 if (NULL == bitmap)
695 continue;
696
697 mlx4_bitmap_cleanup(bitmap);
698 }
699 mlx4_zone_allocator_destroy(qp_table->zones);
700 kfree(qp_table->bitmap_gen);
701 qp_table->bitmap_gen = NULL;
702 qp_table->zones = NULL;
703 }
704}
705
476int mlx4_init_qp_table(struct mlx4_dev *dev) 706int mlx4_init_qp_table(struct mlx4_dev *dev)
477{ 707{
478 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; 708 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
@@ -480,22 +710,33 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
480 int reserved_from_top = 0; 710 int reserved_from_top = 0;
481 int reserved_from_bot; 711 int reserved_from_bot;
482 int k; 712 int k;
713 int fixed_reserved_from_bot_rv = 0;
714 int bottom_reserved_for_rss_bitmap;
715 u32 max_table_offset = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
716 MLX4_A0_STEERING_TABLE_SIZE;
483 717
484 spin_lock_init(&qp_table->lock); 718 spin_lock_init(&qp_table->lock);
485 INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC); 719 INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
486 if (mlx4_is_slave(dev)) 720 if (mlx4_is_slave(dev))
487 return 0; 721 return 0;
488 722
489 /* 723 /* We reserve 2 extra QPs per port for the special QPs. The
490 * We reserve 2 extra QPs per port for the special QPs. The
491 * block of special QPs must be aligned to a multiple of 8, so 724 * block of special QPs must be aligned to a multiple of 8, so
492 * round up. 725 * round up.
493 * 726 *
494 * We also reserve the MSB of the 24-bit QP number to indicate 727 * We also reserve the MSB of the 24-bit QP number to indicate
495 * that a QP is an XRC QP. 728 * that a QP is an XRC QP.
496 */ 729 */
497 dev->phys_caps.base_sqpn = 730 for (k = 0; k <= MLX4_QP_REGION_BOTTOM; k++)
498 ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); 731 fixed_reserved_from_bot_rv += dev->caps.reserved_qps_cnt[k];
732
733 if (fixed_reserved_from_bot_rv < max_table_offset)
734 fixed_reserved_from_bot_rv = max_table_offset;
735
736 /* We reserve at least 1 extra for bitmaps that we don't have enough space for*/
737 bottom_reserved_for_rss_bitmap =
738 roundup_pow_of_two(fixed_reserved_from_bot_rv + 1);
739 dev->phys_caps.base_sqpn = ALIGN(bottom_reserved_for_rss_bitmap, 8);
499 740
500 { 741 {
501 int sort[MLX4_NUM_QP_REGION]; 742 int sort[MLX4_NUM_QP_REGION];
@@ -505,8 +746,8 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
505 for (i = 1; i < MLX4_NUM_QP_REGION; ++i) 746 for (i = 1; i < MLX4_NUM_QP_REGION; ++i)
506 sort[i] = i; 747 sort[i] = i;
507 748
508 for (i = MLX4_NUM_QP_REGION; i > 0; --i) { 749 for (i = MLX4_NUM_QP_REGION; i > MLX4_QP_REGION_BOTTOM; --i) {
509 for (j = 2; j < i; ++j) { 750 for (j = MLX4_QP_REGION_BOTTOM + 2; j < i; ++j) {
510 if (dev->caps.reserved_qps_cnt[sort[j]] > 751 if (dev->caps.reserved_qps_cnt[sort[j]] >
511 dev->caps.reserved_qps_cnt[sort[j - 1]]) { 752 dev->caps.reserved_qps_cnt[sort[j - 1]]) {
512 tmp = sort[j]; 753 tmp = sort[j];
@@ -516,13 +757,12 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
516 } 757 }
517 } 758 }
518 759
519 for (i = 1; i < MLX4_NUM_QP_REGION; ++i) { 760 for (i = MLX4_QP_REGION_BOTTOM + 1; i < MLX4_NUM_QP_REGION; ++i) {
520 last_base -= dev->caps.reserved_qps_cnt[sort[i]]; 761 last_base -= dev->caps.reserved_qps_cnt[sort[i]];
521 dev->caps.reserved_qps_base[sort[i]] = last_base; 762 dev->caps.reserved_qps_base[sort[i]] = last_base;
522 reserved_from_top += 763 reserved_from_top +=
523 dev->caps.reserved_qps_cnt[sort[i]]; 764 dev->caps.reserved_qps_cnt[sort[i]];
524 } 765 }
525
526 } 766 }
527 767
528 /* Reserve 8 real SQPs in both native and SRIOV modes. 768 /* Reserve 8 real SQPs in both native and SRIOV modes.
@@ -541,9 +781,11 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
541 return -EINVAL; 781 return -EINVAL;
542 } 782 }
543 783
544 err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, 784 err = mlx4_create_zones(dev, reserved_from_bot, reserved_from_bot,
545 (1 << 23) - 1, reserved_from_bot, 785 bottom_reserved_for_rss_bitmap,
546 reserved_from_top); 786 fixed_reserved_from_bot_rv,
787 max_table_offset);
788
547 if (err) 789 if (err)
548 return err; 790 return err;
549 791
@@ -579,7 +821,8 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
579 err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn); 821 err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn);
580 if (err) 822 if (err)
581 goto err_mem; 823 goto err_mem;
582 return 0; 824
825 return err;
583 826
584err_mem: 827err_mem:
585 kfree(dev->caps.qp0_tunnel); 828 kfree(dev->caps.qp0_tunnel);
@@ -588,6 +831,7 @@ err_mem:
588 kfree(dev->caps.qp1_proxy); 831 kfree(dev->caps.qp1_proxy);
589 dev->caps.qp0_tunnel = dev->caps.qp0_proxy = 832 dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
590 dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; 833 dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
834 mlx4_cleanup_qp_zones(dev);
591 return err; 835 return err;
592} 836}
593 837
@@ -597,7 +841,8 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
597 return; 841 return;
598 842
599 mlx4_CONF_SPECIAL_QP(dev, 0); 843 mlx4_CONF_SPECIAL_QP(dev, 0);
600 mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap); 844
845 mlx4_cleanup_qp_zones(dev);
601} 846}
602 847
603int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, 848int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 272aa258c036..39890cddc5fa 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -195,7 +195,8 @@ enum {
195}; 195};
196 196
197enum { 197enum {
198 MLX4_QUERY_FUNC_FLAGS_BF_RES_QP = 1LL << 0 198 MLX4_QUERY_FUNC_FLAGS_BF_RES_QP = 1LL << 0,
199 MLX4_QUERY_FUNC_FLAGS_A0_RES_QP = 1LL << 1
199}; 200};
200 201
201/* bit enums for an 8-bit flags field indicating special use 202/* bit enums for an 8-bit flags field indicating special use
@@ -207,6 +208,7 @@ enum {
207 * This enum may use only bits 0..7. 208 * This enum may use only bits 0..7.
208 */ 209 */
209enum { 210enum {
211 MLX4_RESERVE_A0_QP = 1 << 6,
210 MLX4_RESERVE_ETH_BF_QP = 1 << 7, 212 MLX4_RESERVE_ETH_BF_QP = 1 << 7,
211}; 213};
212 214
@@ -349,6 +351,8 @@ enum {
349 351
350enum mlx4_qp_region { 352enum mlx4_qp_region {
351 MLX4_QP_REGION_FW = 0, 353 MLX4_QP_REGION_FW = 0,
354 MLX4_QP_REGION_RSS_RAW_ETH,
355 MLX4_QP_REGION_BOTTOM = MLX4_QP_REGION_RSS_RAW_ETH,
352 MLX4_QP_REGION_ETH_ADDR, 356 MLX4_QP_REGION_ETH_ADDR,
353 MLX4_QP_REGION_FC_ADDR, 357 MLX4_QP_REGION_FC_ADDR,
354 MLX4_QP_REGION_FC_EXCH, 358 MLX4_QP_REGION_FC_EXCH,
@@ -891,7 +895,9 @@ static inline int mlx4_num_reserved_sqps(struct mlx4_dev *dev)
891static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) 895static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn)
892{ 896{
893 return (qpn < dev->phys_caps.base_sqpn + 8 + 897 return (qpn < dev->phys_caps.base_sqpn + 8 +
894 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev)); 898 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev) &&
899 qpn >= dev->phys_caps.base_sqpn) ||
900 (qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]);
895} 901}
896 902
897static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn) 903static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn)